1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "hashtab.h"
42 #include "hash-set.h"
43 #include "vec.h"
44 #include "machmode.h"
45 #include "input.h"
46 #include "function.h"
47 #include "recog.h"
48 #include "expr.h"
49 #include "optabs.h"
50 #include "diagnostic-core.h"
51 #include "toplev.h"
52 #include "predict.h"
53 #include "dominance.h"
54 #include "cfg.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "basic-block.h"
61 #include "ggc.h"
62 #include "target.h"
63 #include "target-def.h"
64 #include "common/common-target.h"
65 #include "langhooks.h"
66 #include "reload.h"
67 #include "cgraph.h"
68 #include "hash-table.h"
69 #include "tree-ssa-alias.h"
70 #include "internal-fn.h"
71 #include "gimple-fold.h"
72 #include "tree-eh.h"
73 #include "gimple-expr.h"
74 #include "is-a.h"
75 #include "gimple.h"
76 #include "gimplify.h"
77 #include "cfgloop.h"
78 #include "dwarf2.h"
79 #include "df.h"
80 #include "tm-constrs.h"
81 #include "params.h"
82 #include "cselib.h"
83 #include "debug.h"
84 #include "sched-int.h"
85 #include "sbitmap.h"
86 #include "fibheap.h"
87 #include "opts.h"
88 #include "diagnostic.h"
89 #include "dumpfile.h"
90 #include "tree-pass.h"
91 #include "wide-int.h"
92 #include "context.h"
93 #include "pass_manager.h"
94 #include "target-globals.h"
95 #include "tree-vectorizer.h"
96 #include "shrink-wrap.h"
97 #include "builtins.h"
98
99 static rtx legitimize_dllimport_symbol (rtx, bool);
100 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
101 static rtx legitimize_pe_coff_symbol (rtx, bool);
102
103 #ifndef CHECK_STACK_LIMIT
104 #define CHECK_STACK_LIMIT (-1)
105 #endif
106
107 /* Return index of given mode in mult and division cost tables. */
108 #define MODE_INDEX(mode) \
109 ((mode) == QImode ? 0 \
110 : (mode) == HImode ? 1 \
111 : (mode) == SImode ? 2 \
112 : (mode) == DImode ? 3 \
113 : 4)
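/* Illustrative sketch, not part of the original file: how MODE_INDEX is
   meant to be used.  The five-entry multiply/divide cost arrays below are
   indexed by MODE_INDEX, with slot 4 covering every mode other than
   QI/HI/SI/DImode.  The field names mult_init and mult_bit are assumed
   here for illustration only; the real lookup is done by the rtx cost
   hooks later in this file.  */
static inline int
ix86_example_mult_cost (const struct processor_costs *cost,
			enum machine_mode mode, int bits_set)
{
  /* Cost of issuing the multiply for this mode, plus the per-set-bit
     component used when one operand is a known constant.  */
  return cost->mult_init[MODE_INDEX (mode)] + bits_set * cost->mult_bit;
}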
114
115 /* Processor costs (relative to an add) */
116 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
117 #define COSTS_N_BYTES(N) ((N) * 2)
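/* Worked example (added for clarity, not in the original file): with
   COSTS_N_INSNS (N) == (N) * 4 and a 2-byte add as the size unit,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte add scores the
   same in the size-cost table below as one baseline instruction does in
   the speed-cost tables, keeping the two scales comparable.  */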
118
119 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
120
121 static stringop_algs ix86_size_memcpy[2] = {
122 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
123 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
124 static stringop_algs ix86_size_memset[2] = {
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
126 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
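/* Illustrative sketch, not part of the original file: how the
   stringop_algs descriptors above and below are read.  Entry [0] of each
   pair is used for 32-bit code and entry [1] for 64-bit code; the first
   member is the algorithm used when the block size is unknown at compile
   time, and the {max, alg, noalign} triples are scanned in order until a
   max of -1 (no limit) is hit.  The real, more involved selection logic
   is in decide_alg later in this file; this helper only shows the assumed
   shape of that lookup.  */
static inline enum stringop_alg
ix86_example_pick_alg (const stringop_algs *algs, HOST_WIDE_INT count)
{
  int i;
  if (count < 0)
    /* Size not known at compile time.  */
    return algs->unknown_size;
  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
    if (algs->size[i].max == -1 || count <= algs->size[i].max)
      return algs->size[i].alg;
  return libcall;
}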
127
128 const
129 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
130 COSTS_N_BYTES (2), /* cost of an add instruction */
131 COSTS_N_BYTES (3), /* cost of a lea instruction */
132 COSTS_N_BYTES (2), /* variable shift costs */
133 COSTS_N_BYTES (3), /* constant shift costs */
134 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
135 COSTS_N_BYTES (3), /* HI */
136 COSTS_N_BYTES (3), /* SI */
137 COSTS_N_BYTES (3), /* DI */
138 COSTS_N_BYTES (5)}, /* other */
139 0, /* cost of multiply per each bit set */
140 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
141 COSTS_N_BYTES (3), /* HI */
142 COSTS_N_BYTES (3), /* SI */
143 COSTS_N_BYTES (3), /* DI */
144 COSTS_N_BYTES (5)}, /* other */
145 COSTS_N_BYTES (3), /* cost of movsx */
146 COSTS_N_BYTES (3), /* cost of movzx */
147 0, /* "large" insn */
148 2, /* MOVE_RATIO */
149 2, /* cost for loading QImode using movzbl */
150 {2, 2, 2}, /* cost of loading integer registers
151 in QImode, HImode and SImode.
152 Relative to reg-reg move (2). */
153 {2, 2, 2}, /* cost of storing integer registers */
154 2, /* cost of reg,reg fld/fst */
155 {2, 2, 2}, /* cost of loading fp registers
156 in SFmode, DFmode and XFmode */
157 {2, 2, 2}, /* cost of storing fp registers
158 in SFmode, DFmode and XFmode */
159 3, /* cost of moving MMX register */
160 {3, 3}, /* cost of loading MMX registers
161 in SImode and DImode */
162 {3, 3}, /* cost of storing MMX registers
163 in SImode and DImode */
164 3, /* cost of moving SSE register */
165 {3, 3, 3}, /* cost of loading SSE registers
166 in SImode, DImode and TImode */
167 {3, 3, 3}, /* cost of storing SSE registers
168 in SImode, DImode and TImode */
169 3, /* MMX or SSE register to integer */
170 0, /* size of l1 cache */
171 0, /* size of l2 cache */
172 0, /* size of prefetch block */
173 0, /* number of parallel prefetches */
174 2, /* Branch cost */
175 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
176 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
177 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
178 COSTS_N_BYTES (2), /* cost of FABS instruction. */
179 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
180 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
181 ix86_size_memcpy,
182 ix86_size_memset,
183 1, /* scalar_stmt_cost. */
184 1, /* scalar load_cost. */
185 1, /* scalar_store_cost. */
186 1, /* vec_stmt_cost. */
187 1, /* vec_to_scalar_cost. */
188 1, /* scalar_to_vec_cost. */
189 1, /* vec_align_load_cost. */
190 1, /* vec_unalign_load_cost. */
191 1, /* vec_store_cost. */
192 1, /* cond_taken_branch_cost. */
193 1, /* cond_not_taken_branch_cost. */
194 };
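/* Added note (not in the original file): the trailing scalar_..._cost,
   vec_..._cost and cond_..._branch_cost fields of these cost tables feed
   the vectorizer cost model.  Assuming the usual hook wiring, something
   like ix86_builtin_vectorization_cost later in this file returns them
   for the matching vect_cost_for_stmt kinds, e.g. (simplified):

     case scalar_stmt:  return ix86_cost->scalar_stmt_cost;
     case vector_store: return ix86_cost->vec_store_cost;  */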
195
196 /* Processor costs (relative to an add) */
197 static stringop_algs i386_memcpy[2] = {
198 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
199 DUMMY_STRINGOP_ALGS};
200 static stringop_algs i386_memset[2] = {
201 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
202 DUMMY_STRINGOP_ALGS};
203
204 static const
205 struct processor_costs i386_cost = { /* 386 specific costs */
206 COSTS_N_INSNS (1), /* cost of an add instruction */
207 COSTS_N_INSNS (1), /* cost of a lea instruction */
208 COSTS_N_INSNS (3), /* variable shift costs */
209 COSTS_N_INSNS (2), /* constant shift costs */
210 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
211 COSTS_N_INSNS (6), /* HI */
212 COSTS_N_INSNS (6), /* SI */
213 COSTS_N_INSNS (6), /* DI */
214 COSTS_N_INSNS (6)}, /* other */
215 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
216 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
217 COSTS_N_INSNS (23), /* HI */
218 COSTS_N_INSNS (23), /* SI */
219 COSTS_N_INSNS (23), /* DI */
220 COSTS_N_INSNS (23)}, /* other */
221 COSTS_N_INSNS (3), /* cost of movsx */
222 COSTS_N_INSNS (2), /* cost of movzx */
223 15, /* "large" insn */
224 3, /* MOVE_RATIO */
225 4, /* cost for loading QImode using movzbl */
226 {2, 4, 2}, /* cost of loading integer registers
227 in QImode, HImode and SImode.
228 Relative to reg-reg move (2). */
229 {2, 4, 2}, /* cost of storing integer registers */
230 2, /* cost of reg,reg fld/fst */
231 {8, 8, 8}, /* cost of loading fp registers
232 in SFmode, DFmode and XFmode */
233 {8, 8, 8}, /* cost of storing fp registers
234 in SFmode, DFmode and XFmode */
235 2, /* cost of moving MMX register */
236 {4, 8}, /* cost of loading MMX registers
237 in SImode and DImode */
238 {4, 8}, /* cost of storing MMX registers
239 in SImode and DImode */
240 2, /* cost of moving SSE register */
241 {4, 8, 16}, /* cost of loading SSE registers
242 in SImode, DImode and TImode */
243 {4, 8, 16}, /* cost of storing SSE registers
244 in SImode, DImode and TImode */
245 3, /* MMX or SSE register to integer */
246 0, /* size of l1 cache */
247 0, /* size of l2 cache */
248 0, /* size of prefetch block */
249 0, /* number of parallel prefetches */
250 1, /* Branch cost */
251 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
252 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
253 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
254 COSTS_N_INSNS (22), /* cost of FABS instruction. */
255 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
256 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
257 i386_memcpy,
258 i386_memset,
259 1, /* scalar_stmt_cost. */
260 1, /* scalar load_cost. */
261 1, /* scalar_store_cost. */
262 1, /* vec_stmt_cost. */
263 1, /* vec_to_scalar_cost. */
264 1, /* scalar_to_vec_cost. */
265 1, /* vec_align_load_cost. */
266 2, /* vec_unalign_load_cost. */
267 1, /* vec_store_cost. */
268 3, /* cond_taken_branch_cost. */
269 1, /* cond_not_taken_branch_cost. */
270 };
271
272 static stringop_algs i486_memcpy[2] = {
273 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
274 DUMMY_STRINGOP_ALGS};
275 static stringop_algs i486_memset[2] = {
276 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
277 DUMMY_STRINGOP_ALGS};
278
279 static const
280 struct processor_costs i486_cost = { /* 486 specific costs */
281 COSTS_N_INSNS (1), /* cost of an add instruction */
282 COSTS_N_INSNS (1), /* cost of a lea instruction */
283 COSTS_N_INSNS (3), /* variable shift costs */
284 COSTS_N_INSNS (2), /* constant shift costs */
285 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
286 COSTS_N_INSNS (12), /* HI */
287 COSTS_N_INSNS (12), /* SI */
288 COSTS_N_INSNS (12), /* DI */
289 COSTS_N_INSNS (12)}, /* other */
290 1, /* cost of multiply per each bit set */
291 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
292 COSTS_N_INSNS (40), /* HI */
293 COSTS_N_INSNS (40), /* SI */
294 COSTS_N_INSNS (40), /* DI */
295 COSTS_N_INSNS (40)}, /* other */
296 COSTS_N_INSNS (3), /* cost of movsx */
297 COSTS_N_INSNS (2), /* cost of movzx */
298 15, /* "large" insn */
299 3, /* MOVE_RATIO */
300 4, /* cost for loading QImode using movzbl */
301 {2, 4, 2}, /* cost of loading integer registers
302 in QImode, HImode and SImode.
303 Relative to reg-reg move (2). */
304 {2, 4, 2}, /* cost of storing integer registers */
305 2, /* cost of reg,reg fld/fst */
306 {8, 8, 8}, /* cost of loading fp registers
307 in SFmode, DFmode and XFmode */
308 {8, 8, 8}, /* cost of storing fp registers
309 in SFmode, DFmode and XFmode */
310 2, /* cost of moving MMX register */
311 {4, 8}, /* cost of loading MMX registers
312 in SImode and DImode */
313 {4, 8}, /* cost of storing MMX registers
314 in SImode and DImode */
315 2, /* cost of moving SSE register */
316 {4, 8, 16}, /* cost of loading SSE registers
317 in SImode, DImode and TImode */
318 {4, 8, 16}, /* cost of storing SSE registers
319 in SImode, DImode and TImode */
320 3, /* MMX or SSE register to integer */
321 4, /* size of l1 cache. 486 has 8kB cache
322 shared for code and data, so 4kB is
323 not really precise. */
324 4, /* size of l2 cache */
325 0, /* size of prefetch block */
326 0, /* number of parallel prefetches */
327 1, /* Branch cost */
328 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
329 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
330 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
331 COSTS_N_INSNS (3), /* cost of FABS instruction. */
332 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
333 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
334 i486_memcpy,
335 i486_memset,
336 1, /* scalar_stmt_cost. */
337 1, /* scalar load_cost. */
338 1, /* scalar_store_cost. */
339 1, /* vec_stmt_cost. */
340 1, /* vec_to_scalar_cost. */
341 1, /* scalar_to_vec_cost. */
342 1, /* vec_align_load_cost. */
343 2, /* vec_unalign_load_cost. */
344 1, /* vec_store_cost. */
345 3, /* cond_taken_branch_cost. */
346 1, /* cond_not_taken_branch_cost. */
347 };
348
349 static stringop_algs pentium_memcpy[2] = {
350 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
351 DUMMY_STRINGOP_ALGS};
352 static stringop_algs pentium_memset[2] = {
353 {libcall, {{-1, rep_prefix_4_byte, false}}},
354 DUMMY_STRINGOP_ALGS};
355
356 static const
357 struct processor_costs pentium_cost = {
358 COSTS_N_INSNS (1), /* cost of an add instruction */
359 COSTS_N_INSNS (1), /* cost of a lea instruction */
360 COSTS_N_INSNS (4), /* variable shift costs */
361 COSTS_N_INSNS (1), /* constant shift costs */
362 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
363 COSTS_N_INSNS (11), /* HI */
364 COSTS_N_INSNS (11), /* SI */
365 COSTS_N_INSNS (11), /* DI */
366 COSTS_N_INSNS (11)}, /* other */
367 0, /* cost of multiply per each bit set */
368 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
369 COSTS_N_INSNS (25), /* HI */
370 COSTS_N_INSNS (25), /* SI */
371 COSTS_N_INSNS (25), /* DI */
372 COSTS_N_INSNS (25)}, /* other */
373 COSTS_N_INSNS (3), /* cost of movsx */
374 COSTS_N_INSNS (2), /* cost of movzx */
375 8, /* "large" insn */
376 6, /* MOVE_RATIO */
377 6, /* cost for loading QImode using movzbl */
378 {2, 4, 2}, /* cost of loading integer registers
379 in QImode, HImode and SImode.
380 Relative to reg-reg move (2). */
381 {2, 4, 2}, /* cost of storing integer registers */
382 2, /* cost of reg,reg fld/fst */
383 {2, 2, 6}, /* cost of loading fp registers
384 in SFmode, DFmode and XFmode */
385 {4, 4, 6}, /* cost of storing fp registers
386 in SFmode, DFmode and XFmode */
387 8, /* cost of moving MMX register */
388 {8, 8}, /* cost of loading MMX registers
389 in SImode and DImode */
390 {8, 8}, /* cost of storing MMX registers
391 in SImode and DImode */
392 2, /* cost of moving SSE register */
393 {4, 8, 16}, /* cost of loading SSE registers
394 in SImode, DImode and TImode */
395 {4, 8, 16}, /* cost of storing SSE registers
396 in SImode, DImode and TImode */
397 3, /* MMX or SSE register to integer */
398 8, /* size of l1 cache. */
399 8, /* size of l2 cache */
400 0, /* size of prefetch block */
401 0, /* number of parallel prefetches */
402 2, /* Branch cost */
403 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
404 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
405 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
406 COSTS_N_INSNS (1), /* cost of FABS instruction. */
407 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
408 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
409 pentium_memcpy,
410 pentium_memset,
411 1, /* scalar_stmt_cost. */
412 1, /* scalar load_cost. */
413 1, /* scalar_store_cost. */
414 1, /* vec_stmt_cost. */
415 1, /* vec_to_scalar_cost. */
416 1, /* scalar_to_vec_cost. */
417 1, /* vec_align_load_cost. */
418 2, /* vec_unalign_load_cost. */
419 1, /* vec_store_cost. */
420 3, /* cond_taken_branch_cost. */
421 1, /* cond_not_taken_branch_cost. */
422 };
423
424 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
425 (we ensure the alignment). For small blocks an inline loop is still a
426 noticeable win; for bigger blocks either rep movsl or rep movsb is the
427 way to go. Rep movsb apparently has a more expensive startup time in the
428 CPU, but past 4K the difference is down in the noise. */
429 static stringop_algs pentiumpro_memcpy[2] = {
430 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
431 {8192, rep_prefix_4_byte, false},
432 {-1, rep_prefix_1_byte, false}}},
433 DUMMY_STRINGOP_ALGS};
434 static stringop_algs pentiumpro_memset[2] = {
435 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
436 {8192, rep_prefix_4_byte, false},
437 {-1, libcall, false}}},
438 DUMMY_STRINGOP_ALGS};
439 static const
440 struct processor_costs pentiumpro_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (1), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (4), /* SI */
448 COSTS_N_INSNS (4), /* DI */
449 COSTS_N_INSNS (4)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (17), /* HI */
453 COSTS_N_INSNS (17), /* SI */
454 COSTS_N_INSNS (17), /* DI */
455 COSTS_N_INSNS (17)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 6, /* MOVE_RATIO */
460 2, /* cost for loading QImode using movzbl */
461 {4, 4, 4}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {2, 2, 2}, /* cost of storing integer registers */
465 2, /* cost of reg,reg fld/fst */
466 {2, 2, 6}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 4, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470 2, /* cost of moving MMX register */
471 {2, 2}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {2, 2}, /* cost of storing MMX registers
474 in SImode and DImode */
475 2, /* cost of moving SSE register */
476 {2, 2, 8}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {2, 2, 8}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 3, /* MMX or SSE register to integer */
481 8, /* size of l1 cache. */
482 256, /* size of l2 cache */
483 32, /* size of prefetch block */
484 6, /* number of parallel prefetches */
485 2, /* Branch cost */
486 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (2), /* cost of FABS instruction. */
490 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
492 pentiumpro_memcpy,
493 pentiumpro_memset,
494 1, /* scalar_stmt_cost. */
495 1, /* scalar load_cost. */
496 1, /* scalar_store_cost. */
497 1, /* vec_stmt_cost. */
498 1, /* vec_to_scalar_cost. */
499 1, /* scalar_to_vec_cost. */
500 1, /* vec_align_load_cost. */
501 2, /* vec_unalign_load_cost. */
502 1, /* vec_store_cost. */
503 3, /* cond_taken_branch_cost. */
504 1, /* cond_not_taken_branch_cost. */
505 };
506
507 static stringop_algs geode_memcpy[2] = {
508 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
509 DUMMY_STRINGOP_ALGS};
510 static stringop_algs geode_memset[2] = {
511 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
512 DUMMY_STRINGOP_ALGS};
513 static const
514 struct processor_costs geode_cost = {
515 COSTS_N_INSNS (1), /* cost of an add instruction */
516 COSTS_N_INSNS (1), /* cost of a lea instruction */
517 COSTS_N_INSNS (2), /* variable shift costs */
518 COSTS_N_INSNS (1), /* constant shift costs */
519 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
520 COSTS_N_INSNS (4), /* HI */
521 COSTS_N_INSNS (7), /* SI */
522 COSTS_N_INSNS (7), /* DI */
523 COSTS_N_INSNS (7)}, /* other */
524 0, /* cost of multiply per each bit set */
525 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
526 COSTS_N_INSNS (23), /* HI */
527 COSTS_N_INSNS (39), /* SI */
528 COSTS_N_INSNS (39), /* DI */
529 COSTS_N_INSNS (39)}, /* other */
530 COSTS_N_INSNS (1), /* cost of movsx */
531 COSTS_N_INSNS (1), /* cost of movzx */
532 8, /* "large" insn */
533 4, /* MOVE_RATIO */
534 1, /* cost for loading QImode using movzbl */
535 {1, 1, 1}, /* cost of loading integer registers
536 in QImode, HImode and SImode.
537 Relative to reg-reg move (2). */
538 {1, 1, 1}, /* cost of storing integer registers */
539 1, /* cost of reg,reg fld/fst */
540 {1, 1, 1}, /* cost of loading fp registers
541 in SFmode, DFmode and XFmode */
542 {4, 6, 6}, /* cost of storing fp registers
543 in SFmode, DFmode and XFmode */
544
545 1, /* cost of moving MMX register */
546 {1, 1}, /* cost of loading MMX registers
547 in SImode and DImode */
548 {1, 1}, /* cost of storing MMX registers
549 in SImode and DImode */
550 1, /* cost of moving SSE register */
551 {1, 1, 1}, /* cost of loading SSE registers
552 in SImode, DImode and TImode */
553 {1, 1, 1}, /* cost of storing SSE registers
554 in SImode, DImode and TImode */
555 1, /* MMX or SSE register to integer */
556 64, /* size of l1 cache. */
557 128, /* size of l2 cache. */
558 32, /* size of prefetch block */
559 1, /* number of parallel prefetches */
560 1, /* Branch cost */
561 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
562 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
563 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
564 COSTS_N_INSNS (1), /* cost of FABS instruction. */
565 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
566 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
567 geode_memcpy,
568 geode_memset,
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
580 };
581
582 static stringop_algs k6_memcpy[2] = {
583 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
584 DUMMY_STRINGOP_ALGS};
585 static stringop_algs k6_memset[2] = {
586 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
587 DUMMY_STRINGOP_ALGS};
588 static const
589 struct processor_costs k6_cost = {
590 COSTS_N_INSNS (1), /* cost of an add instruction */
591 COSTS_N_INSNS (2), /* cost of a lea instruction */
592 COSTS_N_INSNS (1), /* variable shift costs */
593 COSTS_N_INSNS (1), /* constant shift costs */
594 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
595 COSTS_N_INSNS (3), /* HI */
596 COSTS_N_INSNS (3), /* SI */
597 COSTS_N_INSNS (3), /* DI */
598 COSTS_N_INSNS (3)}, /* other */
599 0, /* cost of multiply per each bit set */
600 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
601 COSTS_N_INSNS (18), /* HI */
602 COSTS_N_INSNS (18), /* SI */
603 COSTS_N_INSNS (18), /* DI */
604 COSTS_N_INSNS (18)}, /* other */
605 COSTS_N_INSNS (2), /* cost of movsx */
606 COSTS_N_INSNS (2), /* cost of movzx */
607 8, /* "large" insn */
608 4, /* MOVE_RATIO */
609 3, /* cost for loading QImode using movzbl */
610 {4, 5, 4}, /* cost of loading integer registers
611 in QImode, HImode and SImode.
612 Relative to reg-reg move (2). */
613 {2, 3, 2}, /* cost of storing integer registers */
614 4, /* cost of reg,reg fld/fst */
615 {6, 6, 6}, /* cost of loading fp registers
616 in SFmode, DFmode and XFmode */
617 {4, 4, 4}, /* cost of storing fp registers
618 in SFmode, DFmode and XFmode */
619 2, /* cost of moving MMX register */
620 {2, 2}, /* cost of loading MMX registers
621 in SImode and DImode */
622 {2, 2}, /* cost of storing MMX registers
623 in SImode and DImode */
624 2, /* cost of moving SSE register */
625 {2, 2, 8}, /* cost of loading SSE registers
626 in SImode, DImode and TImode */
627 {2, 2, 8}, /* cost of storing SSE registers
628 in SImode, DImode and TImode */
629 6, /* MMX or SSE register to integer */
630 32, /* size of l1 cache. */
631 32, /* size of l2 cache. Some models
632 have integrated l2 cache, but
633 optimizing for k6 is not important
634 enough to worry about that. */
635 32, /* size of prefetch block */
636 1, /* number of parallel prefetches */
637 1, /* Branch cost */
638 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
639 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
640 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
641 COSTS_N_INSNS (2), /* cost of FABS instruction. */
642 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
643 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
644 k6_memcpy,
645 k6_memset,
646 1, /* scalar_stmt_cost. */
647 1, /* scalar load_cost. */
648 1, /* scalar_store_cost. */
649 1, /* vec_stmt_cost. */
650 1, /* vec_to_scalar_cost. */
651 1, /* scalar_to_vec_cost. */
652 1, /* vec_align_load_cost. */
653 2, /* vec_unalign_load_cost. */
654 1, /* vec_store_cost. */
655 3, /* cond_taken_branch_cost. */
656 1, /* cond_not_taken_branch_cost. */
657 };
658
659 /* For some reason, Athlon deals better with the REP prefix (relative to
660 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
661 and 128 bytes for memset. */
662 static stringop_algs athlon_memcpy[2] = {
663 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
664 DUMMY_STRINGOP_ALGS};
665 static stringop_algs athlon_memset[2] = {
666 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
667 DUMMY_STRINGOP_ALGS};
668 static const
669 struct processor_costs athlon_cost = {
670 COSTS_N_INSNS (1), /* cost of an add instruction */
671 COSTS_N_INSNS (2), /* cost of a lea instruction */
672 COSTS_N_INSNS (1), /* variable shift costs */
673 COSTS_N_INSNS (1), /* constant shift costs */
674 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
675 COSTS_N_INSNS (5), /* HI */
676 COSTS_N_INSNS (5), /* SI */
677 COSTS_N_INSNS (5), /* DI */
678 COSTS_N_INSNS (5)}, /* other */
679 0, /* cost of multiply per each bit set */
680 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
681 COSTS_N_INSNS (26), /* HI */
682 COSTS_N_INSNS (42), /* SI */
683 COSTS_N_INSNS (74), /* DI */
684 COSTS_N_INSNS (74)}, /* other */
685 COSTS_N_INSNS (1), /* cost of movsx */
686 COSTS_N_INSNS (1), /* cost of movzx */
687 8, /* "large" insn */
688 9, /* MOVE_RATIO */
689 4, /* cost for loading QImode using movzbl */
690 {3, 4, 3}, /* cost of loading integer registers
691 in QImode, HImode and SImode.
692 Relative to reg-reg move (2). */
693 {3, 4, 3}, /* cost of storing integer registers */
694 4, /* cost of reg,reg fld/fst */
695 {4, 4, 12}, /* cost of loading fp registers
696 in SFmode, DFmode and XFmode */
697 {6, 6, 8}, /* cost of storing fp registers
698 in SFmode, DFmode and XFmode */
699 2, /* cost of moving MMX register */
700 {4, 4}, /* cost of loading MMX registers
701 in SImode and DImode */
702 {4, 4}, /* cost of storing MMX registers
703 in SImode and DImode */
704 2, /* cost of moving SSE register */
705 {4, 4, 6}, /* cost of loading SSE registers
706 in SImode, DImode and TImode */
707 {4, 4, 5}, /* cost of storing SSE registers
708 in SImode, DImode and TImode */
709 5, /* MMX or SSE register to integer */
710 64, /* size of l1 cache. */
711 256, /* size of l2 cache. */
712 64, /* size of prefetch block */
713 6, /* number of parallel prefetches */
714 5, /* Branch cost */
715 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
716 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
717 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
718 COSTS_N_INSNS (2), /* cost of FABS instruction. */
719 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
720 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
721 athlon_memcpy,
722 athlon_memset,
723 1, /* scalar_stmt_cost. */
724 1, /* scalar load_cost. */
725 1, /* scalar_store_cost. */
726 1, /* vec_stmt_cost. */
727 1, /* vec_to_scalar_cost. */
728 1, /* scalar_to_vec_cost. */
729 1, /* vec_align_load_cost. */
730 2, /* vec_unalign_load_cost. */
731 1, /* vec_store_cost. */
732 3, /* cond_taken_branch_cost. */
733 1, /* cond_not_taken_branch_cost. */
734 };
735
736 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
737 small blocks it is better to use a loop. For large blocks, a libcall can
738 do non-temporal accesses and beat inline code considerably. */
739 static stringop_algs k8_memcpy[2] = {
740 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
741 {-1, rep_prefix_4_byte, false}}},
742 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
743 {-1, libcall, false}}}};
744 static stringop_algs k8_memset[2] = {
745 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
746 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
747 {libcall, {{48, unrolled_loop, false},
748 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
749 static const
750 struct processor_costs k8_cost = {
751 COSTS_N_INSNS (1), /* cost of an add instruction */
752 COSTS_N_INSNS (2), /* cost of a lea instruction */
753 COSTS_N_INSNS (1), /* variable shift costs */
754 COSTS_N_INSNS (1), /* constant shift costs */
755 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
756 COSTS_N_INSNS (4), /* HI */
757 COSTS_N_INSNS (3), /* SI */
758 COSTS_N_INSNS (4), /* DI */
759 COSTS_N_INSNS (5)}, /* other */
760 0, /* cost of multiply per each bit set */
761 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
762 COSTS_N_INSNS (26), /* HI */
763 COSTS_N_INSNS (42), /* SI */
764 COSTS_N_INSNS (74), /* DI */
765 COSTS_N_INSNS (74)}, /* other */
766 COSTS_N_INSNS (1), /* cost of movsx */
767 COSTS_N_INSNS (1), /* cost of movzx */
768 8, /* "large" insn */
769 9, /* MOVE_RATIO */
770 4, /* cost for loading QImode using movzbl */
771 {3, 4, 3}, /* cost of loading integer registers
772 in QImode, HImode and SImode.
773 Relative to reg-reg move (2). */
774 {3, 4, 3}, /* cost of storing integer registers */
775 4, /* cost of reg,reg fld/fst */
776 {4, 4, 12}, /* cost of loading fp registers
777 in SFmode, DFmode and XFmode */
778 {6, 6, 8}, /* cost of storing fp registers
779 in SFmode, DFmode and XFmode */
780 2, /* cost of moving MMX register */
781 {3, 3}, /* cost of loading MMX registers
782 in SImode and DImode */
783 {4, 4}, /* cost of storing MMX registers
784 in SImode and DImode */
785 2, /* cost of moving SSE register */
786 {4, 3, 6}, /* cost of loading SSE registers
787 in SImode, DImode and TImode */
788 {4, 4, 5}, /* cost of storing SSE registers
789 in SImode, DImode and TImode */
790 5, /* MMX or SSE register to integer */
791 64, /* size of l1 cache. */
792 512, /* size of l2 cache. */
793 64, /* size of prefetch block */
794 /* New AMD processors never drop prefetches; if they cannot be performed
795 immediately, they are queued. We set number of simultaneous prefetches
796 to a large constant to reflect this (it probably is not a good idea not
797 to limit number of prefetches at all, as their execution also takes some
798 time). */
799 100, /* number of parallel prefetches */
800 3, /* Branch cost */
801 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
802 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
803 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
804 COSTS_N_INSNS (2), /* cost of FABS instruction. */
805 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
806 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
807
808 k8_memcpy,
809 k8_memset,
810 4, /* scalar_stmt_cost. */
811 2, /* scalar load_cost. */
812 2, /* scalar_store_cost. */
813 5, /* vec_stmt_cost. */
814 0, /* vec_to_scalar_cost. */
815 2, /* scalar_to_vec_cost. */
816 2, /* vec_align_load_cost. */
817 3, /* vec_unalign_load_cost. */
818 3, /* vec_store_cost. */
819 3, /* cond_taken_branch_cost. */
820 2, /* cond_not_taken_branch_cost. */
821 };
822
823 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
824 very small blocks it is better to use a loop. For large blocks, a libcall can
825 do non-temporal accesses and beat inline code considerably. */
826 static stringop_algs amdfam10_memcpy[2] = {
827 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
828 {-1, rep_prefix_4_byte, false}}},
829 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
830 {-1, libcall, false}}}};
831 static stringop_algs amdfam10_memset[2] = {
832 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
833 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
834 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
835 {-1, libcall, false}}}};
836 struct processor_costs amdfam10_cost = {
837 COSTS_N_INSNS (1), /* cost of an add instruction */
838 COSTS_N_INSNS (2), /* cost of a lea instruction */
839 COSTS_N_INSNS (1), /* variable shift costs */
840 COSTS_N_INSNS (1), /* constant shift costs */
841 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
842 COSTS_N_INSNS (4), /* HI */
843 COSTS_N_INSNS (3), /* SI */
844 COSTS_N_INSNS (4), /* DI */
845 COSTS_N_INSNS (5)}, /* other */
846 0, /* cost of multiply per each bit set */
847 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
848 COSTS_N_INSNS (35), /* HI */
849 COSTS_N_INSNS (51), /* SI */
850 COSTS_N_INSNS (83), /* DI */
851 COSTS_N_INSNS (83)}, /* other */
852 COSTS_N_INSNS (1), /* cost of movsx */
853 COSTS_N_INSNS (1), /* cost of movzx */
854 8, /* "large" insn */
855 9, /* MOVE_RATIO */
856 4, /* cost for loading QImode using movzbl */
857 {3, 4, 3}, /* cost of loading integer registers
858 in QImode, HImode and SImode.
859 Relative to reg-reg move (2). */
860 {3, 4, 3}, /* cost of storing integer registers */
861 4, /* cost of reg,reg fld/fst */
862 {4, 4, 12}, /* cost of loading fp registers
863 in SFmode, DFmode and XFmode */
864 {6, 6, 8}, /* cost of storing fp registers
865 in SFmode, DFmode and XFmode */
866 2, /* cost of moving MMX register */
867 {3, 3}, /* cost of loading MMX registers
868 in SImode and DImode */
869 {4, 4}, /* cost of storing MMX registers
870 in SImode and DImode */
871 2, /* cost of moving SSE register */
872 {4, 4, 3}, /* cost of loading SSE registers
873 in SImode, DImode and TImode */
874 {4, 4, 5}, /* cost of storing SSE registers
875 in SImode, DImode and TImode */
876 3, /* MMX or SSE register to integer */
877 /* On K8:
878 MOVD reg64, xmmreg Double FSTORE 4
879 MOVD reg32, xmmreg Double FSTORE 4
880 On AMDFAM10:
881 MOVD reg64, xmmreg Double FADD 3
882 1/1 1/1
883 MOVD reg32, xmmreg Double FADD 3
884 1/1 1/1 */
885 64, /* size of l1 cache. */
886 512, /* size of l2 cache. */
887 64, /* size of prefetch block */
888 /* New AMD processors never drop prefetches; if they cannot be performed
889 immediately, they are queued. We set number of simultaneous prefetches
890 to a large constant to reflect this (it probably is not a good idea not
891 to limit number of prefetches at all, as their execution also takes some
892 time). */
893 100, /* number of parallel prefetches */
894 2, /* Branch cost */
895 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
896 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
897 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
898 COSTS_N_INSNS (2), /* cost of FABS instruction. */
899 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
900 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
901
902 amdfam10_memcpy,
903 amdfam10_memset,
904 4, /* scalar_stmt_cost. */
905 2, /* scalar load_cost. */
906 2, /* scalar_store_cost. */
907 6, /* vec_stmt_cost. */
908 0, /* vec_to_scalar_cost. */
909 2, /* scalar_to_vec_cost. */
910 2, /* vec_align_load_cost. */
911 2, /* vec_unalign_load_cost. */
912 2, /* vec_store_cost. */
913 2, /* cond_taken_branch_cost. */
914 1, /* cond_not_taken_branch_cost. */
915 };
916
917 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
918 very small blocks it is better to use a loop. For large blocks, a libcall
919 can do non-temporal accesses and beat inline code considerably. */
920 static stringop_algs bdver1_memcpy[2] = {
921 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
922 {-1, rep_prefix_4_byte, false}}},
923 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
924 {-1, libcall, false}}}};
925 static stringop_algs bdver1_memset[2] = {
926 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
927 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
928 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
929 {-1, libcall, false}}}};
930
931 const struct processor_costs bdver1_cost = {
932 COSTS_N_INSNS (1), /* cost of an add instruction */
933 COSTS_N_INSNS (1), /* cost of a lea instruction */
934 COSTS_N_INSNS (1), /* variable shift costs */
935 COSTS_N_INSNS (1), /* constant shift costs */
936 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
937 COSTS_N_INSNS (4), /* HI */
938 COSTS_N_INSNS (4), /* SI */
939 COSTS_N_INSNS (6), /* DI */
940 COSTS_N_INSNS (6)}, /* other */
941 0, /* cost of multiply per each bit set */
942 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
943 COSTS_N_INSNS (35), /* HI */
944 COSTS_N_INSNS (51), /* SI */
945 COSTS_N_INSNS (83), /* DI */
946 COSTS_N_INSNS (83)}, /* other */
947 COSTS_N_INSNS (1), /* cost of movsx */
948 COSTS_N_INSNS (1), /* cost of movzx */
949 8, /* "large" insn */
950 9, /* MOVE_RATIO */
951 4, /* cost for loading QImode using movzbl */
952 {5, 5, 4}, /* cost of loading integer registers
953 in QImode, HImode and SImode.
954 Relative to reg-reg move (2). */
955 {4, 4, 4}, /* cost of storing integer registers */
956 2, /* cost of reg,reg fld/fst */
957 {5, 5, 12}, /* cost of loading fp registers
958 in SFmode, DFmode and XFmode */
959 {4, 4, 8}, /* cost of storing fp registers
960 in SFmode, DFmode and XFmode */
961 2, /* cost of moving MMX register */
962 {4, 4}, /* cost of loading MMX registers
963 in SImode and DImode */
964 {4, 4}, /* cost of storing MMX registers
965 in SImode and DImode */
966 2, /* cost of moving SSE register */
967 {4, 4, 4}, /* cost of loading SSE registers
968 in SImode, DImode and TImode */
969 {4, 4, 4}, /* cost of storing SSE registers
970 in SImode, DImode and TImode */
971 2, /* MMX or SSE register to integer */
972 /* On K8:
973 MOVD reg64, xmmreg Double FSTORE 4
974 MOVD reg32, xmmreg Double FSTORE 4
975 On AMDFAM10:
976 MOVD reg64, xmmreg Double FADD 3
977 1/1 1/1
978 MOVD reg32, xmmreg Double FADD 3
979 1/1 1/1 */
980 16, /* size of l1 cache. */
981 2048, /* size of l2 cache. */
982 64, /* size of prefetch block */
983 /* New AMD processors never drop prefetches; if they cannot be performed
984 immediately, they are queued. We set number of simultaneous prefetches
985 to a large constant to reflect this (it probably is not a good idea not
986 to limit number of prefetches at all, as their execution also takes some
987 time). */
988 100, /* number of parallel prefetches */
989 2, /* Branch cost */
990 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
991 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
992 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
993 COSTS_N_INSNS (2), /* cost of FABS instruction. */
994 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
995 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
996
997 bdver1_memcpy,
998 bdver1_memset,
999 6, /* scalar_stmt_cost. */
1000 4, /* scalar load_cost. */
1001 4, /* scalar_store_cost. */
1002 6, /* vec_stmt_cost. */
1003 0, /* vec_to_scalar_cost. */
1004 2, /* scalar_to_vec_cost. */
1005 4, /* vec_align_load_cost. */
1006 4, /* vec_unalign_load_cost. */
1007 4, /* vec_store_cost. */
1008 2, /* cond_taken_branch_cost. */
1009 1, /* cond_not_taken_branch_cost. */
1010 };
1011
1012 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1013 very small blocks it is better to use a loop. For large blocks, a libcall
1014 can do non-temporal accesses and beat inline code considerably. */
1015
1016 static stringop_algs bdver2_memcpy[2] = {
1017 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1018 {-1, rep_prefix_4_byte, false}}},
1019 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1020 {-1, libcall, false}}}};
1021 static stringop_algs bdver2_memset[2] = {
1022 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1023 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1024 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1025 {-1, libcall, false}}}};
1026
1027 const struct processor_costs bdver2_cost = {
1028 COSTS_N_INSNS (1), /* cost of an add instruction */
1029 COSTS_N_INSNS (1), /* cost of a lea instruction */
1030 COSTS_N_INSNS (1), /* variable shift costs */
1031 COSTS_N_INSNS (1), /* constant shift costs */
1032 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1033 COSTS_N_INSNS (4), /* HI */
1034 COSTS_N_INSNS (4), /* SI */
1035 COSTS_N_INSNS (6), /* DI */
1036 COSTS_N_INSNS (6)}, /* other */
1037 0, /* cost of multiply per each bit set */
1038 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1039 COSTS_N_INSNS (35), /* HI */
1040 COSTS_N_INSNS (51), /* SI */
1041 COSTS_N_INSNS (83), /* DI */
1042 COSTS_N_INSNS (83)}, /* other */
1043 COSTS_N_INSNS (1), /* cost of movsx */
1044 COSTS_N_INSNS (1), /* cost of movzx */
1045 8, /* "large" insn */
1046 9, /* MOVE_RATIO */
1047 4, /* cost for loading QImode using movzbl */
1048 {5, 5, 4}, /* cost of loading integer registers
1049 in QImode, HImode and SImode.
1050 Relative to reg-reg move (2). */
1051 {4, 4, 4}, /* cost of storing integer registers */
1052 2, /* cost of reg,reg fld/fst */
1053 {5, 5, 12}, /* cost of loading fp registers
1054 in SFmode, DFmode and XFmode */
1055 {4, 4, 8}, /* cost of storing fp registers
1056 in SFmode, DFmode and XFmode */
1057 2, /* cost of moving MMX register */
1058 {4, 4}, /* cost of loading MMX registers
1059 in SImode and DImode */
1060 {4, 4}, /* cost of storing MMX registers
1061 in SImode and DImode */
1062 2, /* cost of moving SSE register */
1063 {4, 4, 4}, /* cost of loading SSE registers
1064 in SImode, DImode and TImode */
1065 {4, 4, 4}, /* cost of storing SSE registers
1066 in SImode, DImode and TImode */
1067 2, /* MMX or SSE register to integer */
1068 /* On K8:
1069 MOVD reg64, xmmreg Double FSTORE 4
1070 MOVD reg32, xmmreg Double FSTORE 4
1071 On AMDFAM10:
1072 MOVD reg64, xmmreg Double FADD 3
1073 1/1 1/1
1074 MOVD reg32, xmmreg Double FADD 3
1075 1/1 1/1 */
1076 16, /* size of l1 cache. */
1077 2048, /* size of l2 cache. */
1078 64, /* size of prefetch block */
1079 /* New AMD processors never drop prefetches; if they cannot be performed
1080 immediately, they are queued. We set number of simultaneous prefetches
1081 to a large constant to reflect this (it probably is not a good idea not
1082 to limit number of prefetches at all, as their execution also takes some
1083 time). */
1084 100, /* number of parallel prefetches */
1085 2, /* Branch cost */
1086 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1087 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1088 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1089 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1090 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1091 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1092
1093 bdver2_memcpy,
1094 bdver2_memset,
1095 6, /* scalar_stmt_cost. */
1096 4, /* scalar load_cost. */
1097 4, /* scalar_store_cost. */
1098 6, /* vec_stmt_cost. */
1099 0, /* vec_to_scalar_cost. */
1100 2, /* scalar_to_vec_cost. */
1101 4, /* vec_align_load_cost. */
1102 4, /* vec_unalign_load_cost. */
1103 4, /* vec_store_cost. */
1104 2, /* cond_taken_branch_cost. */
1105 1, /* cond_not_taken_branch_cost. */
1106 };
1107
1108
1109 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1110 very small blocks it is better to use a loop. For large blocks, a libcall
1111 can do non-temporal accesses and beat inline code considerably. */
1112 static stringop_algs bdver3_memcpy[2] = {
1113 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1114 {-1, rep_prefix_4_byte, false}}},
1115 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1116 {-1, libcall, false}}}};
1117 static stringop_algs bdver3_memset[2] = {
1118 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1119 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1120 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1121 {-1, libcall, false}}}};
1122 struct processor_costs bdver3_cost = {
1123 COSTS_N_INSNS (1), /* cost of an add instruction */
1124 COSTS_N_INSNS (1), /* cost of a lea instruction */
1125 COSTS_N_INSNS (1), /* variable shift costs */
1126 COSTS_N_INSNS (1), /* constant shift costs */
1127 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1128 COSTS_N_INSNS (4), /* HI */
1129 COSTS_N_INSNS (4), /* SI */
1130 COSTS_N_INSNS (6), /* DI */
1131 COSTS_N_INSNS (6)}, /* other */
1132 0, /* cost of multiply per each bit set */
1133 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1134 COSTS_N_INSNS (35), /* HI */
1135 COSTS_N_INSNS (51), /* SI */
1136 COSTS_N_INSNS (83), /* DI */
1137 COSTS_N_INSNS (83)}, /* other */
1138 COSTS_N_INSNS (1), /* cost of movsx */
1139 COSTS_N_INSNS (1), /* cost of movzx */
1140 8, /* "large" insn */
1141 9, /* MOVE_RATIO */
1142 4, /* cost for loading QImode using movzbl */
1143 {5, 5, 4}, /* cost of loading integer registers
1144 in QImode, HImode and SImode.
1145 Relative to reg-reg move (2). */
1146 {4, 4, 4}, /* cost of storing integer registers */
1147 2, /* cost of reg,reg fld/fst */
1148 {5, 5, 12}, /* cost of loading fp registers
1149 in SFmode, DFmode and XFmode */
1150 {4, 4, 8}, /* cost of storing fp registers
1151 in SFmode, DFmode and XFmode */
1152 2, /* cost of moving MMX register */
1153 {4, 4}, /* cost of loading MMX registers
1154 in SImode and DImode */
1155 {4, 4}, /* cost of storing MMX registers
1156 in SImode and DImode */
1157 2, /* cost of moving SSE register */
1158 {4, 4, 4}, /* cost of loading SSE registers
1159 in SImode, DImode and TImode */
1160 {4, 4, 4}, /* cost of storing SSE registers
1161 in SImode, DImode and TImode */
1162 2, /* MMX or SSE register to integer */
1163 16, /* size of l1 cache. */
1164 2048, /* size of l2 cache. */
1165 64, /* size of prefetch block */
1166 /* New AMD processors never drop prefetches; if they cannot be performed
1167 immediately, they are queued. We set number of simultaneous prefetches
1168 to a large constant to reflect this (it probably is not a good idea not
1169 to limit number of prefetches at all, as their execution also takes some
1170 time). */
1171 100, /* number of parallel prefetches */
1172 2, /* Branch cost */
1173 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1174 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1175 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1176 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1177 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1178 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1179
1180 bdver3_memcpy,
1181 bdver3_memset,
1182 6, /* scalar_stmt_cost. */
1183 4, /* scalar load_cost. */
1184 4, /* scalar_store_cost. */
1185 6, /* vec_stmt_cost. */
1186 0, /* vec_to_scalar_cost. */
1187 2, /* scalar_to_vec_cost. */
1188 4, /* vec_align_load_cost. */
1189 4, /* vec_unalign_load_cost. */
1190 4, /* vec_store_cost. */
1191 2, /* cond_taken_branch_cost. */
1192 1, /* cond_not_taken_branch_cost. */
1193 };
1194
1195 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1196 very small blocks it is better to use a loop. For large blocks, a libcall
1197 can do non-temporal accesses and beat inline code considerably. */
1198 static stringop_algs bdver4_memcpy[2] = {
1199 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1200 {-1, rep_prefix_4_byte, false}}},
1201 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1202 {-1, libcall, false}}}};
1203 static stringop_algs bdver4_memset[2] = {
1204 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1205 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1206 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1207 {-1, libcall, false}}}};
1208 struct processor_costs bdver4_cost = {
1209 COSTS_N_INSNS (1), /* cost of an add instruction */
1210 COSTS_N_INSNS (1), /* cost of a lea instruction */
1211 COSTS_N_INSNS (1), /* variable shift costs */
1212 COSTS_N_INSNS (1), /* constant shift costs */
1213 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1214 COSTS_N_INSNS (4), /* HI */
1215 COSTS_N_INSNS (4), /* SI */
1216 COSTS_N_INSNS (6), /* DI */
1217 COSTS_N_INSNS (6)}, /* other */
1218 0, /* cost of multiply per each bit set */
1219 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1220 COSTS_N_INSNS (35), /* HI */
1221 COSTS_N_INSNS (51), /* SI */
1222 COSTS_N_INSNS (83), /* DI */
1223 COSTS_N_INSNS (83)}, /* other */
1224 COSTS_N_INSNS (1), /* cost of movsx */
1225 COSTS_N_INSNS (1), /* cost of movzx */
1226 8, /* "large" insn */
1227 9, /* MOVE_RATIO */
1228 4, /* cost for loading QImode using movzbl */
1229 {5, 5, 4}, /* cost of loading integer registers
1230 in QImode, HImode and SImode.
1231 Relative to reg-reg move (2). */
1232 {4, 4, 4}, /* cost of storing integer registers */
1233 2, /* cost of reg,reg fld/fst */
1234 {5, 5, 12}, /* cost of loading fp registers
1235 in SFmode, DFmode and XFmode */
1236 {4, 4, 8}, /* cost of storing fp registers
1237 in SFmode, DFmode and XFmode */
1238 2, /* cost of moving MMX register */
1239 {4, 4}, /* cost of loading MMX registers
1240 in SImode and DImode */
1241 {4, 4}, /* cost of storing MMX registers
1242 in SImode and DImode */
1243 2, /* cost of moving SSE register */
1244 {4, 4, 4}, /* cost of loading SSE registers
1245 in SImode, DImode and TImode */
1246 {4, 4, 4}, /* cost of storing SSE registers
1247 in SImode, DImode and TImode */
1248 2, /* MMX or SSE register to integer */
1249 16, /* size of l1 cache. */
1250 2048, /* size of l2 cache. */
1251 64, /* size of prefetch block */
1252 /* New AMD processors never drop prefetches; if they cannot be performed
1253 immediately, they are queued. We set number of simultaneous prefetches
1254 to a large constant to reflect this (it probably is not a good idea not
1255 to limit number of prefetches at all, as their execution also takes some
1256 time). */
1257 100, /* number of parallel prefetches */
1258 2, /* Branch cost */
1259 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1260 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1261 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1262 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1263 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1264 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1265
1266 bdver4_memcpy,
1267 bdver4_memset,
1268 6, /* scalar_stmt_cost. */
1269 4, /* scalar load_cost. */
1270 4, /* scalar_store_cost. */
1271 6, /* vec_stmt_cost. */
1272 0, /* vec_to_scalar_cost. */
1273 2, /* scalar_to_vec_cost. */
1274 4, /* vec_align_load_cost. */
1275 4, /* vec_unalign_load_cost. */
1276 4, /* vec_store_cost. */
1277 2, /* cond_taken_branch_cost. */
1278 1, /* cond_not_taken_branch_cost. */
1279 };
1280
1281 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1282 very small blocks it is better to use a loop. For large blocks, a libcall can
1283 do non-temporal accesses and beat inline code considerably. */
1284 static stringop_algs btver1_memcpy[2] = {
1285 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1286 {-1, rep_prefix_4_byte, false}}},
1287 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1288 {-1, libcall, false}}}};
1289 static stringop_algs btver1_memset[2] = {
1290 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1291 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1292 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1293 {-1, libcall, false}}}};
1294 const struct processor_costs btver1_cost = {
1295 COSTS_N_INSNS (1), /* cost of an add instruction */
1296 COSTS_N_INSNS (2), /* cost of a lea instruction */
1297 COSTS_N_INSNS (1), /* variable shift costs */
1298 COSTS_N_INSNS (1), /* constant shift costs */
1299 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1300 COSTS_N_INSNS (4), /* HI */
1301 COSTS_N_INSNS (3), /* SI */
1302 COSTS_N_INSNS (4), /* DI */
1303 COSTS_N_INSNS (5)}, /* other */
1304 0, /* cost of multiply per each bit set */
1305 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1306 COSTS_N_INSNS (35), /* HI */
1307 COSTS_N_INSNS (51), /* SI */
1308 COSTS_N_INSNS (83), /* DI */
1309 COSTS_N_INSNS (83)}, /* other */
1310 COSTS_N_INSNS (1), /* cost of movsx */
1311 COSTS_N_INSNS (1), /* cost of movzx */
1312 8, /* "large" insn */
1313 9, /* MOVE_RATIO */
1314 4, /* cost for loading QImode using movzbl */
1315 {3, 4, 3}, /* cost of loading integer registers
1316 in QImode, HImode and SImode.
1317 Relative to reg-reg move (2). */
1318 {3, 4, 3}, /* cost of storing integer registers */
1319 4, /* cost of reg,reg fld/fst */
1320 {4, 4, 12}, /* cost of loading fp registers
1321 in SFmode, DFmode and XFmode */
1322 {6, 6, 8}, /* cost of storing fp registers
1323 in SFmode, DFmode and XFmode */
1324 2, /* cost of moving MMX register */
1325 {3, 3}, /* cost of loading MMX registers
1326 in SImode and DImode */
1327 {4, 4}, /* cost of storing MMX registers
1328 in SImode and DImode */
1329 2, /* cost of moving SSE register */
1330 {4, 4, 3}, /* cost of loading SSE registers
1331 in SImode, DImode and TImode */
1332 {4, 4, 5}, /* cost of storing SSE registers
1333 in SImode, DImode and TImode */
1334 3, /* MMX or SSE register to integer */
1335 /* On K8:
1336 MOVD reg64, xmmreg Double FSTORE 4
1337 MOVD reg32, xmmreg Double FSTORE 4
1338 On AMDFAM10:
1339 MOVD reg64, xmmreg Double FADD 3
1340 1/1 1/1
1341 MOVD reg32, xmmreg Double FADD 3
1342 1/1 1/1 */
1343 32, /* size of l1 cache. */
1344 512, /* size of l2 cache. */
1345 64, /* size of prefetch block */
1346 100, /* number of parallel prefetches */
1347 2, /* Branch cost */
1348 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1349 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1350 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1351 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1352 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1353 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1354
1355 btver1_memcpy,
1356 btver1_memset,
1357 4, /* scalar_stmt_cost. */
1358 2, /* scalar load_cost. */
1359 2, /* scalar_store_cost. */
1360 6, /* vec_stmt_cost. */
1361 0, /* vec_to_scalar_cost. */
1362 2, /* scalar_to_vec_cost. */
1363 2, /* vec_align_load_cost. */
1364 2, /* vec_unalign_load_cost. */
1365 2, /* vec_store_cost. */
1366 2, /* cond_taken_branch_cost. */
1367 1, /* cond_not_taken_branch_cost. */
1368 };
1369
1370 static stringop_algs btver2_memcpy[2] = {
1371 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1372 {-1, rep_prefix_4_byte, false}}},
1373 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1374 {-1, libcall, false}}}};
1375 static stringop_algs btver2_memset[2] = {
1376 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1377 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1378 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1379 {-1, libcall, false}}}};
1380 const struct processor_costs btver2_cost = {
1381 COSTS_N_INSNS (1), /* cost of an add instruction */
1382 COSTS_N_INSNS (2), /* cost of a lea instruction */
1383 COSTS_N_INSNS (1), /* variable shift costs */
1384 COSTS_N_INSNS (1), /* constant shift costs */
1385 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1386 COSTS_N_INSNS (4), /* HI */
1387 COSTS_N_INSNS (3), /* SI */
1388 COSTS_N_INSNS (4), /* DI */
1389 COSTS_N_INSNS (5)}, /* other */
1390 0, /* cost of multiply per each bit set */
1391 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1392 COSTS_N_INSNS (35), /* HI */
1393 COSTS_N_INSNS (51), /* SI */
1394 COSTS_N_INSNS (83), /* DI */
1395 COSTS_N_INSNS (83)}, /* other */
1396 COSTS_N_INSNS (1), /* cost of movsx */
1397 COSTS_N_INSNS (1), /* cost of movzx */
1398 8, /* "large" insn */
1399 9, /* MOVE_RATIO */
1400 4, /* cost for loading QImode using movzbl */
1401 {3, 4, 3}, /* cost of loading integer registers
1402 in QImode, HImode and SImode.
1403 Relative to reg-reg move (2). */
1404 {3, 4, 3}, /* cost of storing integer registers */
1405 4, /* cost of reg,reg fld/fst */
1406 {4, 4, 12}, /* cost of loading fp registers
1407 in SFmode, DFmode and XFmode */
1408 {6, 6, 8}, /* cost of storing fp registers
1409 in SFmode, DFmode and XFmode */
1410 2, /* cost of moving MMX register */
1411 {3, 3}, /* cost of loading MMX registers
1412 in SImode and DImode */
1413 {4, 4}, /* cost of storing MMX registers
1414 in SImode and DImode */
1415 2, /* cost of moving SSE register */
1416 {4, 4, 3}, /* cost of loading SSE registers
1417 in SImode, DImode and TImode */
1418 {4, 4, 5}, /* cost of storing SSE registers
1419 in SImode, DImode and TImode */
1420 3, /* MMX or SSE register to integer */
1421 /* On K8:
1422 MOVD reg64, xmmreg Double FSTORE 4
1423 MOVD reg32, xmmreg Double FSTORE 4
1424 On AMDFAM10:
1425 MOVD reg64, xmmreg Double FADD 3
1426 1/1 1/1
1427 MOVD reg32, xmmreg Double FADD 3
1428 1/1 1/1 */
1429 32, /* size of l1 cache. */
1430 2048, /* size of l2 cache. */
1431 64, /* size of prefetch block */
1432 100, /* number of parallel prefetches */
1433 2, /* Branch cost */
1434 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1435 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1436 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1437 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1438 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1439 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1440 btver2_memcpy,
1441 btver2_memset,
1442 4, /* scalar_stmt_cost. */
1443 2, /* scalar_load_cost. */
1444 2, /* scalar_store_cost. */
1445 6, /* vec_stmt_cost. */
1446 0, /* vec_to_scalar_cost. */
1447 2, /* scalar_to_vec_cost. */
1448 2, /* vec_align_load_cost. */
1449 2, /* vec_unalign_load_cost. */
1450 2, /* vec_store_cost. */
1451 2, /* cond_taken_branch_cost. */
1452 1, /* cond_not_taken_branch_cost. */
1453 };
1454
1455 static stringop_algs pentium4_memcpy[2] = {
1456 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1457 DUMMY_STRINGOP_ALGS};
1458 static stringop_algs pentium4_memset[2] = {
1459 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1460 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1461 DUMMY_STRINGOP_ALGS};
1462
1463 static const
1464 struct processor_costs pentium4_cost = {
1465 COSTS_N_INSNS (1), /* cost of an add instruction */
1466 COSTS_N_INSNS (3), /* cost of a lea instruction */
1467 COSTS_N_INSNS (4), /* variable shift costs */
1468 COSTS_N_INSNS (4), /* constant shift costs */
1469 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1470 COSTS_N_INSNS (15), /* HI */
1471 COSTS_N_INSNS (15), /* SI */
1472 COSTS_N_INSNS (15), /* DI */
1473 COSTS_N_INSNS (15)}, /* other */
1474 0, /* cost of multiply per each bit set */
1475 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1476 COSTS_N_INSNS (56), /* HI */
1477 COSTS_N_INSNS (56), /* SI */
1478 COSTS_N_INSNS (56), /* DI */
1479 COSTS_N_INSNS (56)}, /* other */
1480 COSTS_N_INSNS (1), /* cost of movsx */
1481 COSTS_N_INSNS (1), /* cost of movzx */
1482 16, /* "large" insn */
1483 6, /* MOVE_RATIO */
1484 2, /* cost for loading QImode using movzbl */
1485 {4, 5, 4}, /* cost of loading integer registers
1486 in QImode, HImode and SImode.
1487 Relative to reg-reg move (2). */
1488 {2, 3, 2}, /* cost of storing integer registers */
1489 2, /* cost of reg,reg fld/fst */
1490 {2, 2, 6}, /* cost of loading fp registers
1491 in SFmode, DFmode and XFmode */
1492 {4, 4, 6}, /* cost of storing fp registers
1493 in SFmode, DFmode and XFmode */
1494 2, /* cost of moving MMX register */
1495 {2, 2}, /* cost of loading MMX registers
1496 in SImode and DImode */
1497 {2, 2}, /* cost of storing MMX registers
1498 in SImode and DImode */
1499 12, /* cost of moving SSE register */
1500 {12, 12, 12}, /* cost of loading SSE registers
1501 in SImode, DImode and TImode */
1502 {2, 2, 8}, /* cost of storing SSE registers
1503 in SImode, DImode and TImode */
1504 10, /* MMX or SSE register to integer */
1505 8, /* size of l1 cache. */
1506 256, /* size of l2 cache. */
1507 64, /* size of prefetch block */
1508 6, /* number of parallel prefetches */
1509 2, /* Branch cost */
1510 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1511 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1512 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1513 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1514 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1515 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1516 pentium4_memcpy,
1517 pentium4_memset,
1518 1, /* scalar_stmt_cost. */
1519 1, /* scalar_load_cost. */
1520 1, /* scalar_store_cost. */
1521 1, /* vec_stmt_cost. */
1522 1, /* vec_to_scalar_cost. */
1523 1, /* scalar_to_vec_cost. */
1524 1, /* vec_align_load_cost. */
1525 2, /* vec_unalign_load_cost. */
1526 1, /* vec_store_cost. */
1527 3, /* cond_taken_branch_cost. */
1528 1, /* cond_not_taken_branch_cost. */
1529 };
1530
1531 static stringop_algs nocona_memcpy[2] = {
1532 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1533 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1534 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1535
1536 static stringop_algs nocona_memset[2] = {
1537 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1538 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1539 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1540 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1541
1542 static const
1543 struct processor_costs nocona_cost = {
1544 COSTS_N_INSNS (1), /* cost of an add instruction */
1545 COSTS_N_INSNS (1), /* cost of a lea instruction */
1546 COSTS_N_INSNS (1), /* variable shift costs */
1547 COSTS_N_INSNS (1), /* constant shift costs */
1548 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1549 COSTS_N_INSNS (10), /* HI */
1550 COSTS_N_INSNS (10), /* SI */
1551 COSTS_N_INSNS (10), /* DI */
1552 COSTS_N_INSNS (10)}, /* other */
1553 0, /* cost of multiply per each bit set */
1554 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1555 COSTS_N_INSNS (66), /* HI */
1556 COSTS_N_INSNS (66), /* SI */
1557 COSTS_N_INSNS (66), /* DI */
1558 COSTS_N_INSNS (66)}, /* other */
1559 COSTS_N_INSNS (1), /* cost of movsx */
1560 COSTS_N_INSNS (1), /* cost of movzx */
1561 16, /* "large" insn */
1562 17, /* MOVE_RATIO */
1563 4, /* cost for loading QImode using movzbl */
1564 {4, 4, 4}, /* cost of loading integer registers
1565 in QImode, HImode and SImode.
1566 Relative to reg-reg move (2). */
1567 {4, 4, 4}, /* cost of storing integer registers */
1568 3, /* cost of reg,reg fld/fst */
1569 {12, 12, 12}, /* cost of loading fp registers
1570 in SFmode, DFmode and XFmode */
1571 {4, 4, 4}, /* cost of storing fp registers
1572 in SFmode, DFmode and XFmode */
1573 6, /* cost of moving MMX register */
1574 {12, 12}, /* cost of loading MMX registers
1575 in SImode and DImode */
1576 {12, 12}, /* cost of storing MMX registers
1577 in SImode and DImode */
1578 6, /* cost of moving SSE register */
1579 {12, 12, 12}, /* cost of loading SSE registers
1580 in SImode, DImode and TImode */
1581 {12, 12, 12}, /* cost of storing SSE registers
1582 in SImode, DImode and TImode */
1583 8, /* MMX or SSE register to integer */
1584 8, /* size of l1 cache. */
1585 1024, /* size of l2 cache. */
1586 64, /* size of prefetch block */
1587 8, /* number of parallel prefetches */
1588 1, /* Branch cost */
1589 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1590 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1591 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1592 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1593 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1594 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1595 nocona_memcpy,
1596 nocona_memset,
1597 1, /* scalar_stmt_cost. */
1598 1, /* scalar_load_cost. */
1599 1, /* scalar_store_cost. */
1600 1, /* vec_stmt_cost. */
1601 1, /* vec_to_scalar_cost. */
1602 1, /* scalar_to_vec_cost. */
1603 1, /* vec_align_load_cost. */
1604 2, /* vec_unalign_load_cost. */
1605 1, /* vec_store_cost. */
1606 3, /* cond_taken_branch_cost. */
1607 1, /* cond_not_taken_branch_cost. */
1608 };
1609
1610 static stringop_algs atom_memcpy[2] = {
1611 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1612 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1613 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1614 static stringop_algs atom_memset[2] = {
1615 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1616 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1617 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1618 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1619 static const
1620 struct processor_costs atom_cost = {
1621 COSTS_N_INSNS (1), /* cost of an add instruction */
1622 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1623 COSTS_N_INSNS (1), /* variable shift costs */
1624 COSTS_N_INSNS (1), /* constant shift costs */
1625 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1626 COSTS_N_INSNS (4), /* HI */
1627 COSTS_N_INSNS (3), /* SI */
1628 COSTS_N_INSNS (4), /* DI */
1629 COSTS_N_INSNS (2)}, /* other */
1630 0, /* cost of multiply per each bit set */
1631 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1632 COSTS_N_INSNS (26), /* HI */
1633 COSTS_N_INSNS (42), /* SI */
1634 COSTS_N_INSNS (74), /* DI */
1635 COSTS_N_INSNS (74)}, /* other */
1636 COSTS_N_INSNS (1), /* cost of movsx */
1637 COSTS_N_INSNS (1), /* cost of movzx */
1638 8, /* "large" insn */
1639 17, /* MOVE_RATIO */
1640 4, /* cost for loading QImode using movzbl */
1641 {4, 4, 4}, /* cost of loading integer registers
1642 in QImode, HImode and SImode.
1643 Relative to reg-reg move (2). */
1644 {4, 4, 4}, /* cost of storing integer registers */
1645 4, /* cost of reg,reg fld/fst */
1646 {12, 12, 12}, /* cost of loading fp registers
1647 in SFmode, DFmode and XFmode */
1648 {6, 6, 8}, /* cost of storing fp registers
1649 in SFmode, DFmode and XFmode */
1650 2, /* cost of moving MMX register */
1651 {8, 8}, /* cost of loading MMX registers
1652 in SImode and DImode */
1653 {8, 8}, /* cost of storing MMX registers
1654 in SImode and DImode */
1655 2, /* cost of moving SSE register */
1656 {8, 8, 8}, /* cost of loading SSE registers
1657 in SImode, DImode and TImode */
1658 {8, 8, 8}, /* cost of storing SSE registers
1659 in SImode, DImode and TImode */
1660 5, /* MMX or SSE register to integer */
1661 32, /* size of l1 cache. */
1662 256, /* size of l2 cache. */
1663 64, /* size of prefetch block */
1664 6, /* number of parallel prefetches */
1665 3, /* Branch cost */
1666 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1667 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1668 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1669 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1670 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1671 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1672 atom_memcpy,
1673 atom_memset,
1674 1, /* scalar_stmt_cost. */
1675 1, /* scalar_load_cost. */
1676 1, /* scalar_store_cost. */
1677 1, /* vec_stmt_cost. */
1678 1, /* vec_to_scalar_cost. */
1679 1, /* scalar_to_vec_cost. */
1680 1, /* vec_align_load_cost. */
1681 2, /* vec_unalign_load_cost. */
1682 1, /* vec_store_cost. */
1683 3, /* cond_taken_branch_cost. */
1684 1, /* cond_not_taken_branch_cost. */
1685 };
1686
1687 static stringop_algs slm_memcpy[2] = {
1688 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1689 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1690 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1691 static stringop_algs slm_memset[2] = {
1692 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1693 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1694 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1695 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1696 static const
1697 struct processor_costs slm_cost = {
1698 COSTS_N_INSNS (1), /* cost of an add instruction */
1699 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1700 COSTS_N_INSNS (1), /* variable shift costs */
1701 COSTS_N_INSNS (1), /* constant shift costs */
1702 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1703 COSTS_N_INSNS (3), /* HI */
1704 COSTS_N_INSNS (3), /* SI */
1705 COSTS_N_INSNS (4), /* DI */
1706 COSTS_N_INSNS (2)}, /* other */
1707 0, /* cost of multiply per each bit set */
1708 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1709 COSTS_N_INSNS (26), /* HI */
1710 COSTS_N_INSNS (42), /* SI */
1711 COSTS_N_INSNS (74), /* DI */
1712 COSTS_N_INSNS (74)}, /* other */
1713 COSTS_N_INSNS (1), /* cost of movsx */
1714 COSTS_N_INSNS (1), /* cost of movzx */
1715 8, /* "large" insn */
1716 17, /* MOVE_RATIO */
1717 4, /* cost for loading QImode using movzbl */
1718 {4, 4, 4}, /* cost of loading integer registers
1719 in QImode, HImode and SImode.
1720 Relative to reg-reg move (2). */
1721 {4, 4, 4}, /* cost of storing integer registers */
1722 4, /* cost of reg,reg fld/fst */
1723 {12, 12, 12}, /* cost of loading fp registers
1724 in SFmode, DFmode and XFmode */
1725 {6, 6, 8}, /* cost of storing fp registers
1726 in SFmode, DFmode and XFmode */
1727 2, /* cost of moving MMX register */
1728 {8, 8}, /* cost of loading MMX registers
1729 in SImode and DImode */
1730 {8, 8}, /* cost of storing MMX registers
1731 in SImode and DImode */
1732 2, /* cost of moving SSE register */
1733 {8, 8, 8}, /* cost of loading SSE registers
1734 in SImode, DImode and TImode */
1735 {8, 8, 8}, /* cost of storing SSE registers
1736 in SImode, DImode and TImode */
1737 5, /* MMX or SSE register to integer */
1738 32, /* size of l1 cache. */
1739 256, /* size of l2 cache. */
1740 64, /* size of prefetch block */
1741 6, /* number of parallel prefetches */
1742 3, /* Branch cost */
1743 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1744 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1745 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1746 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1747 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1748 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1749 slm_memcpy,
1750 slm_memset,
1751 1, /* scalar_stmt_cost. */
1752 1, /* scalar_load_cost. */
1753 1, /* scalar_store_cost. */
1754 1, /* vec_stmt_cost. */
1755 4, /* vec_to_scalar_cost. */
1756 1, /* scalar_to_vec_cost. */
1757 1, /* vec_align_load_cost. */
1758 2, /* vec_unalign_load_cost. */
1759 1, /* vec_store_cost. */
1760 3, /* cond_taken_branch_cost. */
1761 1, /* cond_not_taken_branch_cost. */
1762 };
1763
1764 static stringop_algs intel_memcpy[2] = {
1765 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1766 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1767 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1768 static stringop_algs intel_memset[2] = {
1769 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1770 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1771 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1772 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1773 static const
1774 struct processor_costs intel_cost = {
1775 COSTS_N_INSNS (1), /* cost of an add instruction */
1776 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1777 COSTS_N_INSNS (1), /* variable shift costs */
1778 COSTS_N_INSNS (1), /* constant shift costs */
1779 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1780 COSTS_N_INSNS (3), /* HI */
1781 COSTS_N_INSNS (3), /* SI */
1782 COSTS_N_INSNS (4), /* DI */
1783 COSTS_N_INSNS (2)}, /* other */
1784 0, /* cost of multiply per each bit set */
1785 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1786 COSTS_N_INSNS (26), /* HI */
1787 COSTS_N_INSNS (42), /* SI */
1788 COSTS_N_INSNS (74), /* DI */
1789 COSTS_N_INSNS (74)}, /* other */
1790 COSTS_N_INSNS (1), /* cost of movsx */
1791 COSTS_N_INSNS (1), /* cost of movzx */
1792 8, /* "large" insn */
1793 17, /* MOVE_RATIO */
1794 4, /* cost for loading QImode using movzbl */
1795 {4, 4, 4}, /* cost of loading integer registers
1796 in QImode, HImode and SImode.
1797 Relative to reg-reg move (2). */
1798 {4, 4, 4}, /* cost of storing integer registers */
1799 4, /* cost of reg,reg fld/fst */
1800 {12, 12, 12}, /* cost of loading fp registers
1801 in SFmode, DFmode and XFmode */
1802 {6, 6, 8}, /* cost of storing fp registers
1803 in SFmode, DFmode and XFmode */
1804 2, /* cost of moving MMX register */
1805 {8, 8}, /* cost of loading MMX registers
1806 in SImode and DImode */
1807 {8, 8}, /* cost of storing MMX registers
1808 in SImode and DImode */
1809 2, /* cost of moving SSE register */
1810 {8, 8, 8}, /* cost of loading SSE registers
1811 in SImode, DImode and TImode */
1812 {8, 8, 8}, /* cost of storing SSE registers
1813 in SImode, DImode and TImode */
1814 5, /* MMX or SSE register to integer */
1815 32, /* size of l1 cache. */
1816 256, /* size of l2 cache. */
1817 64, /* size of prefetch block */
1818 6, /* number of parallel prefetches */
1819 3, /* Branch cost */
1820 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1821 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1822 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1823 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1824 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1825 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1826 intel_memcpy,
1827 intel_memset,
1828 1, /* scalar_stmt_cost. */
1829 1, /* scalar_load_cost. */
1830 1, /* scalar_store_cost. */
1831 1, /* vec_stmt_cost. */
1832 4, /* vec_to_scalar_cost. */
1833 1, /* scalar_to_vec_cost. */
1834 1, /* vec_align_load_cost. */
1835 2, /* vec_unalign_load_cost. */
1836 1, /* vec_store_cost. */
1837 3, /* cond_taken_branch_cost. */
1838 1, /* cond_not_taken_branch_cost. */
1839 };
1840
1841 /* Generic should produce code tuned for Core i7 (and newer chips)
1842 and btver1 (and newer chips). */
1843
1844 static stringop_algs generic_memcpy[2] = {
1845 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1846 {-1, libcall, false}}},
1847 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1848 {-1, libcall, false}}}};
1849 static stringop_algs generic_memset[2] = {
1850 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1851 {-1, libcall, false}}},
1852 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1853 {-1, libcall, false}}}};
1854 static const
1855 struct processor_costs generic_cost = {
1856 COSTS_N_INSNS (1), /* cost of an add instruction */
1857 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1858 this cost, however, our current implementation of synth_mult results in
1859 the use of unnecessary temporary registers, causing regressions on several
1860 SPECfp benchmarks. */
1861 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1862 COSTS_N_INSNS (1), /* variable shift costs */
1863 COSTS_N_INSNS (1), /* constant shift costs */
1864 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1865 COSTS_N_INSNS (4), /* HI */
1866 COSTS_N_INSNS (3), /* SI */
1867 COSTS_N_INSNS (4), /* DI */
1868 COSTS_N_INSNS (2)}, /* other */
1869 0, /* cost of multiply per each bit set */
1870 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1871 COSTS_N_INSNS (26), /* HI */
1872 COSTS_N_INSNS (42), /* SI */
1873 COSTS_N_INSNS (74), /* DI */
1874 COSTS_N_INSNS (74)}, /* other */
1875 COSTS_N_INSNS (1), /* cost of movsx */
1876 COSTS_N_INSNS (1), /* cost of movzx */
1877 8, /* "large" insn */
1878 17, /* MOVE_RATIO */
1879 4, /* cost for loading QImode using movzbl */
1880 {4, 4, 4}, /* cost of loading integer registers
1881 in QImode, HImode and SImode.
1882 Relative to reg-reg move (2). */
1883 {4, 4, 4}, /* cost of storing integer registers */
1884 4, /* cost of reg,reg fld/fst */
1885 {12, 12, 12}, /* cost of loading fp registers
1886 in SFmode, DFmode and XFmode */
1887 {6, 6, 8}, /* cost of storing fp registers
1888 in SFmode, DFmode and XFmode */
1889 2, /* cost of moving MMX register */
1890 {8, 8}, /* cost of loading MMX registers
1891 in SImode and DImode */
1892 {8, 8}, /* cost of storing MMX registers
1893 in SImode and DImode */
1894 2, /* cost of moving SSE register */
1895 {8, 8, 8}, /* cost of loading SSE registers
1896 in SImode, DImode and TImode */
1897 {8, 8, 8}, /* cost of storing SSE registers
1898 in SImode, DImode and TImode */
1899 5, /* MMX or SSE register to integer */
1900 32, /* size of l1 cache. */
1901 512, /* size of l2 cache. */
1902 64, /* size of prefetch block */
1903 6, /* number of parallel prefetches */
1904 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1905 value is increased to the perhaps more appropriate value of 5. */
1906 3, /* Branch cost */
1907 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1908 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1909 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1910 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1911 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1912 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1913 generic_memcpy,
1914 generic_memset,
1915 1, /* scalar_stmt_cost. */
1916 1, /* scalar_load_cost. */
1917 1, /* scalar_store_cost. */
1918 1, /* vec_stmt_cost. */
1919 1, /* vec_to_scalar_cost. */
1920 1, /* scalar_to_vec_cost. */
1921 1, /* vec_align_load_cost. */
1922 2, /* vec_unalign_load_cost. */
1923 1, /* vec_store_cost. */
1924 3, /* cond_taken_branch_cost. */
1925 1, /* cond_not_taken_branch_cost. */
1926 };
1927
1928 /* core_cost should produce code tuned for the Core family of CPUs. */
1929 static stringop_algs core_memcpy[2] = {
1930 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1931 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1932 {-1, libcall, false}}}};
1933 static stringop_algs core_memset[2] = {
1934 {libcall, {{6, loop_1_byte, true},
1935 {24, loop, true},
1936 {8192, rep_prefix_4_byte, true},
1937 {-1, libcall, false}}},
1938 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1939 {-1, libcall, false}}}};
1940
1941 static const
1942 struct processor_costs core_cost = {
1943 COSTS_N_INSNS (1), /* cost of an add instruction */
1944 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1945 this cost, however, our current implementation of synth_mult results in
1946 the use of unnecessary temporary registers, causing regressions on several
1947 SPECfp benchmarks. */
1948 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1949 COSTS_N_INSNS (1), /* variable shift costs */
1950 COSTS_N_INSNS (1), /* constant shift costs */
1951 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1952 COSTS_N_INSNS (4), /* HI */
1953 COSTS_N_INSNS (3), /* SI */
1954 COSTS_N_INSNS (4), /* DI */
1955 COSTS_N_INSNS (2)}, /* other */
1956 0, /* cost of multiply per each bit set */
1957 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1958 COSTS_N_INSNS (26), /* HI */
1959 COSTS_N_INSNS (42), /* SI */
1960 COSTS_N_INSNS (74), /* DI */
1961 COSTS_N_INSNS (74)}, /* other */
1962 COSTS_N_INSNS (1), /* cost of movsx */
1963 COSTS_N_INSNS (1), /* cost of movzx */
1964 8, /* "large" insn */
1965 17, /* MOVE_RATIO */
1966 4, /* cost for loading QImode using movzbl */
1967 {4, 4, 4}, /* cost of loading integer registers
1968 in QImode, HImode and SImode.
1969 Relative to reg-reg move (2). */
1970 {4, 4, 4}, /* cost of storing integer registers */
1971 4, /* cost of reg,reg fld/fst */
1972 {12, 12, 12}, /* cost of loading fp registers
1973 in SFmode, DFmode and XFmode */
1974 {6, 6, 8}, /* cost of storing fp registers
1975 in SFmode, DFmode and XFmode */
1976 2, /* cost of moving MMX register */
1977 {8, 8}, /* cost of loading MMX registers
1978 in SImode and DImode */
1979 {8, 8}, /* cost of storing MMX registers
1980 in SImode and DImode */
1981 2, /* cost of moving SSE register */
1982 {8, 8, 8}, /* cost of loading SSE registers
1983 in SImode, DImode and TImode */
1984 {8, 8, 8}, /* cost of storing SSE registers
1985 in SImode, DImode and TImode */
1986 5, /* MMX or SSE register to integer */
1987 64, /* size of l1 cache. */
1988 512, /* size of l2 cache. */
1989 64, /* size of prefetch block */
1990 6, /* number of parallel prefetches */
1991 /* FIXME perhaps more appropriate value is 5. */
1992 3, /* Branch cost */
1993 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1994 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1995 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1996 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1997 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1998 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1999 core_memcpy,
2000 core_memset,
2001 1, /* scalar_stmt_cost. */
2002 1, /* scalar_load_cost. */
2003 1, /* scalar_store_cost. */
2004 1, /* vec_stmt_cost. */
2005 1, /* vec_to_scalar_cost. */
2006 1, /* scalar_to_vec_cost. */
2007 1, /* vec_align_load_cost. */
2008 2, /* vec_unalign_load_cost. */
2009 1, /* vec_store_cost. */
2010 3, /* cond_taken_branch_cost. */
2011 1, /* cond_not_taken_branch_cost. */
2012 };
2013
2014
2015 /* Set by -mtune. */
2016 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2017
2018 /* Set by -mtune or -Os. */
2019 const struct processor_costs *ix86_cost = &pentium_cost;
2020
2021 /* Processor feature/optimization bitmasks. */
2022 #define m_386 (1<<PROCESSOR_I386)
2023 #define m_486 (1<<PROCESSOR_I486)
2024 #define m_PENT (1<<PROCESSOR_PENTIUM)
2025 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2026 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2027 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2028 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2029 #define m_CORE2 (1<<PROCESSOR_CORE2)
2030 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2031 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2032 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2033 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2034 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2035 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2036 #define m_INTEL (1<<PROCESSOR_INTEL)
2037
2038 #define m_GEODE (1<<PROCESSOR_GEODE)
2039 #define m_K6 (1<<PROCESSOR_K6)
2040 #define m_K6_GEODE (m_K6 | m_GEODE)
2041 #define m_K8 (1<<PROCESSOR_K8)
2042 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2043 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2044 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2045 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2046 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2047 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2048 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2049 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2050 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2051 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2052 #define m_BTVER (m_BTVER1 | m_BTVER2)
2053 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2054
2055 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2056
2057 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2058 #undef DEF_TUNE
2059 #define DEF_TUNE(tune, name, selector) name,
2060 #include "x86-tune.def"
2061 #undef DEF_TUNE
2062 };
2063
2064 /* Feature tests against the various tunings. */
2065 unsigned char ix86_tune_features[X86_TUNE_LAST];
2066
2067 /* Feature tests against the various tunings used to create ix86_tune_features
2068 based on the processor mask. */
2069 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2070 #undef DEF_TUNE
2071 #define DEF_TUNE(tune, name, selector) selector,
2072 #include "x86-tune.def"
2073 #undef DEF_TUNE
2074 };
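
/* As an illustration of how the two arrays above are filled in (the entry
   shown is abbreviated and only indicative of the x86-tune.def format): an
   entry such as

       DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", m_386 | m_CORE_ALL | m_GENERIC)

   contributes the string "use_leave" to ix86_tune_feature_names and the
   selector mask (m_386 | m_CORE_ALL | m_GENERIC) to
   initial_ix86_tune_features, both at the index of that feature in the
   X86_TUNE_* enumeration.  */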
2075
2076 /* Feature tests against the various architecture variations. */
2077 unsigned char ix86_arch_features[X86_ARCH_LAST];
2078
2079 /* Feature tests against the various architecture variations, used to create
2080 ix86_arch_features based on the processor mask. */
2081 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2082 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2083 ~(m_386 | m_486 | m_PENT | m_K6),
2084
2085 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2086 ~m_386,
2087
2088 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2089 ~(m_386 | m_486),
2090
2091 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2092 ~m_386,
2093
2094 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2095 ~m_386,
2096 };
2097
2098 /* In case the average insn count for a single function invocation is
2099 lower than this constant, emit fast (but longer) prologue and
2100 epilogue code. */
2101 #define FAST_PROLOGUE_INSN_COUNT 20
2102
2103 /* Names for the low 8-bit, high 8-bit, and 16-bit registers, respectively. */
2104 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2105 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2106 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2107
2108 /* Array of the smallest class containing reg number REGNO, indexed by
2109 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2110
2111 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2112 {
2113 /* ax, dx, cx, bx */
2114 AREG, DREG, CREG, BREG,
2115 /* si, di, bp, sp */
2116 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2117 /* FP registers */
2118 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2119 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2120 /* arg pointer */
2121 NON_Q_REGS,
2122 /* flags, fpsr, fpcr, frame */
2123 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2124 /* SSE registers */
2125 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2126 SSE_REGS, SSE_REGS,
2127 /* MMX registers */
2128 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2129 MMX_REGS, MMX_REGS,
2130 /* REX registers */
2131 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2132 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2133 /* SSE REX registers */
2134 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2135 SSE_REGS, SSE_REGS,
2136 /* AVX-512 SSE registers */
2137 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2138 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2139 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2140 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2141 /* Mask registers. */
2142 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2143 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2144 };
2145
2146 /* The "default" register map used in 32bit mode. */
2147
2148 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2149 {
2150 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2151 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2152 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2153 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2154 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2155 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2156 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2157 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2158 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2159 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2160 };
2161
2162 /* The "default" register map used in 64bit mode. */
2163
2164 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2165 {
2166 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2167 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2168 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2169 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2170 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2171 8,9,10,11,12,13,14,15, /* extended integer registers */
2172 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2173 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2174 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2175 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2176 };
2177
2178 /* Define the register numbers to be used in Dwarf debugging information.
2179 The SVR4 reference port C compiler uses the following register numbers
2180 in its Dwarf output code:
2181 0 for %eax (gcc regno = 0)
2182 1 for %ecx (gcc regno = 2)
2183 2 for %edx (gcc regno = 1)
2184 3 for %ebx (gcc regno = 3)
2185 4 for %esp (gcc regno = 7)
2186 5 for %ebp (gcc regno = 6)
2187 6 for %esi (gcc regno = 4)
2188 7 for %edi (gcc regno = 5)
2189 The following three DWARF register numbers are never generated by
2190 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2191 believes these numbers have these meanings.
2192 8 for %eip (no gcc equivalent)
2193 9 for %eflags (gcc regno = 17)
2194 10 for %trapno (no gcc equivalent)
2195 It is not at all clear how we should number the FP stack registers
2196 for the x86 architecture. If the version of SDB on x86/svr4 were
2197 a bit less brain dead with respect to floating-point then we would
2198 have a precedent to follow with respect to DWARF register numbers
2199 for x86 FP registers, but the SDB on x86/svr4 is so completely
2200 broken with respect to FP registers that it is hardly worth thinking
2201 of it as something to strive for compatibility with.
2202 The version of x86/svr4 SDB I have at the moment does (partially)
2203 seem to believe that DWARF register number 11 is associated with
2204 the x86 register %st(0), but that's about all. Higher DWARF
2205 register numbers don't seem to be associated with anything in
2206 particular, and even for DWARF regno 11, SDB only seems to under-
2207 stand that it should say that a variable lives in %st(0) (when
2208 asked via an `=' command) if we said it was in DWARF regno 11,
2209 but SDB still prints garbage when asked for the value of the
2210 variable in question (via a `/' command).
2211 (Also note that the labels SDB prints for various FP stack regs
2212 when doing an `x' command are all wrong.)
2213 Note that these problems generally don't affect the native SVR4
2214 C compiler because it doesn't allow the use of -O with -g and
2215 because when it is *not* optimizing, it allocates a memory
2216 location for each floating-point variable, and the memory
2217 location is what gets described in the DWARF AT_location
2218 attribute for the variable in question.
2219 Regardless of the severe mental illness of the x86/svr4 SDB, we
2220 do something sensible here and we use the following DWARF
2221 register numbers. Note that these are all stack-top-relative
2222 numbers.
2223 11 for %st(0) (gcc regno = 8)
2224 12 for %st(1) (gcc regno = 9)
2225 13 for %st(2) (gcc regno = 10)
2226 14 for %st(3) (gcc regno = 11)
2227 15 for %st(4) (gcc regno = 12)
2228 16 for %st(5) (gcc regno = 13)
2229 17 for %st(6) (gcc regno = 14)
2230 18 for %st(7) (gcc regno = 15)
2231 */
2232 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2233 {
2234 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2235 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2236 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2237 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2238 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2239 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2240 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2241 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2242 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2243 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2244 };
2245
2246 /* Define parameter passing and return registers. */
2247
2248 static int const x86_64_int_parameter_registers[6] =
2249 {
2250 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2251 };
2252
2253 static int const x86_64_ms_abi_int_parameter_registers[4] =
2254 {
2255 CX_REG, DX_REG, R8_REG, R9_REG
2256 };
2257
2258 static int const x86_64_int_return_registers[4] =
2259 {
2260 AX_REG, DX_REG, DI_REG, SI_REG
2261 };
2262
2263 /* Additional registers that are clobbered by SysV calls (call-saved in the MS ABI). */
2264
2265 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2266 {
2267 SI_REG, DI_REG,
2268 XMM6_REG, XMM7_REG,
2269 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2270 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2271 };
2272
2273 /* Define the structure for the machine field in struct function. */
2274
2275 struct GTY(()) stack_local_entry {
2276 unsigned short mode;
2277 unsigned short n;
2278 rtx rtl;
2279 struct stack_local_entry *next;
2280 };
2281
2282 /* Structure describing stack frame layout.
2283 Stack grows downward:
2284
2285 [arguments]
2286 <- ARG_POINTER
2287 saved pc
2288
2289 saved static chain if ix86_static_chain_on_stack
2290
2291 saved frame pointer if frame_pointer_needed
2292 <- HARD_FRAME_POINTER
2293 [saved regs]
2294 <- regs_save_offset
2295 [padding0]
2296
2297 [saved SSE regs]
2298 <- sse_regs_save_offset
2299 [padding1] |
2300 | <- FRAME_POINTER
2301 [va_arg registers] |
2302 |
2303 [frame] |
2304 |
2305 [padding2] | = to_allocate
2306 <- STACK_POINTER
2307 */
2308 struct ix86_frame
2309 {
2310 int nsseregs;
2311 int nregs;
2312 int va_arg_size;
2313 int red_zone_size;
2314 int outgoing_arguments_size;
2315
2316 /* The offsets relative to ARG_POINTER. */
2317 HOST_WIDE_INT frame_pointer_offset;
2318 HOST_WIDE_INT hard_frame_pointer_offset;
2319 HOST_WIDE_INT stack_pointer_offset;
2320 HOST_WIDE_INT hfp_save_offset;
2321 HOST_WIDE_INT reg_save_offset;
2322 HOST_WIDE_INT sse_reg_save_offset;
2323
2324 /* When save_regs_using_mov is set, emit prologue using
2325 move instead of push instructions. */
2326 bool save_regs_using_mov;
2327 };
2328
2329 /* Which cpu are we scheduling for. */
2330 enum attr_cpu ix86_schedule;
2331
2332 /* Which cpu are we optimizing for. */
2333 enum processor_type ix86_tune;
2334
2335 /* Which instruction set architecture to use. */
2336 enum processor_type ix86_arch;
2337
2338 /* True if processor has SSE prefetch instruction. */
2339 unsigned char x86_prefetch_sse;
2340
2341 /* -mstackrealign option */
2342 static const char ix86_force_align_arg_pointer_string[]
2343 = "force_align_arg_pointer";
2344
2345 static rtx (*ix86_gen_leave) (void);
2346 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2347 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2348 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2349 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2350 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2351 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2352 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2353 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2354 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2355 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2356 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2357
2358 /* Preferred alignment for stack boundary in bits. */
2359 unsigned int ix86_preferred_stack_boundary;
2360
2361 /* Alignment for incoming stack boundary in bits specified at
2362 command line. */
2363 static unsigned int ix86_user_incoming_stack_boundary;
2364
2365 /* Default alignment for incoming stack boundary in bits. */
2366 static unsigned int ix86_default_incoming_stack_boundary;
2367
2368 /* Alignment for incoming stack boundary in bits. */
2369 unsigned int ix86_incoming_stack_boundary;
2370
2371 /* Calling abi specific va_list type nodes. */
2372 static GTY(()) tree sysv_va_list_type_node;
2373 static GTY(()) tree ms_va_list_type_node;
2374
2375 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2376 char internal_label_prefix[16];
2377 int internal_label_prefix_len;
2378
2379 /* Fence to use after loop using movnt. */
2380 tree x86_mfence;
2381
2382 /* Register class used for passing a given 64-bit part of the argument.
2383 These represent the classes documented by the psABI, with the exception
2384 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2385 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
2386
2387 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2388 whenever possible (the upper half does contain padding). */
2389 enum x86_64_reg_class
2390 {
2391 X86_64_NO_CLASS,
2392 X86_64_INTEGER_CLASS,
2393 X86_64_INTEGERSI_CLASS,
2394 X86_64_SSE_CLASS,
2395 X86_64_SSESF_CLASS,
2396 X86_64_SSEDF_CLASS,
2397 X86_64_SSEUP_CLASS,
2398 X86_64_X87_CLASS,
2399 X86_64_X87UP_CLASS,
2400 X86_64_COMPLEX_X87_CLASS,
2401 X86_64_MEMORY_CLASS
2402 };
2403
2404 #define MAX_CLASSES 8
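
/* Descriptive note: MAX_CLASSES bounds how many eightbyte chunks (each given
   one of the classes above) a single argument may be split into; the
   argument-classification code later in this file sizes its class arrays
   with it.  */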
2405
2406 /* Table of constants used by fldpi, fldln2, etc.  */
2407 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2408 static bool ext_80387_constants_init = 0;
2409
2410 \f
2411 static struct machine_function * ix86_init_machine_status (void);
2412 static rtx ix86_function_value (const_tree, const_tree, bool);
2413 static bool ix86_function_value_regno_p (const unsigned int);
2414 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2415 const_tree);
2416 static rtx ix86_static_chain (const_tree, bool);
2417 static int ix86_function_regparm (const_tree, const_tree);
2418 static void ix86_compute_frame_layout (struct ix86_frame *);
2419 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2420 rtx, rtx, int);
2421 static void ix86_add_new_builtins (HOST_WIDE_INT);
2422 static tree ix86_canonical_va_list_type (tree);
2423 static void predict_jump (int);
2424 static unsigned int split_stack_prologue_scratch_regno (void);
2425 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2426
2427 enum ix86_function_specific_strings
2428 {
2429 IX86_FUNCTION_SPECIFIC_ARCH,
2430 IX86_FUNCTION_SPECIFIC_TUNE,
2431 IX86_FUNCTION_SPECIFIC_MAX
2432 };
2433
2434 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2435 const char *, enum fpmath_unit, bool);
2436 static void ix86_function_specific_save (struct cl_target_option *,
2437 struct gcc_options *opts);
2438 static void ix86_function_specific_restore (struct gcc_options *opts,
2439 struct cl_target_option *);
2440 static void ix86_function_specific_print (FILE *, int,
2441 struct cl_target_option *);
2442 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2443 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2444 struct gcc_options *,
2445 struct gcc_options *,
2446 struct gcc_options *);
2447 static bool ix86_can_inline_p (tree, tree);
2448 static void ix86_set_current_function (tree);
2449 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2450
2451 static enum calling_abi ix86_function_abi (const_tree);
2452
2453 \f
2454 #ifndef SUBTARGET32_DEFAULT_CPU
2455 #define SUBTARGET32_DEFAULT_CPU "i386"
2456 #endif
2457
2458 /* Whether -mtune= or -march= were specified */
2459 static int ix86_tune_defaulted;
2460 static int ix86_arch_specified;
2461
2462 /* Vectorization library interface and handlers. */
2463 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2464
2465 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2466 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2467
2468 /* Processor target table, indexed by processor number */
2469 struct ptt
2470 {
2471 const char *const name; /* processor name */
2472 const struct processor_costs *cost; /* Processor costs */
2473 const int align_loop; /* Default alignments. */
2474 const int align_loop_max_skip;
2475 const int align_jump;
2476 const int align_jump_max_skip;
2477 const int align_func;
2478 };
2479
2480 /* This table must be in sync with enum processor_type in i386.h. */
2481 static const struct ptt processor_target_table[PROCESSOR_max] =
2482 {
2483 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2484 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2485 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2486 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2487 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2488 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2489 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2490 {"core2", &core_cost, 16, 10, 16, 10, 16},
2491 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2492 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2493 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2494 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2495 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2496 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2497 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2498 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2499 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2500 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2501 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2502 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2503 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2504 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2505 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2506 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2507 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2508 };
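
/* Reading one row as an example (field order follows struct ptt above):
   {"btver2", &btver2_cost, 16, 10, 16, 7, 11} selects the btver2 cost table
   and requests loops aligned to 16 with a max skip of 10, jumps aligned to
   16 with a max skip of 7, and functions aligned to 11.  */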
2509 \f
2510 static unsigned int
2511 rest_of_handle_insert_vzeroupper (void)
2512 {
2513 int i;
2514
2515 /* vzeroupper instructions are inserted immediately after reload to
2516 account for possible spills from 256bit registers. The pass
2517 reuses the mode switching infrastructure by re-running the mode
2518 insertion pass, so disable entities that have already been processed. */
2519 for (i = 0; i < MAX_386_ENTITIES; i++)
2520 ix86_optimize_mode_switching[i] = 0;
2521
2522 ix86_optimize_mode_switching[AVX_U128] = 1;
2523
2524 /* Call optimize_mode_switching. */
2525 g->get_passes ()->execute_pass_mode_switching ();
2526 return 0;
2527 }
2528
2529 namespace {
2530
2531 const pass_data pass_data_insert_vzeroupper =
2532 {
2533 RTL_PASS, /* type */
2534 "vzeroupper", /* name */
2535 OPTGROUP_NONE, /* optinfo_flags */
2536 TV_NONE, /* tv_id */
2537 0, /* properties_required */
2538 0, /* properties_provided */
2539 0, /* properties_destroyed */
2540 0, /* todo_flags_start */
2541 TODO_df_finish, /* todo_flags_finish */
2542 };
2543
2544 class pass_insert_vzeroupper : public rtl_opt_pass
2545 {
2546 public:
2547 pass_insert_vzeroupper(gcc::context *ctxt)
2548 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2549 {}
2550
2551 /* opt_pass methods: */
2552 virtual bool gate (function *)
2553 {
2554 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2555 }
2556
2557 virtual unsigned int execute (function *)
2558 {
2559 return rest_of_handle_insert_vzeroupper ();
2560 }
2561
2562 }; // class pass_insert_vzeroupper
2563
2564 } // anon namespace
2565
2566 rtl_opt_pass *
2567 make_pass_insert_vzeroupper (gcc::context *ctxt)
2568 {
2569 return new pass_insert_vzeroupper (ctxt);
2570 }
2571
2572 /* Return true if a red-zone is in use. */
2573
2574 static inline bool
2575 ix86_using_red_zone (void)
2576 {
2577 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2578 }
2579 \f
2580 /* Return a string that documents the current -m options. The caller is
2581 responsible for freeing the string. */
2582
2583 static char *
2584 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2585 const char *tune, enum fpmath_unit fpmath,
2586 bool add_nl_p)
2587 {
2588 struct ix86_target_opts
2589 {
2590 const char *option; /* option string */
2591 HOST_WIDE_INT mask; /* isa mask options */
2592 };
2593
2594 /* This table is ordered so that options like -msse4.2 that imply
2595 other options are matched first. */
2596 static struct ix86_target_opts isa_opts[] =
2597 {
2598 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2599 { "-mfma", OPTION_MASK_ISA_FMA },
2600 { "-mxop", OPTION_MASK_ISA_XOP },
2601 { "-mlwp", OPTION_MASK_ISA_LWP },
2602 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2603 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2604 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2605 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2606 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2607 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2608 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2609 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2610 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2611 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2612 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2613 { "-msse3", OPTION_MASK_ISA_SSE3 },
2614 { "-msse2", OPTION_MASK_ISA_SSE2 },
2615 { "-msse", OPTION_MASK_ISA_SSE },
2616 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2617 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2618 { "-mmmx", OPTION_MASK_ISA_MMX },
2619 { "-mabm", OPTION_MASK_ISA_ABM },
2620 { "-mbmi", OPTION_MASK_ISA_BMI },
2621 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2622 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2623 { "-mhle", OPTION_MASK_ISA_HLE },
2624 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2625 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2626 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2627 { "-madx", OPTION_MASK_ISA_ADX },
2628 { "-mtbm", OPTION_MASK_ISA_TBM },
2629 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2630 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2631 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2632 { "-maes", OPTION_MASK_ISA_AES },
2633 { "-msha", OPTION_MASK_ISA_SHA },
2634 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2635 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2636 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2637 { "-mf16c", OPTION_MASK_ISA_F16C },
2638 { "-mrtm", OPTION_MASK_ISA_RTM },
2639 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2640 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2641 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2642 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2643 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2644 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2645 };
2646
2647 /* Flag options. */
2648 static struct ix86_target_opts flag_opts[] =
2649 {
2650 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2651 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2652 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2653 { "-m80387", MASK_80387 },
2654 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2655 { "-malign-double", MASK_ALIGN_DOUBLE },
2656 { "-mcld", MASK_CLD },
2657 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2658 { "-mieee-fp", MASK_IEEE_FP },
2659 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2660 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2661 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2662 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2663 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2664 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2665 { "-mno-red-zone", MASK_NO_RED_ZONE },
2666 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2667 { "-mrecip", MASK_RECIP },
2668 { "-mrtd", MASK_RTD },
2669 { "-msseregparm", MASK_SSEREGPARM },
2670 { "-mstack-arg-probe", MASK_STACK_PROBE },
2671 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2672 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2673 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2674 { "-mvzeroupper", MASK_VZEROUPPER },
2675 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2676 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2677 { "-mprefer-avx128", MASK_PREFER_AVX128},
2678 };
2679
2680 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2681
2682 char isa_other[40];
2683 char target_other[40];
2684 unsigned num = 0;
2685 unsigned i, j;
2686 char *ret;
2687 char *ptr;
2688 size_t len;
2689 size_t line_len;
2690 size_t sep_len;
2691 const char *abi;
2692
2693 memset (opts, '\0', sizeof (opts));
2694
2695 /* Add -march= option. */
2696 if (arch)
2697 {
2698 opts[num][0] = "-march=";
2699 opts[num++][1] = arch;
2700 }
2701
2702 /* Add -mtune= option. */
2703 if (tune)
2704 {
2705 opts[num][0] = "-mtune=";
2706 opts[num++][1] = tune;
2707 }
2708
2709 /* Add -m32/-m64/-mx32. */
2710 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2711 {
2712 if ((isa & OPTION_MASK_ABI_64) != 0)
2713 abi = "-m64";
2714 else
2715 abi = "-mx32";
2716 isa &= ~ (OPTION_MASK_ISA_64BIT
2717 | OPTION_MASK_ABI_64
2718 | OPTION_MASK_ABI_X32);
2719 }
2720 else
2721 abi = "-m32";
2722 opts[num++][0] = abi;
2723
2724 /* Pick out the options in isa options. */
2725 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2726 {
2727 if ((isa & isa_opts[i].mask) != 0)
2728 {
2729 opts[num++][0] = isa_opts[i].option;
2730 isa &= ~ isa_opts[i].mask;
2731 }
2732 }
2733
2734 if (isa && add_nl_p)
2735 {
2736 opts[num++][0] = isa_other;
2737 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2738 isa);
2739 }
2740
2741 /* Add flag options. */
2742 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2743 {
2744 if ((flags & flag_opts[i].mask) != 0)
2745 {
2746 opts[num++][0] = flag_opts[i].option;
2747 flags &= ~ flag_opts[i].mask;
2748 }
2749 }
2750
2751 if (flags && add_nl_p)
2752 {
2753 opts[num++][0] = target_other;
2754 sprintf (target_other, "(other flags: %#x)", flags);
2755 }
2756
2757 /* Add -fpmath= option. */
2758 if (fpmath)
2759 {
2760 opts[num][0] = "-mfpmath=";
2761 switch ((int) fpmath)
2762 {
2763 case FPMATH_387:
2764 opts[num++][1] = "387";
2765 break;
2766
2767 case FPMATH_SSE:
2768 opts[num++][1] = "sse";
2769 break;
2770
2771 case FPMATH_387 | FPMATH_SSE:
2772 opts[num++][1] = "sse+387";
2773 break;
2774
2775 default:
2776 gcc_unreachable ();
2777 }
2778 }
2779
2780 /* Any options? */
2781 if (num == 0)
2782 return NULL;
2783
2784 gcc_assert (num < ARRAY_SIZE (opts));
2785
2786 /* Size the string. */
2787 len = 0;
2788 sep_len = (add_nl_p) ? 3 : 1;
2789 for (i = 0; i < num; i++)
2790 {
2791 len += sep_len;
2792 for (j = 0; j < 2; j++)
2793 if (opts[i][j])
2794 len += strlen (opts[i][j]);
2795 }
2796
2797 /* Build the string. */
2798 ret = ptr = (char *) xmalloc (len);
2799 line_len = 0;
2800
2801 for (i = 0; i < num; i++)
2802 {
2803 size_t len2[2];
2804
2805 for (j = 0; j < 2; j++)
2806 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2807
2808 if (i != 0)
2809 {
2810 *ptr++ = ' ';
2811 line_len++;
2812
2813 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2814 {
2815 *ptr++ = '\\';
2816 *ptr++ = '\n';
2817 line_len = 0;
2818 }
2819 }
2820
2821 for (j = 0; j < 2; j++)
2822 if (opts[i][j])
2823 {
2824 memcpy (ptr, opts[i][j], len2[j]);
2825 ptr += len2[j];
2826 line_len += len2[j];
2827 }
2828 }
2829
2830 *ptr = '\0';
2831 gcc_assert (ret + len >= ptr);
2832
2833 return ret;
2834 }
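
/* Illustrative note: the string assembled above is a space-separated list in
   the order -march=, -mtune=, the ABI switch, ISA options, flag options and
   finally -mfpmath=; with hypothetical ARCH/TUNE names "foo" and "bar" it
   might resemble "-march=foo -mtune=bar -m64 -msse2 -mfpmath=sse".  */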
2835
2836 /* Return true if profiling code should be emitted before the
2837 prologue, and false otherwise.
2838 Note: for x86 this is determined by the -mfentry ("hotfix") option. */
2839 static bool
2840 ix86_profile_before_prologue (void)
2841 {
2842 return flag_fentry != 0;
2843 }
2844
2845 /* Function that is callable from the debugger to print the current
2846 options. */
2847 void ATTRIBUTE_UNUSED
2848 ix86_debug_options (void)
2849 {
2850 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2851 ix86_arch_string, ix86_tune_string,
2852 ix86_fpmath, true);
2853
2854 if (opts)
2855 {
2856 fprintf (stderr, "%s\n\n", opts);
2857 free (opts);
2858 }
2859 else
2860 fputs ("<no options>\n\n", stderr);
2861
2862 return;
2863 }
2864
2865 static const char *stringop_alg_names[] = {
2866 #define DEF_ENUM
2867 #define DEF_ALG(alg, name) #name,
2868 #include "stringop.def"
2869 #undef DEF_ENUM
2870 #undef DEF_ALG
2871 };
2872
2873 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2874 The string is of the following form (or a comma-separated list of such entries):
2875
2876 strategy_alg:max_size:[align|noalign]
2877
2878 where the full size range for the strategy is either [0, max_size] or
2879 [min_size, max_size], in which min_size is the max_size + 1 of the
2880 preceding range. The last size range must have max_size == -1.
2881
2882 Examples:
2883
2884 1.
2885 -mmemcpy-strategy=libcall:-1:noalign
2886
2887 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2888
2889
2890 2.
2891 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2892
2893 This tells the compiler to use the following strategy for memset:
2894 1) when the expected size is between [1, 16], use rep_8byte strategy;
2895 2) when the size is between [17, 2048], use vector_loop;
2896 3) when the size is > 2048, use libcall. */
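
/* As a rough sketch of what the parser below produces for example 2 (the
   mapping from names such as "rep_8byte" to stringop_alg values comes from
   stringop.def), the three ranges become stringop_size_range entries of the
   form {max, alg, noalign}:

       { 16,   <rep_8byte alg>,    true  }   -- "noalign"
       { 2048, <vector_loop alg>,  false }   -- "align"
       { -1,   <libcall alg>,      true  }   -- "noalign"

   which then overwrite the default stringop_algs table.  */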
2897
2898 struct stringop_size_range
2899 {
2900 int max;
2901 stringop_alg alg;
2902 bool noalign;
2903 };
2904
2905 static void
2906 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2907 {
2908 const struct stringop_algs *default_algs;
2909 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2910 char *curr_range_str, *next_range_str;
2911 int i = 0, n = 0;
2912
2913 if (is_memset)
2914 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2915 else
2916 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2917
2918 curr_range_str = strategy_str;
2919
2920 do
2921 {
2922 int maxs;
2923 char alg_name[128];
2924 char align[16];
2925 next_range_str = strchr (curr_range_str, ',');
2926 if (next_range_str)
2927 *next_range_str++ = '\0';
2928
2929 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2930 alg_name, &maxs, align))
2931 {
2932 error ("wrong arg %s to option %s", curr_range_str,
2933 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2934 return;
2935 }
2936
2937 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2938 {
2939 error ("size ranges of option %s should be increasing",
2940 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2941 return;
2942 }
2943
2944 for (i = 0; i < last_alg; i++)
2945 if (!strcmp (alg_name, stringop_alg_names[i]))
2946 break;
2947
2948 if (i == last_alg)
2949 {
2950 error ("wrong stringop strategy name %s specified for option %s",
2951 alg_name,
2952 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2953 return;
2954 }
2955
2956 input_ranges[n].max = maxs;
2957 input_ranges[n].alg = (stringop_alg) i;
2958 if (!strcmp (align, "align"))
2959 input_ranges[n].noalign = false;
2960 else if (!strcmp (align, "noalign"))
2961 input_ranges[n].noalign = true;
2962 else
2963 {
2964 error ("unknown alignment %s specified for option %s",
2965 align, is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2966 return;
2967 }
2968 n++;
2969 curr_range_str = next_range_str;
2970 }
2971 while (curr_range_str);
2972
2973 if (input_ranges[n - 1].max != -1)
2974 {
2975 error ("the max value for the last size range should be -1"
2976 " for option %s",
2977 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2978 return;
2979 }
2980
2981 if (n > MAX_STRINGOP_ALGS)
2982 {
2983 error ("too many size ranges specified in option %s",
2984 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2985 return;
2986 }
2987
2988 /* Now override the default algs array; it lives in a const cost table, hence the const_casts.  */
2989 for (i = 0; i < n; i++)
2990 {
2991 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2992 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2993 = input_ranges[i].alg;
2994 *const_cast<int *>(&default_algs->size[i].noalign)
2995 = input_ranges[i].noalign;
2996 }
2997 }
2998
2999 \f
3000 /* Parse the -mtune-ctrl= option.  When DUMP is true,
3001 print the features that are explicitly set.  */
3002
3003 static void
3004 parse_mtune_ctrl_str (bool dump)
3005 {
3006 if (!ix86_tune_ctrl_string)
3007 return;
3008
3009 char *next_feature_string = NULL;
3010 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3011 char *orig = curr_feature_string;
3012 int i;
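  /* The option value is a comma-separated list of feature names from
     x86-tune.def; a leading '^' on a name clears the feature instead of
     setting it.  */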
3013 do
3014 {
3015 bool clear = false;
3016
3017 next_feature_string = strchr (curr_feature_string, ',');
3018 if (next_feature_string)
3019 *next_feature_string++ = '\0';
3020 if (*curr_feature_string == '^')
3021 {
3022 curr_feature_string++;
3023 clear = true;
3024 }
3025 for (i = 0; i < X86_TUNE_LAST; i++)
3026 {
3027 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3028 {
3029 ix86_tune_features[i] = !clear;
3030 if (dump)
3031 fprintf (stderr, "Explicitly %s feature %s\n",
3032 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3033 break;
3034 }
3035 }
3036 if (i == X86_TUNE_LAST)
3037 error ("Unknown parameter to option -mtune-ctrl: %s",
3038 clear ? curr_feature_string - 1 : curr_feature_string);
3039 curr_feature_string = next_feature_string;
3040 }
3041 while (curr_feature_string);
3042 free (orig);
3043 }
3044
3045 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3046 processor type. */
3047
3048 static void
3049 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3050 {
3051 unsigned int ix86_tune_mask = 1u << ix86_tune;
3052 int i;
3053
3054 for (i = 0; i < X86_TUNE_LAST; ++i)
3055 {
3056 if (ix86_tune_no_default)
3057 ix86_tune_features[i] = 0;
3058 else
3059 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3060 }
3061
3062 if (dump)
3063 {
3064 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3065 for (i = 0; i < X86_TUNE_LAST; i++)
3066 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3067 ix86_tune_features[i] ? "on" : "off");
3068 }
3069
3070 parse_mtune_ctrl_str (dump);
3071 }
3072
3073
3074 /* Override various settings based on options. If MAIN_ARGS_P, the
3075 options are from the command line, otherwise they are from
3076 attributes. */
3077
3078 static void
3079 ix86_option_override_internal (bool main_args_p,
3080 struct gcc_options *opts,
3081 struct gcc_options *opts_set)
3082 {
3083 int i;
3084 unsigned int ix86_arch_mask;
3085 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3086 const char *prefix;
3087 const char *suffix;
3088 const char *sw;
3089
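/* Bits for the processor_alias_table flags field below; each PTA_* flag
   marks an ISA extension implied by the corresponding CPU name.  */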
3090 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3091 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3092 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3093 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3094 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3095 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3096 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3097 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3098 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3099 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3100 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3101 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3102 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3103 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3104 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3105 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3106 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3107 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3108 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3109 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3110 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3111 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3112 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3113 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3114 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3115 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3116 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3117 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3118 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3119 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3120 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3121 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3122 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3123 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3124 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3125 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3126 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3127 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3128 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3129 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3130 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3131 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3132 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3133 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3134 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3135 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3136 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3137 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3138 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3139 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3140 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3141 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3142
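/* Convenience combinations of the PTA_* bits describing whole Intel
   microarchitectures; each generation includes the flags of the one it is
   derived from.  */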
3143 #define PTA_CORE2 \
3144 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3145 | PTA_CX16 | PTA_FXSR)
3146 #define PTA_NEHALEM \
3147 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3148 #define PTA_WESTMERE \
3149 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3150 #define PTA_SANDYBRIDGE \
3151 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3152 #define PTA_IVYBRIDGE \
3153 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3154 #define PTA_HASWELL \
3155 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3156 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3157 #define PTA_BROADWELL \
3158 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3159 #define PTA_BONNELL \
3160 (PTA_CORE2 | PTA_MOVBE)
3161 #define PTA_SILVERMONT \
3162 (PTA_WESTMERE | PTA_MOVBE)
3163
3164 /* If the number of PTA_* flags reaches 64, the flags field of struct pta below needs to be widened.  */
3165
3166 static struct pta
3167 {
3168 const char *const name; /* processor name or nickname. */
3169 const enum processor_type processor;
3170 const enum attr_cpu schedule;
3171 const unsigned HOST_WIDE_INT flags;
3172 }
3173 const processor_alias_table[] =
3174 {
3175 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3176 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3177 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3178 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3179 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3180 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3181 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3182 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3183 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3184 PTA_MMX | PTA_SSE | PTA_FXSR},
3185 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3186 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3187 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3188 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3189 PTA_MMX | PTA_SSE | PTA_FXSR},
3190 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3191 PTA_MMX | PTA_SSE | PTA_FXSR},
3192 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3193 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3194 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3195 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3196 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3197 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3198 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3199 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3200 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3201 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3202 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3203 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3204 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3205 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3206 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3207 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3208 PTA_SANDYBRIDGE},
3209 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3210 PTA_SANDYBRIDGE},
3211 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3212 PTA_IVYBRIDGE},
3213 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3214 PTA_IVYBRIDGE},
3215 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3216 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3217 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3218 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3219 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3220 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3221 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3222 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3223 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3225 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3226 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3227 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3228 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3229 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3230 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3231 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3232 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3233 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3234 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3235 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3236 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3237 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3238 {"x86-64", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3240 {"k8", PROCESSOR_K8, CPU_K8,
3241 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3242 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3243 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3244 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3245 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3246 {"opteron", PROCESSOR_K8, CPU_K8,
3247 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3248 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3249 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3250 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3251 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3252 {"athlon64", PROCESSOR_K8, CPU_K8,
3253 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3254 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3255 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3256 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3257 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3258 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3259 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3260 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3261 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3262 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3263 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3264 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3265 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3266 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3267 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3272 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3273 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3274 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3275 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3276 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3277 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3278 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3279 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3280 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3281 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3282 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3283 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3284 | PTA_XSAVEOPT | PTA_FSGSBASE},
3285 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3286 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3287 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3288 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3289 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3290 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3291 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3292 | PTA_MOVBE},
3293 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3294 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3295 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3296 | PTA_FXSR | PTA_XSAVE},
3297 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3298 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3299 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3300 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3301 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3302 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3303
3304 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3305 PTA_64BIT
3306 | PTA_HLE /* flags are only used for -march switch. */ },
3307 };
3308
3309 /* -mrecip options. */
3310 static struct
3311 {
3312 const char *string; /* option name */
3313 unsigned int mask; /* mask bits to set */
3314 }
3315 const recip_options[] =
3316 {
3317 { "all", RECIP_MASK_ALL },
3318 { "none", RECIP_MASK_NONE },
3319 { "div", RECIP_MASK_DIV },
3320 { "sqrt", RECIP_MASK_SQRT },
3321 { "vec-div", RECIP_MASK_VEC_DIV },
3322 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3323 };
3324
3325 int const pta_size = ARRAY_SIZE (processor_alias_table);
3326
3327 /* Set up prefix/suffix so the error messages refer to either the command
3328 line argument, or the attribute(target). */
3329 if (main_args_p)
3330 {
3331 prefix = "-m";
3332 suffix = "";
3333 sw = "switch";
3334 }
3335 else
3336 {
3337 prefix = "option(\"";
3338 suffix = "\")";
3339 sw = "attribute";
3340 }
3341
3342 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3343 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3344 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3345 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3346 #ifdef TARGET_BI_ARCH
3347 else
3348 {
3349 #if TARGET_BI_ARCH == 1
3350 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3351 is on and OPTION_MASK_ABI_X32 is off. We turn off
3352 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3353 -mx32. */
3354 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3355 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3356 #else
3357 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3358 on and OPTION_MASK_ABI_64 is off. We turn off
3359 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3360 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3361 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3362 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3363 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3364 #endif
3365 }
3366 #endif
3367
3368 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3369 {
3370 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3371 OPTION_MASK_ABI_64 for TARGET_X32. */
3372 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3373 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3374 }
3375 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3376 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3377 | OPTION_MASK_ABI_X32
3378 | OPTION_MASK_ABI_64);
3379 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3380 {
3381 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3382 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3383 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3384 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3385 }
3386
3387 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3388 SUBTARGET_OVERRIDE_OPTIONS;
3389 #endif
3390
3391 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3392 SUBSUBTARGET_OVERRIDE_OPTIONS;
3393 #endif
3394
3395 /* -fPIC is the default for x86_64. */
3396 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3397 opts->x_flag_pic = 2;
3398
3399 /* Need to check -mtune=generic first. */
3400 if (opts->x_ix86_tune_string)
3401 {
3402 /* As special support for cross compilers we read -mtune=native
3403 as -mtune=generic. With native compilers we won't see the
3404 -mtune=native, as it was changed by the driver. */
3405 if (!strcmp (opts->x_ix86_tune_string, "native"))
3406 {
3407 opts->x_ix86_tune_string = "generic";
3408 }
3409 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3410 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3411 "%stune=k8%s or %stune=generic%s instead as appropriate",
3412 prefix, suffix, prefix, suffix, prefix, suffix);
3413 }
3414 else
3415 {
3416 if (opts->x_ix86_arch_string)
3417 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3418 if (!opts->x_ix86_tune_string)
3419 {
3420 opts->x_ix86_tune_string
3421 = processor_target_table[TARGET_CPU_DEFAULT].name;
3422 ix86_tune_defaulted = 1;
3423 }
3424
3425 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3426 or defaulted. We need to use a sensible tune option. */
3427 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3428 {
3429 opts->x_ix86_tune_string = "generic";
3430 }
3431 }
3432
3433 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3434 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3435 {
3436 /* rep; movq isn't available in 32-bit code. */
3437 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3438 opts->x_ix86_stringop_alg = no_stringop;
3439 }
3440
3441 if (!opts->x_ix86_arch_string)
3442 opts->x_ix86_arch_string
3443 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3444 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3445 else
3446 ix86_arch_specified = 1;
3447
3448 if (opts_set->x_ix86_pmode)
3449 {
3450 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3451 && opts->x_ix86_pmode == PMODE_SI)
3452 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3453 && opts->x_ix86_pmode == PMODE_DI))
3454 error ("address mode %qs not supported in the %s bit mode",
3455 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3456 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3457 }
3458 else
3459 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3460 ? PMODE_DI : PMODE_SI;
3461
3462 if (!opts_set->x_ix86_abi)
3463 opts->x_ix86_abi = DEFAULT_ABI;
3464
3465 /* For targets using the MS ABI, enable MS extensions unless they
3466 were explicitly turned off.  For a non-MS ABI we turn this
3467 option off.  */
3468 if (!opts_set->x_flag_ms_extensions)
3469 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3470
3471 if (opts_set->x_ix86_cmodel)
3472 {
3473 switch (opts->x_ix86_cmodel)
3474 {
3475 case CM_SMALL:
3476 case CM_SMALL_PIC:
3477 if (opts->x_flag_pic)
3478 opts->x_ix86_cmodel = CM_SMALL_PIC;
3479 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3480 error ("code model %qs not supported in the %s bit mode",
3481 "small", "32");
3482 break;
3483
3484 case CM_MEDIUM:
3485 case CM_MEDIUM_PIC:
3486 if (opts->x_flag_pic)
3487 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3488 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3489 error ("code model %qs not supported in the %s bit mode",
3490 "medium", "32");
3491 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3492 error ("code model %qs not supported in x32 mode",
3493 "medium");
3494 break;
3495
3496 case CM_LARGE:
3497 case CM_LARGE_PIC:
3498 if (opts->x_flag_pic)
3499 opts->x_ix86_cmodel = CM_LARGE_PIC;
3500 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3501 error ("code model %qs not supported in the %s bit mode",
3502 "large", "32");
3503 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3504 error ("code model %qs not supported in x32 mode",
3505 "large");
3506 break;
3507
3508 case CM_32:
3509 if (opts->x_flag_pic)
3510 error ("code model %s does not support PIC mode", "32");
3511 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 error ("code model %qs not supported in the %s bit mode",
3513 "32", "64");
3514 break;
3515
3516 case CM_KERNEL:
3517 if (opts->x_flag_pic)
3518 {
3519 error ("code model %s does not support PIC mode", "kernel");
3520 opts->x_ix86_cmodel = CM_32;
3521 }
3522 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3523 error ("code model %qs not supported in the %s bit mode",
3524 "kernel", "32");
3525 break;
3526
3527 default:
3528 gcc_unreachable ();
3529 }
3530 }
3531 else
3532 {
3533 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3534 use of rip-relative addressing. This eliminates fixups that
3535 would otherwise be needed if this object is to be placed in a
3536 DLL, and is essentially just as efficient as direct addressing. */
3537 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3538 && (TARGET_RDOS || TARGET_PECOFF))
3539 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3540 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3541 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3542 else
3543 opts->x_ix86_cmodel = CM_32;
3544 }
3545 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3546 {
3547 error ("-masm=intel not supported in this configuration");
3548 opts->x_ix86_asm_dialect = ASM_ATT;
3549 }
3550 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3551 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3552 sorry ("%i-bit mode not compiled in",
3553 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3554
3555 for (i = 0; i < pta_size; i++)
3556 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3557 {
3558 ix86_schedule = processor_alias_table[i].schedule;
3559 ix86_arch = processor_alias_table[i].processor;
3560 /* Default cpu tuning to the architecture. */
3561 ix86_tune = ix86_arch;
3562
3563 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3564 && !(processor_alias_table[i].flags & PTA_64BIT))
3565 error ("CPU you selected does not support x86-64 "
3566 "instruction set");
3567
3568 if (processor_alias_table[i].flags & PTA_MMX
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3571 if (processor_alias_table[i].flags & PTA_3DNOW
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3574 if (processor_alias_table[i].flags & PTA_3DNOW_A
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3577 if (processor_alias_table[i].flags & PTA_SSE
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3580 if (processor_alias_table[i].flags & PTA_SSE2
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3583 if (processor_alias_table[i].flags & PTA_SSE3
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3586 if (processor_alias_table[i].flags & PTA_SSSE3
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3589 if (processor_alias_table[i].flags & PTA_SSE4_1
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3592 if (processor_alias_table[i].flags & PTA_SSE4_2
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3595 if (processor_alias_table[i].flags & PTA_AVX
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3598 if (processor_alias_table[i].flags & PTA_AVX2
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3601 if (processor_alias_table[i].flags & PTA_FMA
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3604 if (processor_alias_table[i].flags & PTA_SSE4A
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3607 if (processor_alias_table[i].flags & PTA_FMA4
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3610 if (processor_alias_table[i].flags & PTA_XOP
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3613 if (processor_alias_table[i].flags & PTA_LWP
3614 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3615 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3616 if (processor_alias_table[i].flags & PTA_ABM
3617 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3618 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3619 if (processor_alias_table[i].flags & PTA_BMI
3620 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3621 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3622 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3623 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3624 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3625 if (processor_alias_table[i].flags & PTA_TBM
3626 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3627 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3628 if (processor_alias_table[i].flags & PTA_BMI2
3629 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3630 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3631 if (processor_alias_table[i].flags & PTA_CX16
3632 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3633 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3634 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3635 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3636 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3637 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3638 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3641 if (processor_alias_table[i].flags & PTA_MOVBE
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3644 if (processor_alias_table[i].flags & PTA_AES
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3647 if (processor_alias_table[i].flags & PTA_SHA
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3650 if (processor_alias_table[i].flags & PTA_PCLMUL
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3653 if (processor_alias_table[i].flags & PTA_FSGSBASE
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3656 if (processor_alias_table[i].flags & PTA_RDRND
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3659 if (processor_alias_table[i].flags & PTA_F16C
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3662 if (processor_alias_table[i].flags & PTA_RTM
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3665 if (processor_alias_table[i].flags & PTA_HLE
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3668 if (processor_alias_table[i].flags & PTA_PRFCHW
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3671 if (processor_alias_table[i].flags & PTA_RDSEED
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3674 if (processor_alias_table[i].flags & PTA_ADX
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3677 if (processor_alias_table[i].flags & PTA_FXSR
3678 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3679 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3680 if (processor_alias_table[i].flags & PTA_XSAVE
3681 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3682 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3683 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3684 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3685 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3686 if (processor_alias_table[i].flags & PTA_AVX512F
3687 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3688 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3689 if (processor_alias_table[i].flags & PTA_AVX512ER
3690 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3691 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3692 if (processor_alias_table[i].flags & PTA_AVX512PF
3693 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3694 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3695 if (processor_alias_table[i].flags & PTA_AVX512CD
3696 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3697 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3698 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3699 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3700 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3701 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3702 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3703 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3704 if (processor_alias_table[i].flags & PTA_XSAVEC
3705 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3706 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3707 if (processor_alias_table[i].flags & PTA_XSAVES
3708 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3709 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3710 if (processor_alias_table[i].flags & PTA_AVX512DQ
3711 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3712 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3713 if (processor_alias_table[i].flags & PTA_AVX512BW
3714 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3715 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3716 if (processor_alias_table[i].flags & PTA_AVX512VL
3717 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3718 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3719 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3720 x86_prefetch_sse = true;
3721
3722 break;
3723 }
3724
3725 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3726 error ("generic CPU can be used only for %stune=%s %s",
3727 prefix, suffix, sw);
3728 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3729 error ("intel CPU can be used only for %stune=%s %s",
3730 prefix, suffix, sw);
3731 else if (i == pta_size)
3732 error ("bad value (%s) for %sarch=%s %s",
3733 opts->x_ix86_arch_string, prefix, suffix, sw);
3734
3735 ix86_arch_mask = 1u << ix86_arch;
3736 for (i = 0; i < X86_ARCH_LAST; ++i)
3737 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3738
3739 for (i = 0; i < pta_size; i++)
3740 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3741 {
3742 ix86_schedule = processor_alias_table[i].schedule;
3743 ix86_tune = processor_alias_table[i].processor;
3744 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3745 {
3746 if (!(processor_alias_table[i].flags & PTA_64BIT))
3747 {
3748 if (ix86_tune_defaulted)
3749 {
3750 opts->x_ix86_tune_string = "x86-64";
3751 for (i = 0; i < pta_size; i++)
3752 if (! strcmp (opts->x_ix86_tune_string,
3753 processor_alias_table[i].name))
3754 break;
3755 ix86_schedule = processor_alias_table[i].schedule;
3756 ix86_tune = processor_alias_table[i].processor;
3757 }
3758 else
3759 error ("CPU you selected does not support x86-64 "
3760 "instruction set");
3761 }
3762 }
3763 /* Intel CPUs have always interpreted SSE prefetch instructions as
3764 NOPs; so, we can enable SSE prefetch instructions even when
3765 -mtune (rather than -march) points us to a processor that has them.
3766 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3767 higher processors. */
3768 if (TARGET_CMOV
3769 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3770 x86_prefetch_sse = true;
3771 break;
3772 }
3773
3774 if (ix86_tune_specified && i == pta_size)
3775 error ("bad value (%s) for %stune=%s %s",
3776 opts->x_ix86_tune_string, prefix, suffix, sw);
3777
3778 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3779
3780 #ifndef USE_IX86_FRAME_POINTER
3781 #define USE_IX86_FRAME_POINTER 0
3782 #endif
3783
3784 #ifndef USE_X86_64_FRAME_POINTER
3785 #define USE_X86_64_FRAME_POINTER 0
3786 #endif
3787
3788 /* Set the default values for switches whose default depends on TARGET_64BIT
3789 in case they weren't overwritten by command line options. */
3790 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3791 {
3792 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3793 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3794 if (opts->x_flag_asynchronous_unwind_tables
3795 && !opts_set->x_flag_unwind_tables
3796 && TARGET_64BIT_MS_ABI)
3797 opts->x_flag_unwind_tables = 1;
3798 if (opts->x_flag_asynchronous_unwind_tables == 2)
3799 opts->x_flag_unwind_tables
3800 = opts->x_flag_asynchronous_unwind_tables = 1;
3801 if (opts->x_flag_pcc_struct_return == 2)
3802 opts->x_flag_pcc_struct_return = 0;
3803 }
3804 else
3805 {
3806 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3807 opts->x_flag_omit_frame_pointer
3808 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3809 if (opts->x_flag_asynchronous_unwind_tables == 2)
3810 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3811 if (opts->x_flag_pcc_struct_return == 2)
3812 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3813 }
3814
3815 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3816 if (opts->x_optimize_size)
3817 ix86_cost = &ix86_size_cost;
3818 else
3819 ix86_cost = ix86_tune_cost;
3820
3821 /* Arrange to set up i386_stack_locals for all functions. */
3822 init_machine_status = ix86_init_machine_status;
3823
3824 /* Validate -mregparm= value. */
3825 if (opts_set->x_ix86_regparm)
3826 {
3827 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3828 warning (0, "-mregparm is ignored in 64-bit mode");
3829 if (opts->x_ix86_regparm > REGPARM_MAX)
3830 {
3831 error ("-mregparm=%d is not between 0 and %d",
3832 opts->x_ix86_regparm, REGPARM_MAX);
3833 opts->x_ix86_regparm = 0;
3834 }
3835 }
3836 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3837 opts->x_ix86_regparm = REGPARM_MAX;
3838
3839 /* Default align_* from the processor table. */
3840 if (opts->x_align_loops == 0)
3841 {
3842 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3843 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3844 }
3845 if (opts->x_align_jumps == 0)
3846 {
3847 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3848 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3849 }
3850 if (opts->x_align_functions == 0)
3851 {
3852 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3853 }
3854
3855 /* Provide default for -mbranch-cost= value. */
3856 if (!opts_set->x_ix86_branch_cost)
3857 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3858
3859 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3860 {
3861 opts->x_target_flags
3862 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3863
3864 /* Enable by default the SSE and MMX builtins. Do allow the user to
3865 explicitly disable any of these. In particular, disabling SSE and
3866 MMX for kernel code is extremely useful. */
3867 if (!ix86_arch_specified)
3868 opts->x_ix86_isa_flags
3869 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3870 | TARGET_SUBTARGET64_ISA_DEFAULT)
3871 & ~opts->x_ix86_isa_flags_explicit);
3872
3873 if (TARGET_RTD_P (opts->x_target_flags))
3874 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3875 }
3876 else
3877 {
3878 opts->x_target_flags
3879 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3880
3881 if (!ix86_arch_specified)
3882 opts->x_ix86_isa_flags
3883 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3884
3885 /* The i386 ABI does not specify a red zone.  It still makes sense to use
3886 one when the programmer takes care to keep the stack from being destroyed.  */
3887 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3888 opts->x_target_flags |= MASK_NO_RED_ZONE;
3889 }
3890
3891 /* Keep nonleaf frame pointers. */
3892 if (opts->x_flag_omit_frame_pointer)
3893 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3894 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3895 opts->x_flag_omit_frame_pointer = 1;
3896
3897 /* If we're doing fast math, we don't care about comparison order
3898 wrt NaNs. This lets us use a shorter comparison sequence. */
3899 if (opts->x_flag_finite_math_only)
3900 opts->x_target_flags &= ~MASK_IEEE_FP;
3901
3902 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3903 since the insns won't need emulation. */
3904 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3905 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3906
3907 /* Likewise, if the target doesn't have a 387, or we've specified
3908 software floating point, don't use 387 inline intrinsics. */
3909 if (!TARGET_80387_P (opts->x_target_flags))
3910 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3911
3912 /* Turn on MMX builtins for -msse. */
3913 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3914 opts->x_ix86_isa_flags
3915 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3916
3917 /* Enable SSE prefetch. */
3918 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3919 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3920 x86_prefetch_sse = true;
3921
3922 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3923 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3924 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3925 opts->x_ix86_isa_flags
3926 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3927
3928 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3929 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3930 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3931 opts->x_ix86_isa_flags
3932 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3933
3934 /* Enable lzcnt instruction for -mabm. */
3935 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3936 opts->x_ix86_isa_flags
3937 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3938
3939 /* Validate -mpreferred-stack-boundary= value or default it to
3940 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3941 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
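  /* The option argument is the log2 of the boundary in bytes, so e.g.
     -mpreferred-stack-boundary=4 requests 1 << 4 = 16 bytes, i.e. 128 bits.  */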
3942 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3943 {
3944 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3945 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3946 int max = (TARGET_SEH ? 4 : 12);
3947
3948 if (opts->x_ix86_preferred_stack_boundary_arg < min
3949 || opts->x_ix86_preferred_stack_boundary_arg > max)
3950 {
3951 if (min == max)
3952 error ("-mpreferred-stack-boundary is not supported "
3953 "for this target");
3954 else
3955 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3956 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3957 }
3958 else
3959 ix86_preferred_stack_boundary
3960 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3961 }
3962
3963 /* Set the default value for -mstackrealign. */
3964 if (opts->x_ix86_force_align_arg_pointer == -1)
3965 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3966
3967 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3968
3969 /* Validate -mincoming-stack-boundary= value or default it to
3970 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3971 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3972 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3973 {
3974 if (opts->x_ix86_incoming_stack_boundary_arg
3975 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3976 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3977 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3978 opts->x_ix86_incoming_stack_boundary_arg,
3979 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3980 else
3981 {
3982 ix86_user_incoming_stack_boundary
3983 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3984 ix86_incoming_stack_boundary
3985 = ix86_user_incoming_stack_boundary;
3986 }
3987 }
3988
3989 #ifndef NO_PROFILE_COUNTERS
3990 if (flag_nop_mcount)
3991 error ("-mnop-mcount is not compatible with this target");
3992 #endif
3993 if (flag_nop_mcount && flag_pic)
3994 error ("-mnop-mcount is not implemented for -fPIC");
3995
3996 /* Accept -msseregparm only if at least SSE support is enabled. */
3997 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3998 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3999 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4000
4001 if (opts_set->x_ix86_fpmath)
4002 {
4003 if (opts->x_ix86_fpmath & FPMATH_SSE)
4004 {
4005 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4006 {
4007 warning (0, "SSE instruction set disabled, using 387 arithmetics");
4008 opts->x_ix86_fpmath = FPMATH_387;
4009 }
4010 else if ((opts->x_ix86_fpmath & FPMATH_387)
4011 && !TARGET_80387_P (opts->x_target_flags))
4012 {
4013 warning (0, "387 instruction set disabled, using SSE arithmetics");
4014 opts->x_ix86_fpmath = FPMATH_SSE;
4015 }
4016 }
4017 }
4018 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4019 -mfpmath=387.  The latter is nevertheless the default on many targets,
4020 since the extra 80-bit precision of temporaries is considered part of
4021 the ABI.  Override the default at least for -ffast-math.
4022 TODO: -mfpmath=both seems to produce equally performing code with
4023 slightly smaller binaries.  It is however not clear whether register
4024 allocation is ready for this setting.
4025 Also, -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4026 codegen.  We may switch to 387 with -ffast-math for size-optimized
4027 functions. */
4028 else if (fast_math_flags_set_p (&global_options)
4029 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4030 opts->x_ix86_fpmath = FPMATH_SSE;
4031 else
4032 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4033
4034 /* If the i387 is disabled, then do not return values in it. */
4035 if (!TARGET_80387_P (opts->x_target_flags))
4036 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4037
4038 /* Use external vectorized library in vectorizing intrinsics. */
4039 if (opts_set->x_ix86_veclibabi_type)
4040 switch (opts->x_ix86_veclibabi_type)
4041 {
4042 case ix86_veclibabi_type_svml:
4043 ix86_veclib_handler = ix86_veclibabi_svml;
4044 break;
4045
4046 case ix86_veclibabi_type_acml:
4047 ix86_veclib_handler = ix86_veclibabi_acml;
4048 break;
4049
4050 default:
4051 gcc_unreachable ();
4052 }
4053
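  /* Accumulate outgoing arguments when the tuning prefers it, unless the
     user set the mask explicitly or we are optimizing for size.  */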
4054 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4055 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4056 && !opts->x_optimize_size)
4057 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4058
4059 /* If stack probes are required, the space used for large function
4060 arguments on the stack must also be probed, so enable
4061 -maccumulate-outgoing-args so this happens in the prologue. */
4062 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4063 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4064 {
4065 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4066 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4067 "for correctness", prefix, suffix);
4068 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4069 }
4070
4071 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4072 {
4073 char *p;
4074 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4075 p = strchr (internal_label_prefix, 'X');
4076 internal_label_prefix_len = p - internal_label_prefix;
4077 *p = '\0';
4078 }
4079
4080 /* When no scheduling description is available, disable the scheduler
4081 passes so they neither slow down compilation nor make x87 code slower. */
4082 if (!TARGET_SCHEDULE)
4083 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4084
4085 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4086 ix86_tune_cost->simultaneous_prefetches,
4087 opts->x_param_values,
4088 opts_set->x_param_values);
4089 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4090 ix86_tune_cost->prefetch_block,
4091 opts->x_param_values,
4092 opts_set->x_param_values);
4093 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4094 ix86_tune_cost->l1_cache_size,
4095 opts->x_param_values,
4096 opts_set->x_param_values);
4097 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4098 ix86_tune_cost->l2_cache_size,
4099 opts->x_param_values,
4100 opts_set->x_param_values);
4101
4102 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4103 if (opts->x_flag_prefetch_loop_arrays < 0
4104 && HAVE_prefetch
4105 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4106 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4107 opts->x_flag_prefetch_loop_arrays = 1;
4108
4109 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4110 can be optimized to ap = __builtin_next_arg (0).  */
4111 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4112 targetm.expand_builtin_va_start = NULL;
4113
4114 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4115 {
4116 ix86_gen_leave = gen_leave_rex64;
4117 if (Pmode == DImode)
4118 {
4119 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4120 ix86_gen_tls_local_dynamic_base_64
4121 = gen_tls_local_dynamic_base_64_di;
4122 }
4123 else
4124 {
4125 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4126 ix86_gen_tls_local_dynamic_base_64
4127 = gen_tls_local_dynamic_base_64_si;
4128 }
4129 }
4130 else
4131 ix86_gen_leave = gen_leave;
4132
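  /* Pick the DImode or SImode variants of the helper insn generators
     according to the pointer mode.  */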
4133 if (Pmode == DImode)
4134 {
4135 ix86_gen_add3 = gen_adddi3;
4136 ix86_gen_sub3 = gen_subdi3;
4137 ix86_gen_sub3_carry = gen_subdi3_carry;
4138 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4139 ix86_gen_andsp = gen_anddi3;
4140 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4141 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4142 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4143 ix86_gen_monitor = gen_sse3_monitor_di;
4144 }
4145 else
4146 {
4147 ix86_gen_add3 = gen_addsi3;
4148 ix86_gen_sub3 = gen_subsi3;
4149 ix86_gen_sub3_carry = gen_subsi3_carry;
4150 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4151 ix86_gen_andsp = gen_andsi3;
4152 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4153 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4154 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4155 ix86_gen_monitor = gen_sse3_monitor_si;
4156 }
4157
4158 #ifdef USE_IX86_CLD
4159 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4160 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4161 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4162 #endif
4163
4164 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4165 {
4166 if (opts->x_flag_fentry > 0)
4167 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4168 "with -fpic");
4169 opts->x_flag_fentry = 0;
4170 }
4171 else if (TARGET_SEH)
4172 {
4173 if (opts->x_flag_fentry == 0)
4174 sorry ("-mno-fentry isn%'t compatible with SEH");
4175 opts->x_flag_fentry = 1;
4176 }
4177 else if (opts->x_flag_fentry < 0)
4178 {
4179 #if defined(PROFILE_BEFORE_PROLOGUE)
4180 opts->x_flag_fentry = 1;
4181 #else
4182 opts->x_flag_fentry = 0;
4183 #endif
4184 }
4185
4186 /* When not optimizing for size, enable vzeroupper optimization for
4187 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4188 AVX unaligned load/store. */
4189 if (!opts->x_optimize_size)
4190 {
4191 if (flag_expensive_optimizations
4192 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4193 opts->x_target_flags |= MASK_VZEROUPPER;
4194 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4195 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4196 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4197 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4198 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4199 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4200 /* Enable 128-bit AVX instruction generation
4201 for the auto-vectorizer. */
4202 if (TARGET_AVX128_OPTIMAL
4203 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4204 opts->x_target_flags |= MASK_PREFER_AVX128;
4205 }
4206
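  /* Parse -mrecip=opt[,opt...]: "default" stands for all operations, and a
     leading '!' removes the named operation from the mask instead of
     adding it.  */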
4207 if (opts->x_ix86_recip_name)
4208 {
4209 char *p = ASTRDUP (opts->x_ix86_recip_name);
4210 char *q;
4211 unsigned int mask, i;
4212 bool invert;
4213
4214 while ((q = strtok (p, ",")) != NULL)
4215 {
4216 p = NULL;
4217 if (*q == '!')
4218 {
4219 invert = true;
4220 q++;
4221 }
4222 else
4223 invert = false;
4224
4225 if (!strcmp (q, "default"))
4226 mask = RECIP_MASK_ALL;
4227 else
4228 {
4229 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4230 if (!strcmp (q, recip_options[i].string))
4231 {
4232 mask = recip_options[i].mask;
4233 break;
4234 }
4235
4236 if (i == ARRAY_SIZE (recip_options))
4237 {
4238 error ("unknown option for -mrecip=%s", q);
4239 invert = false;
4240 mask = RECIP_MASK_NONE;
4241 }
4242 }
4243
4244 opts->x_recip_mask_explicit |= mask;
4245 if (invert)
4246 opts->x_recip_mask &= ~mask;
4247 else
4248 opts->x_recip_mask |= mask;
4249 }
4250 }
4251
4252 if (TARGET_RECIP_P (opts->x_target_flags))
4253 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4254 else if (opts_set->x_target_flags & MASK_RECIP)
4255 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4256
4257 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4258 for 64-bit Bionic. */
4259 if (TARGET_HAS_BIONIC
4260 && !(opts_set->x_target_flags
4261 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4262 opts->x_target_flags |= (TARGET_64BIT
4263 ? MASK_LONG_DOUBLE_128
4264 : MASK_LONG_DOUBLE_64);
4265
4266 /* Only one of them can be active. */
4267 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4268 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4269
4270 /* Save the initial options in case the user uses function-specific
4271 options later.  */
4272 if (main_args_p)
4273 target_option_default_node = target_option_current_node
4274 = build_target_option_node (opts);
4275
4276 /* Handle stack protector */
4277 if (!opts_set->x_ix86_stack_protector_guard)
4278 opts->x_ix86_stack_protector_guard
4279 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4280
4281 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4282 if (opts->x_ix86_tune_memcpy_strategy)
4283 {
4284 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4285 ix86_parse_stringop_strategy_string (str, false);
4286 free (str);
4287 }
4288
4289 if (opts->x_ix86_tune_memset_strategy)
4290 {
4291 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4292 ix86_parse_stringop_strategy_string (str, true);
4293 free (str);
4294 }
4295 }
4296
4297 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4298
4299 static void
4300 ix86_option_override (void)
4301 {
4302 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4303 static struct register_pass_info insert_vzeroupper_info
4304 = { pass_insert_vzeroupper, "reload",
4305 1, PASS_POS_INSERT_AFTER
4306 };
4307
4308 ix86_option_override_internal (true, &global_options, &global_options_set);
4309
4310
4311 /* This needs to be done at start up. It's convenient to do it here. */
4312 register_pass (&insert_vzeroupper_info);
4313 }
4314
4315 /* Update register usage after having seen the compiler flags. */
4316
4317 static void
4318 ix86_conditional_register_usage (void)
4319 {
4320 int i, c_mask;
4321 unsigned int j;
4322
4323 /* The PIC register, if it exists, is fixed. */
4324 j = PIC_OFFSET_TABLE_REGNUM;
4325 if (j != INVALID_REGNUM)
4326 fixed_regs[j] = call_used_regs[j] = 1;
4327
4328 /* For 32-bit targets, squash the REX registers. */
4329 if (! TARGET_64BIT)
4330 {
4331 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4332 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4333 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4334 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4335 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4336 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4337 }
4338
4339 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4340 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4341 : TARGET_64BIT ? (1 << 2)
4342 : (1 << 1));
4343
4344 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4345
4346 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4347 {
4348 /* Set/reset conditionally defined registers from
4349 CALL_USED_REGISTERS initializer. */
4350 if (call_used_regs[i] > 1)
4351 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4352
4353 /* Calculate registers of CLOBBERED_REGS register set
4354 as call used registers from GENERAL_REGS register set. */
4355 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4356 && call_used_regs[i])
4357 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4358 }
4359
4360 /* If MMX is disabled, squash the registers. */
4361 if (! TARGET_MMX)
4362 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4363 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4364 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4365
4366 /* If SSE is disabled, squash the registers. */
4367 if (! TARGET_SSE)
4368 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4369 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4370 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4371
4372 /* If the FPU is disabled, squash the registers. */
4373 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4374 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4375 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4376 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4377
4378 /* If AVX512F is disabled, squash the registers. */
4379 if (! TARGET_AVX512F)
4380 {
4381 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4382 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4383
4384 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4385 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4386 }
4387 }
4388
4389 \f
4390 /* Save the current options */
4391
4392 static void
4393 ix86_function_specific_save (struct cl_target_option *ptr,
4394 struct gcc_options *opts)
4395 {
4396 ptr->arch = ix86_arch;
4397 ptr->schedule = ix86_schedule;
4398 ptr->tune = ix86_tune;
4399 ptr->branch_cost = ix86_branch_cost;
4400 ptr->tune_defaulted = ix86_tune_defaulted;
4401 ptr->arch_specified = ix86_arch_specified;
4402 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4403 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4404 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4405 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4406 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4407 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4408 ptr->x_ix86_abi = opts->x_ix86_abi;
4409 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4410 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4411 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4412 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4413 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4414 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4415 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4416 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4417 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4418 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4419 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4420 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4421 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4422 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4423 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4424 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4425 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4426 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4427 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4428 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4429
4430 /* The fields are char but the variables are not; make sure the
4431 values fit in the fields. */
4432 gcc_assert (ptr->arch == ix86_arch);
4433 gcc_assert (ptr->schedule == ix86_schedule);
4434 gcc_assert (ptr->tune == ix86_tune);
4435 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4436 }
4437
4438 /* Restore the current options */
4439
4440 static void
4441 ix86_function_specific_restore (struct gcc_options *opts,
4442 struct cl_target_option *ptr)
4443 {
4444 enum processor_type old_tune = ix86_tune;
4445 enum processor_type old_arch = ix86_arch;
4446 unsigned int ix86_arch_mask;
4447 int i;
4448
4449 /* We don't change -fPIC. */
4450 opts->x_flag_pic = flag_pic;
4451
4452 ix86_arch = (enum processor_type) ptr->arch;
4453 ix86_schedule = (enum attr_cpu) ptr->schedule;
4454 ix86_tune = (enum processor_type) ptr->tune;
4455 opts->x_ix86_branch_cost = ptr->branch_cost;
4456 ix86_tune_defaulted = ptr->tune_defaulted;
4457 ix86_arch_specified = ptr->arch_specified;
4458 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4459 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4460 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4461 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4462 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4463 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4464 opts->x_ix86_abi = ptr->x_ix86_abi;
4465 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4466 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4467 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4468 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4469 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4470 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4471 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4472 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4473 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4474 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4475 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4476 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4477 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4478 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4479 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4480 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4481 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4482 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4483 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4484 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4485
4486 /* Recreate the arch feature tests if the arch changed */
4487 if (old_arch != ix86_arch)
4488 {
4489 ix86_arch_mask = 1u << ix86_arch;
4490 for (i = 0; i < X86_ARCH_LAST; ++i)
4491 ix86_arch_features[i]
4492 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4493 }
4494
4495 /* Recreate the tune optimization tests */
4496 if (old_tune != ix86_tune)
4497 set_ix86_tune_features (ix86_tune, false);
4498 }
4499
4500 /* Print the current options */
4501
4502 static void
4503 ix86_function_specific_print (FILE *file, int indent,
4504 struct cl_target_option *ptr)
4505 {
4506 char *target_string
4507 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4508 NULL, NULL, ptr->x_ix86_fpmath, false);
4509
4510 gcc_assert (ptr->arch < PROCESSOR_max);
4511 fprintf (file, "%*sarch = %d (%s)\n",
4512 indent, "",
4513 ptr->arch, processor_target_table[ptr->arch].name);
4514
4515 gcc_assert (ptr->tune < PROCESSOR_max);
4516 fprintf (file, "%*stune = %d (%s)\n",
4517 indent, "",
4518 ptr->tune, processor_target_table[ptr->tune].name);
4519
4520 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4521
4522 if (target_string)
4523 {
4524 fprintf (file, "%*s%s\n", indent, "", target_string);
4525 free (target_string);
4526 }
4527 }
4528
4529 \f
4530 /* Inner function to process the attribute((target(...))), take an argument and
4531 set the current options from the argument. If we have a list, recursively go
4532 over the list. */
4533
4534 static bool
4535 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4536 struct gcc_options *opts,
4537 struct gcc_options *opts_set,
4538 struct gcc_options *enum_opts_set)
4539 {
4540 char *next_optstr;
4541 bool ret = true;
4542
4543 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4544 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4545 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4546 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4547 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4548
4549 enum ix86_opt_type
4550 {
4551 ix86_opt_unknown,
4552 ix86_opt_yes,
4553 ix86_opt_no,
4554 ix86_opt_str,
4555 ix86_opt_enum,
4556 ix86_opt_isa
4557 };
4558
4559 static const struct
4560 {
4561 const char *string;
4562 size_t len;
4563 enum ix86_opt_type type;
4564 int opt;
4565 int mask;
4566 } attrs[] = {
4567 /* isa options */
4568 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4569 IX86_ATTR_ISA ("abm", OPT_mabm),
4570 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4571 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4572 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4573 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4574 IX86_ATTR_ISA ("aes", OPT_maes),
4575 IX86_ATTR_ISA ("sha", OPT_msha),
4576 IX86_ATTR_ISA ("avx", OPT_mavx),
4577 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4578 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4579 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4580 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4581 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4582 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4583 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4584 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4585 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4586 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4587 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4588 IX86_ATTR_ISA ("sse", OPT_msse),
4589 IX86_ATTR_ISA ("sse2", OPT_msse2),
4590 IX86_ATTR_ISA ("sse3", OPT_msse3),
4591 IX86_ATTR_ISA ("sse4", OPT_msse4),
4592 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4593 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4594 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4595 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4596 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4597 IX86_ATTR_ISA ("fma", OPT_mfma),
4598 IX86_ATTR_ISA ("xop", OPT_mxop),
4599 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4600 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4601 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4602 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4603 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4604 IX86_ATTR_ISA ("hle", OPT_mhle),
4605 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4606 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4607 IX86_ATTR_ISA ("adx", OPT_madx),
4608 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4609 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4610 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4611 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4612 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4613 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4614 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4615
4616 /* enum options */
4617 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4618
4619 /* string options */
4620 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4621 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4622
4623 /* flag options */
4624 IX86_ATTR_YES ("cld",
4625 OPT_mcld,
4626 MASK_CLD),
4627
4628 IX86_ATTR_NO ("fancy-math-387",
4629 OPT_mfancy_math_387,
4630 MASK_NO_FANCY_MATH_387),
4631
4632 IX86_ATTR_YES ("ieee-fp",
4633 OPT_mieee_fp,
4634 MASK_IEEE_FP),
4635
4636 IX86_ATTR_YES ("inline-all-stringops",
4637 OPT_minline_all_stringops,
4638 MASK_INLINE_ALL_STRINGOPS),
4639
4640 IX86_ATTR_YES ("inline-stringops-dynamically",
4641 OPT_minline_stringops_dynamically,
4642 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4643
4644 IX86_ATTR_NO ("align-stringops",
4645 OPT_mno_align_stringops,
4646 MASK_NO_ALIGN_STRINGOPS),
4647
4648 IX86_ATTR_YES ("recip",
4649 OPT_mrecip,
4650 MASK_RECIP),
4651
4652 };
4653
4654 /* If this is a list, recurse to get the options. */
4655 if (TREE_CODE (args) == TREE_LIST)
4656 {
4657 bool ret = true;
4658
4659 for (; args; args = TREE_CHAIN (args))
4660 if (TREE_VALUE (args)
4661 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4662 p_strings, opts, opts_set,
4663 enum_opts_set))
4664 ret = false;
4665
4666 return ret;
4667 }
4668
4669 else if (TREE_CODE (args) != STRING_CST)
4670 {
4671 error ("attribute %<target%> argument not a string");
4672 return false;
4673 }
4674
4675 /* Handle multiple arguments separated by commas. */
4676 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4677
4678 while (next_optstr && *next_optstr != '\0')
4679 {
4680 char *p = next_optstr;
4681 char *orig_p = p;
4682 char *comma = strchr (next_optstr, ',');
4683 const char *opt_string;
4684 size_t len, opt_len;
4685 int opt;
4686 bool opt_set_p;
4687 char ch;
4688 unsigned i;
4689 enum ix86_opt_type type = ix86_opt_unknown;
4690 int mask = 0;
4691
4692 if (comma)
4693 {
4694 *comma = '\0';
4695 len = comma - next_optstr;
4696 next_optstr = comma + 1;
4697 }
4698 else
4699 {
4700 len = strlen (p);
4701 next_optstr = NULL;
4702 }
4703
4704 /* Recognize no-xxx. */
4705 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4706 {
4707 opt_set_p = false;
4708 p += 3;
4709 len -= 3;
4710 }
4711 else
4712 opt_set_p = true;
4713
4714 /* Find the option. */
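/* String and enum entries ("arch=", "tune=", "fpmath=") end in '=' and
   take a value, so for them only the prefix has to match and something
   must follow it; every other entry has to match the whole name.  */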
4715 ch = *p;
4716 opt = N_OPTS;
4717 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4718 {
4719 type = attrs[i].type;
4720 opt_len = attrs[i].len;
4721 if (ch == attrs[i].string[0]
4722 && ((type != ix86_opt_str && type != ix86_opt_enum)
4723 ? len == opt_len
4724 : len > opt_len)
4725 && memcmp (p, attrs[i].string, opt_len) == 0)
4726 {
4727 opt = attrs[i].opt;
4728 mask = attrs[i].mask;
4729 opt_string = attrs[i].string;
4730 break;
4731 }
4732 }
4733
4734 /* Process the option. */
4735 if (opt == N_OPTS)
4736 {
4737 error ("attribute(target(\"%s\")) is unknown", orig_p);
4738 ret = false;
4739 }
4740
4741 else if (type == ix86_opt_isa)
4742 {
4743 struct cl_decoded_option decoded;
4744
4745 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4746 ix86_handle_option (opts, opts_set,
4747 &decoded, input_location);
4748 }
4749
4750 else if (type == ix86_opt_yes || type == ix86_opt_no)
4751 {
4752 if (type == ix86_opt_no)
4753 opt_set_p = !opt_set_p;
4754
4755 if (opt_set_p)
4756 opts->x_target_flags |= mask;
4757 else
4758 opts->x_target_flags &= ~mask;
4759 }
4760
4761 else if (type == ix86_opt_str)
4762 {
4763 if (p_strings[opt])
4764 {
4765 error ("option(\"%s\") was already specified", opt_string);
4766 ret = false;
4767 }
4768 else
4769 p_strings[opt] = xstrdup (p + opt_len);
4770 }
4771
4772 else if (type == ix86_opt_enum)
4773 {
4774 bool arg_ok;
4775 int value;
4776
4777 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4778 if (arg_ok)
4779 set_option (opts, enum_opts_set, opt, value,
4780 p + opt_len, DK_UNSPECIFIED, input_location,
4781 global_dc);
4782 else
4783 {
4784 error ("attribute(target(\"%s\")) is unknown", orig_p);
4785 ret = false;
4786 }
4787 }
4788
4789 else
4790 gcc_unreachable ();
4791 }
4792
4793 return ret;
4794 }
4795
4796 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4797
4798 tree
4799 ix86_valid_target_attribute_tree (tree args,
4800 struct gcc_options *opts,
4801 struct gcc_options *opts_set)
4802 {
4803 const char *orig_arch_string = opts->x_ix86_arch_string;
4804 const char *orig_tune_string = opts->x_ix86_tune_string;
4805 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4806 int orig_tune_defaulted = ix86_tune_defaulted;
4807 int orig_arch_specified = ix86_arch_specified;
4808 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4809 tree t = NULL_TREE;
4810 int i;
4811 struct cl_target_option *def
4812 = TREE_TARGET_OPTION (target_option_default_node);
4813 struct gcc_options enum_opts_set;
4814
4815 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4816
4817 /* Process each of the options on the chain. */
4818 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4819 opts_set, &enum_opts_set))
4820 return error_mark_node;
4821
4822 /* If the changed options are different from the default, rerun
4823 ix86_option_override_internal, and then save the options away.
4824 The string options are attribute options, and will be undone
4825 when we copy the save structure. */
4826 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4827 || opts->x_target_flags != def->x_target_flags
4828 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4829 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4830 || enum_opts_set.x_ix86_fpmath)
4831 {
4832 /* If we are using the default tune= or arch=, undo the string assigned,
4833 and use the default. */
4834 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4835 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4836 else if (!orig_arch_specified)
4837 opts->x_ix86_arch_string = NULL;
4838
4839 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4840 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4841 else if (orig_tune_defaulted)
4842 opts->x_ix86_tune_string = NULL;
4843
4844 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4845 if (enum_opts_set.x_ix86_fpmath)
4846 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4847 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4848 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4849 {
4850 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4851 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4852 }
4853
4854 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4855 ix86_option_override_internal (false, opts, opts_set);
4856
4857 /* Add any builtin functions with the new isa if any. */
4858 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4859
4860 /* Save the current options unless we are validating options for
4861 #pragma. */
4862 t = build_target_option_node (opts);
4863
4864 opts->x_ix86_arch_string = orig_arch_string;
4865 opts->x_ix86_tune_string = orig_tune_string;
4866 opts_set->x_ix86_fpmath = orig_fpmath_set;
4867
4868 /* Free up memory allocated to hold the strings */
4869 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4870 free (option_strings[i]);
4871 }
4872
4873 return t;
4874 }
4875
4876 /* Hook to validate attribute((target("string"))). */
4877
4878 static bool
4879 ix86_valid_target_attribute_p (tree fndecl,
4880 tree ARG_UNUSED (name),
4881 tree args,
4882 int ARG_UNUSED (flags))
4883 {
4884 struct gcc_options func_options;
4885 tree new_target, new_optimize;
4886 bool ret = true;
4887
4888 /* attribute((target("default"))) does nothing, beyond
4889 affecting multi-versioning. */
4890 if (TREE_VALUE (args)
4891 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4892 && TREE_CHAIN (args) == NULL_TREE
4893 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4894 return true;
4895
4896 tree old_optimize = build_optimization_node (&global_options);
4897
4898 /* Get the optimization options of the current function. */
4899 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4900
4901 if (!func_optimize)
4902 func_optimize = old_optimize;
4903
4904 /* Init func_options. */
4905 memset (&func_options, 0, sizeof (func_options));
4906 init_options_struct (&func_options, NULL);
4907 lang_hooks.init_options_struct (&func_options);
4908
4909 cl_optimization_restore (&func_options,
4910 TREE_OPTIMIZATION (func_optimize));
4911
4912 /* Initialize func_options to the default before its target options can
4913 be set. */
4914 cl_target_option_restore (&func_options,
4915 TREE_TARGET_OPTION (target_option_default_node));
4916
4917 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4918 &global_options_set);
4919
4920 new_optimize = build_optimization_node (&func_options);
4921
4922 if (new_target == error_mark_node)
4923 ret = false;
4924
4925 else if (fndecl && new_target)
4926 {
4927 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4928
4929 if (old_optimize != new_optimize)
4930 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4931 }
4932
4933 return ret;
4934 }
4935
4936 \f
4937 /* Hook to determine if one function can safely inline another. */
4938
4939 static bool
4940 ix86_can_inline_p (tree caller, tree callee)
4941 {
4942 bool ret = false;
4943 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4944 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4945
4946 /* If callee has no option attributes, then it is ok to inline. */
4947 if (!callee_tree)
4948 ret = true;
4949
4950 /* If the caller has no option attributes but the callee does, then it is not ok to
4951 inline. */
4952 else if (!caller_tree)
4953 ret = false;
4954
4955 else
4956 {
4957 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4958 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4959
4960 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4961 function can inline an SSE2 function but an SSE2 function can't inline
4962 an SSE4 function. */
4963 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4964 != callee_opts->x_ix86_isa_flags)
4965 ret = false;
4966
4967 /* See if we have the same non-isa options. */
4968 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4969 ret = false;
4970
4971 /* See if arch, tune, etc. are the same. */
4972 else if (caller_opts->arch != callee_opts->arch)
4973 ret = false;
4974
4975 else if (caller_opts->tune != callee_opts->tune)
4976 ret = false;
4977
4978 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4979 ret = false;
4980
4981 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4982 ret = false;
4983
4984 else
4985 ret = true;
4986 }
4987
4988 return ret;
4989 }
4990
4991 \f
4992 /* Remember the last target of ix86_set_current_function. */
4993 static GTY(()) tree ix86_previous_fndecl;
4994
4995 /* Invalidate ix86_previous_fndecl cache. */
4996 void
4997 ix86_reset_previous_fndecl (void)
4998 {
4999 ix86_previous_fndecl = NULL_TREE;
5000 }
5001
5002 /* Establish appropriate back-end context for processing the function
5003 FNDECL. The argument might be NULL to indicate processing at top
5004 level, outside of any function scope. */
5005 static void
5006 ix86_set_current_function (tree fndecl)
5007 {
5008 /* Only change the context if the function changes. This hook is called
5009 several times in the course of compiling a function, and we don't want to
5010 slow things down too much or call target_reinit when it isn't safe. */
5011 if (fndecl && fndecl != ix86_previous_fndecl)
5012 {
5013 tree old_tree = (ix86_previous_fndecl
5014 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5015 : NULL_TREE);
5016
5017 tree new_tree = (fndecl
5018 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5019 : NULL_TREE);
5020
5021 ix86_previous_fndecl = fndecl;
5022 if (old_tree == new_tree)
5023 ;
5024
5025 else if (new_tree)
5026 {
5027 cl_target_option_restore (&global_options,
5028 TREE_TARGET_OPTION (new_tree));
5029 if (TREE_TARGET_GLOBALS (new_tree))
5030 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5031 else
5032 TREE_TARGET_GLOBALS (new_tree)
5033 = save_target_globals_default_opts ();
5034 }
5035
5036 else if (old_tree)
5037 {
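/* The previous function had a target attribute but this one does not;
   switch back to the globals for the target options currently in
   effect.  */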
5038 new_tree = target_option_current_node;
5039 cl_target_option_restore (&global_options,
5040 TREE_TARGET_OPTION (new_tree));
5041 if (TREE_TARGET_GLOBALS (new_tree))
5042 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5043 else if (new_tree == target_option_default_node)
5044 restore_target_globals (&default_target_globals);
5045 else
5046 TREE_TARGET_GLOBALS (new_tree)
5047 = save_target_globals_default_opts ();
5048 }
5049 }
5050 }
5051
5052 \f
5053 /* Return true if this goes in large data/bss. */
5054
5055 static bool
5056 ix86_in_large_data_p (tree exp)
5057 {
5058 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5059 return false;
5060
5061 /* Functions are never large data. */
5062 if (TREE_CODE (exp) == FUNCTION_DECL)
5063 return false;
5064
5065 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5066 {
5067 const char *section = DECL_SECTION_NAME (exp);
5068 if (strcmp (section, ".ldata") == 0
5069 || strcmp (section, ".lbss") == 0)
5070 return true;
5071 return false;
5072 }
5073 else
5074 {
5075 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5076
5077 /* If this is an incomplete type with size 0, then we can't put it
5078 in data because it might be too big when completed. Also,
5079 int_size_in_bytes returns -1 if size can vary or is larger than
5080 an integer, in which case it is also safer to assume that it goes in
5081 large data. */
5082 if (size <= 0 || size > ix86_section_threshold)
5083 return true;
5084 }
5085
5086 return false;
5087 }
5088
5089 /* Switch to the appropriate section for output of DECL.
5090 DECL is either a `VAR_DECL' node or a constant of some sort.
5091 RELOC indicates whether forming the initial value of DECL requires
5092 link-time relocations. */
5093
5094 ATTRIBUTE_UNUSED static section *
5095 x86_64_elf_select_section (tree decl, int reloc,
5096 unsigned HOST_WIDE_INT align)
5097 {
5098 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5099 && ix86_in_large_data_p (decl))
5100 {
5101 const char *sname = NULL;
5102 unsigned int flags = SECTION_WRITE;
5103 switch (categorize_decl_for_section (decl, reloc))
5104 {
5105 case SECCAT_DATA:
5106 sname = ".ldata";
5107 break;
5108 case SECCAT_DATA_REL:
5109 sname = ".ldata.rel";
5110 break;
5111 case SECCAT_DATA_REL_LOCAL:
5112 sname = ".ldata.rel.local";
5113 break;
5114 case SECCAT_DATA_REL_RO:
5115 sname = ".ldata.rel.ro";
5116 break;
5117 case SECCAT_DATA_REL_RO_LOCAL:
5118 sname = ".ldata.rel.ro.local";
5119 break;
5120 case SECCAT_BSS:
5121 sname = ".lbss";
5122 flags |= SECTION_BSS;
5123 break;
5124 case SECCAT_RODATA:
5125 case SECCAT_RODATA_MERGE_STR:
5126 case SECCAT_RODATA_MERGE_STR_INIT:
5127 case SECCAT_RODATA_MERGE_CONST:
5128 sname = ".lrodata";
5129 flags = 0;
5130 break;
5131 case SECCAT_SRODATA:
5132 case SECCAT_SDATA:
5133 case SECCAT_SBSS:
5134 gcc_unreachable ();
5135 case SECCAT_TEXT:
5136 case SECCAT_TDATA:
5137 case SECCAT_TBSS:
5138 /* We don't split these for the medium model. Place them into
5139 default sections and hope for the best. */
5140 break;
5141 }
5142 if (sname)
5143 {
5144 /* We might get called with string constants, but get_named_section
5145 doesn't like them as they are not DECLs. Also, we need to set
5146 flags in that case. */
5147 if (!DECL_P (decl))
5148 return get_section (sname, flags, NULL);
5149 return get_named_section (decl, sname, reloc);
5150 }
5151 }
5152 return default_elf_select_section (decl, reloc, align);
5153 }
5154
5155 /* Select a set of attributes for section NAME based on the properties
5156 of DECL and whether or not RELOC indicates that DECL's initializer
5157 might contain runtime relocations. */
5158
5159 static unsigned int ATTRIBUTE_UNUSED
5160 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5161 {
5162 unsigned int flags = default_section_type_flags (decl, name, reloc);
5163
5164 if (decl == NULL_TREE
5165 && (strcmp (name, ".ldata.rel.ro") == 0
5166 || strcmp (name, ".ldata.rel.ro.local") == 0))
5167 flags |= SECTION_RELRO;
5168
5169 if (strcmp (name, ".lbss") == 0
5170 || strncmp (name, ".lbss.", 5) == 0
5171 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5172 flags |= SECTION_BSS;
5173
5174 return flags;
5175 }
5176
5177 /* Build up a unique section name, expressed as a
5178 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5179 RELOC indicates whether the initial value of EXP requires
5180 link-time relocations. */
5181
5182 static void ATTRIBUTE_UNUSED
5183 x86_64_elf_unique_section (tree decl, int reloc)
5184 {
5185 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5186 && ix86_in_large_data_p (decl))
5187 {
5188 const char *prefix = NULL;
5189 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5190 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5191
5192 switch (categorize_decl_for_section (decl, reloc))
5193 {
5194 case SECCAT_DATA:
5195 case SECCAT_DATA_REL:
5196 case SECCAT_DATA_REL_LOCAL:
5197 case SECCAT_DATA_REL_RO:
5198 case SECCAT_DATA_REL_RO_LOCAL:
5199 prefix = one_only ? ".ld" : ".ldata";
5200 break;
5201 case SECCAT_BSS:
5202 prefix = one_only ? ".lb" : ".lbss";
5203 break;
5204 case SECCAT_RODATA:
5205 case SECCAT_RODATA_MERGE_STR:
5206 case SECCAT_RODATA_MERGE_STR_INIT:
5207 case SECCAT_RODATA_MERGE_CONST:
5208 prefix = one_only ? ".lr" : ".lrodata";
5209 break;
5210 case SECCAT_SRODATA:
5211 case SECCAT_SDATA:
5212 case SECCAT_SBSS:
5213 gcc_unreachable ();
5214 case SECCAT_TEXT:
5215 case SECCAT_TDATA:
5216 case SECCAT_TBSS:
5217 /* We don't split these for the medium model. Place them into
5218 default sections and hope for the best. */
5219 break;
5220 }
5221 if (prefix)
5222 {
5223 const char *name, *linkonce;
5224 char *string;
5225
5226 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5227 name = targetm.strip_name_encoding (name);
5228
5229 /* If we're using one_only, then there needs to be a .gnu.linkonce
5230 prefix to the section name. */
5231 linkonce = one_only ? ".gnu.linkonce" : "";
5232
5233 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5234
5235 set_decl_section_name (decl, string);
5236 return;
5237 }
5238 }
5239 default_unique_section (decl, reloc);
5240 }
5241
5242 #ifdef COMMON_ASM_OP
5243 /* This says how to output assembler code to declare an
5244 uninitialized external linkage data object.
5245
5246 For the medium model on x86-64 we need to use the .largecomm directive
5247 for large objects. */
5248 void
5249 x86_elf_aligned_common (FILE *file,
5250 const char *name, unsigned HOST_WIDE_INT size,
5251 int align)
5252 {
5253 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5254 && size > (unsigned int)ix86_section_threshold)
5255 fputs (".largecomm\t", file);
5256 else
5257 fputs (COMMON_ASM_OP, file);
5258 assemble_name (file, name);
5259 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5260 size, align / BITS_PER_UNIT);
5261 }
5262 #endif
5263
5264 /* Utility function for targets to use in implementing
5265 ASM_OUTPUT_ALIGNED_BSS. */
5266
5267 void
5268 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5269 unsigned HOST_WIDE_INT size, int align)
5270 {
5271 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5272 && size > (unsigned int)ix86_section_threshold)
5273 switch_to_section (get_named_section (decl, ".lbss", 0));
5274 else
5275 switch_to_section (bss_section);
5276 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5277 #ifdef ASM_DECLARE_OBJECT_NAME
5278 last_assemble_variable_decl = decl;
5279 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5280 #else
5281 /* The standard thing is to just output a label for the object. */
5282 ASM_OUTPUT_LABEL (file, name);
5283 #endif /* ASM_DECLARE_OBJECT_NAME */
5284 ASM_OUTPUT_SKIP (file, size ? size : 1);
5285 }
5286 \f
5287 /* Decide whether we must probe the stack before any space allocation
5288 on this target. It's essentially TARGET_STACK_PROBE except when
5289 -fstack-check causes the stack to be already probed differently. */
5290
5291 bool
5292 ix86_target_stack_probe (void)
5293 {
5294 /* Do not probe the stack twice if static stack checking is enabled. */
5295 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5296 return false;
5297
5298 return TARGET_STACK_PROBE;
5299 }
5300 \f
5301 /* Decide whether we can make a sibling call to a function. DECL is the
5302 declaration of the function being targeted by the call and EXP is the
5303 CALL_EXPR representing the call. */
5304
5305 static bool
5306 ix86_function_ok_for_sibcall (tree decl, tree exp)
5307 {
5308 tree type, decl_or_type;
5309 rtx a, b;
5310
5311 /* If we are generating position-independent code, we cannot sibcall
5312 optimize any indirect call, or a direct call to a global function,
5313 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5314 if (!TARGET_MACHO
5315 && !TARGET_64BIT
5316 && flag_pic
5317 && (!decl || !targetm.binds_local_p (decl)))
5318 return false;
5319
5320 /* If we need to align the outgoing stack, then sibcalling would
5321 unalign the stack, which may break the called function. */
5322 if (ix86_minimum_incoming_stack_boundary (true)
5323 < PREFERRED_STACK_BOUNDARY)
5324 return false;
5325
5326 if (decl)
5327 {
5328 decl_or_type = decl;
5329 type = TREE_TYPE (decl);
5330 }
5331 else
5332 {
5333 /* We're looking at the CALL_EXPR, we need the type of the function. */
5334 type = CALL_EXPR_FN (exp); /* pointer expression */
5335 type = TREE_TYPE (type); /* pointer type */
5336 type = TREE_TYPE (type); /* function type */
5337 decl_or_type = type;
5338 }
5339
5340 /* Check that the return value locations are the same. For example,
5341 if we are returning floats on the 80387 register stack, we cannot
5342 make a sibcall from a function that doesn't return a float to a
5343 function that does or, conversely, from a function that does return
5344 a float to a function that doesn't; the necessary stack adjustment
5345 would not be executed. This is also the place we notice
5346 differences in the return value ABI. Note that it is ok for one
5347 of the functions to have void return type as long as the return
5348 value of the other is passed in a register. */
5349 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5350 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5351 cfun->decl, false);
5352 if (STACK_REG_P (a) || STACK_REG_P (b))
5353 {
5354 if (!rtx_equal_p (a, b))
5355 return false;
5356 }
5357 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5358 ;
5359 else if (!rtx_equal_p (a, b))
5360 return false;
5361
5362 if (TARGET_64BIT)
5363 {
5364 /* The SYSV ABI has more call-clobbered registers;
5365 disallow sibcalls from MS to SYSV. */
5366 if (cfun->machine->call_abi == MS_ABI
5367 && ix86_function_type_abi (type) == SYSV_ABI)
5368 return false;
5369 }
5370 else
5371 {
5372 /* If this call is indirect, we'll need to be able to use a
5373 call-clobbered register for the address of the target function.
5374 Make sure that all such registers are not used for passing
5375 parameters. Note that DLLIMPORT functions are indirect. */
5376 if (!decl
5377 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5378 {
5379 if (ix86_function_regparm (type, NULL) >= 3)
5380 {
5381 /* ??? Need to count the actual number of registers to be used,
5382 not the possible number of registers. Fix later. */
5383 return false;
5384 }
5385 }
5386 }
5387
5388 /* Otherwise okay. That also includes certain types of indirect calls. */
5389 return true;
5390 }
5391
5392 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5393 and "sseregparm" calling convention attributes;
5394 arguments as in struct attribute_spec.handler. */
5395
5396 static tree
5397 ix86_handle_cconv_attribute (tree *node, tree name,
5398 tree args,
5399 int,
5400 bool *no_add_attrs)
5401 {
5402 if (TREE_CODE (*node) != FUNCTION_TYPE
5403 && TREE_CODE (*node) != METHOD_TYPE
5404 && TREE_CODE (*node) != FIELD_DECL
5405 && TREE_CODE (*node) != TYPE_DECL)
5406 {
5407 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5408 name);
5409 *no_add_attrs = true;
5410 return NULL_TREE;
5411 }
5412
5413 /* Can combine regparm with all attributes but fastcall and thiscall. */
5414 if (is_attribute_p ("regparm", name))
5415 {
5416 tree cst;
5417
5418 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5419 {
5420 error ("fastcall and regparm attributes are not compatible");
5421 }
5422
5423 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5424 {
5425 error ("regparm and thiscall attributes are not compatible");
5426 }
5427
5428 cst = TREE_VALUE (args);
5429 if (TREE_CODE (cst) != INTEGER_CST)
5430 {
5431 warning (OPT_Wattributes,
5432 "%qE attribute requires an integer constant argument",
5433 name);
5434 *no_add_attrs = true;
5435 }
5436 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5437 {
5438 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5439 name, REGPARM_MAX);
5440 *no_add_attrs = true;
5441 }
5442
5443 return NULL_TREE;
5444 }
5445
5446 if (TARGET_64BIT)
5447 {
5448 /* Do not warn when emulating the MS ABI. */
5449 if ((TREE_CODE (*node) != FUNCTION_TYPE
5450 && TREE_CODE (*node) != METHOD_TYPE)
5451 || ix86_function_type_abi (*node) != MS_ABI)
5452 warning (OPT_Wattributes, "%qE attribute ignored",
5453 name);
5454 *no_add_attrs = true;
5455 return NULL_TREE;
5456 }
5457
5458 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5459 if (is_attribute_p ("fastcall", name))
5460 {
5461 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5462 {
5463 error ("fastcall and cdecl attributes are not compatible");
5464 }
5465 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5466 {
5467 error ("fastcall and stdcall attributes are not compatible");
5468 }
5469 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5470 {
5471 error ("fastcall and regparm attributes are not compatible");
5472 }
5473 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5474 {
5475 error ("fastcall and thiscall attributes are not compatible");
5476 }
5477 }
5478
5479 /* Can combine stdcall with fastcall (redundant), regparm and
5480 sseregparm. */
5481 else if (is_attribute_p ("stdcall", name))
5482 {
5483 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5484 {
5485 error ("stdcall and cdecl attributes are not compatible");
5486 }
5487 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5488 {
5489 error ("stdcall and fastcall attributes are not compatible");
5490 }
5491 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5492 {
5493 error ("stdcall and thiscall attributes are not compatible");
5494 }
5495 }
5496
5497 /* Can combine cdecl with regparm and sseregparm. */
5498 else if (is_attribute_p ("cdecl", name))
5499 {
5500 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5501 {
5502 error ("stdcall and cdecl attributes are not compatible");
5503 }
5504 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5505 {
5506 error ("fastcall and cdecl attributes are not compatible");
5507 }
5508 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5509 {
5510 error ("cdecl and thiscall attributes are not compatible");
5511 }
5512 }
5513 else if (is_attribute_p ("thiscall", name))
5514 {
5515 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5516 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5517 name);
5518 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5519 {
5520 error ("stdcall and thiscall attributes are not compatible");
5521 }
5522 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5523 {
5524 error ("fastcall and thiscall attributes are not compatible");
5525 }
5526 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5527 {
5528 error ("cdecl and thiscall attributes are not compatible");
5529 }
5530 }
5531
5532 /* Can combine sseregparm with all attributes. */
5533
5534 return NULL_TREE;
5535 }
5536
5537 /* The transactional memory builtins are implicitly regparm or fastcall
5538 depending on the ABI. Override the generic do-nothing attribute that
5539 these builtins were declared with, and replace it with one of the two
5540 attributes that we expect elsewhere. */
5541
5542 static tree
5543 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5544 int flags, bool *no_add_attrs)
5545 {
5546 tree alt;
5547
5548 /* In no case do we want to add the placeholder attribute. */
5549 *no_add_attrs = true;
5550
5551 /* The 64-bit ABI is unchanged for transactional memory. */
5552 if (TARGET_64BIT)
5553 return NULL_TREE;
5554
5555 /* ??? Is there a better way to validate 32-bit windows? We have
5556 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5557 if (CHECK_STACK_LIMIT > 0)
5558 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5559 else
5560 {
5561 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5562 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5563 }
5564 decl_attributes (node, alt, flags);
5565
5566 return NULL_TREE;
5567 }
5568
5569 /* This function determines the calling convention from TYPE. */
5570
5571 unsigned int
5572 ix86_get_callcvt (const_tree type)
5573 {
5574 unsigned int ret = 0;
5575 bool is_stdarg;
5576 tree attrs;
5577
5578 if (TARGET_64BIT)
5579 return IX86_CALLCVT_CDECL;
5580
5581 attrs = TYPE_ATTRIBUTES (type);
5582 if (attrs != NULL_TREE)
5583 {
5584 if (lookup_attribute ("cdecl", attrs))
5585 ret |= IX86_CALLCVT_CDECL;
5586 else if (lookup_attribute ("stdcall", attrs))
5587 ret |= IX86_CALLCVT_STDCALL;
5588 else if (lookup_attribute ("fastcall", attrs))
5589 ret |= IX86_CALLCVT_FASTCALL;
5590 else if (lookup_attribute ("thiscall", attrs))
5591 ret |= IX86_CALLCVT_THISCALL;
5592
5593 /* Regparm isn't allowed for thiscall and fastcall. */
5594 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5595 {
5596 if (lookup_attribute ("regparm", attrs))
5597 ret |= IX86_CALLCVT_REGPARM;
5598 if (lookup_attribute ("sseregparm", attrs))
5599 ret |= IX86_CALLCVT_SSEREGPARM;
5600 }
5601
5602 if (IX86_BASE_CALLCVT(ret) != 0)
5603 return ret;
5604 }
5605
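/* With TARGET_RTD (-mrtd) stdcall is the default for non-variadic
   functions.  Otherwise fall back to cdecl, except that a non-variadic
   method under the MS ABI with no explicit convention defaults to
   thiscall.  */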
5606 is_stdarg = stdarg_p (type);
5607 if (TARGET_RTD && !is_stdarg)
5608 return IX86_CALLCVT_STDCALL | ret;
5609
5610 if (ret != 0
5611 || is_stdarg
5612 || TREE_CODE (type) != METHOD_TYPE
5613 || ix86_function_type_abi (type) != MS_ABI)
5614 return IX86_CALLCVT_CDECL | ret;
5615
5616 return IX86_CALLCVT_THISCALL;
5617 }
5618
5619 /* Return 0 if the attributes for two types are incompatible, 1 if they
5620 are compatible, and 2 if they are nearly compatible (which causes a
5621 warning to be generated). */
5622
5623 static int
5624 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5625 {
5626 unsigned int ccvt1, ccvt2;
5627
5628 if (TREE_CODE (type1) != FUNCTION_TYPE
5629 && TREE_CODE (type1) != METHOD_TYPE)
5630 return 1;
5631
5632 ccvt1 = ix86_get_callcvt (type1);
5633 ccvt2 = ix86_get_callcvt (type2);
5634 if (ccvt1 != ccvt2)
5635 return 0;
5636 if (ix86_function_regparm (type1, NULL)
5637 != ix86_function_regparm (type2, NULL))
5638 return 0;
5639
5640 return 1;
5641 }
5642 \f
5643 /* Return the regparm value for a function with the indicated TYPE and DECL.
5644 DECL may be NULL when calling function indirectly
5645 or considering a libcall. */
5646
5647 static int
5648 ix86_function_regparm (const_tree type, const_tree decl)
5649 {
5650 tree attr;
5651 int regparm;
5652 unsigned int ccvt;
5653
5654 if (TARGET_64BIT)
5655 return (ix86_function_type_abi (type) == SYSV_ABI
5656 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5657 ccvt = ix86_get_callcvt (type);
5658 regparm = ix86_regparm;
5659
5660 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5661 {
5662 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5663 if (attr)
5664 {
5665 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5666 return regparm;
5667 }
5668 }
5669 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5670 return 2;
5671 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5672 return 1;
5673
5674 /* Use register calling convention for local functions when possible. */
5675 if (decl
5676 && TREE_CODE (decl) == FUNCTION_DECL
5677 /* Caller and callee must agree on the calling convention, so
5678 checking just the current optimize setting here would mean that with
5679 __attribute__((optimize (...))) the caller could use the regparm convention
5680 and the callee not, or vice versa. Instead look at whether the callee
5681 is optimized or not. */
5682 && opt_for_fn (decl, optimize)
5683 && !(profile_flag && !flag_fentry))
5684 {
5685 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5686 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5687 if (i && i->local && i->can_change_signature)
5688 {
5689 int local_regparm, globals = 0, regno;
5690
5691 /* Make sure no regparm register is taken by a
5692 fixed register variable. */
5693 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5694 if (fixed_regs[local_regparm])
5695 break;
5696
5697 /* We don't want to use regparm(3) for nested functions as
5698 these use a static chain pointer in the third argument. */
5699 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5700 local_regparm = 2;
5701
5702 /* In 32-bit mode save a register for the split stack. */
5703 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5704 local_regparm = 2;
5705
5706 /* Each fixed register usage increases register pressure,
5707 so fewer registers should be used for argument passing.
5708 This functionality can be overridden by an explicit
5709 regparm value. */
5710 for (regno = AX_REG; regno <= DI_REG; regno++)
5711 if (fixed_regs[regno])
5712 globals++;
5713
5714 local_regparm
5715 = globals < local_regparm ? local_regparm - globals : 0;
5716
5717 if (local_regparm > regparm)
5718 regparm = local_regparm;
5719 }
5720 }
5721
5722 return regparm;
5723 }
5724
5725 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5726 DFmode (2) arguments in SSE registers for a function with the
5727 indicated TYPE and DECL. DECL may be NULL when calling function
5728 indirectly or considering a libcall. Otherwise return 0. */
5729
5730 static int
5731 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5732 {
5733 gcc_assert (!TARGET_64BIT);
5734
5735 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5736 by the sseregparm attribute. */
5737 if (TARGET_SSEREGPARM
5738 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5739 {
5740 if (!TARGET_SSE)
5741 {
5742 if (warn)
5743 {
5744 if (decl)
5745 error ("calling %qD with attribute sseregparm without "
5746 "SSE/SSE2 enabled", decl);
5747 else
5748 error ("calling %qT with attribute sseregparm without "
5749 "SSE/SSE2 enabled", type);
5750 }
5751 return 0;
5752 }
5753
5754 return 2;
5755 }
5756
5757 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5758 (and DFmode for SSE2) arguments in SSE registers. */
5759 if (decl && TARGET_SSE_MATH && optimize
5760 && !(profile_flag && !flag_fentry))
5761 {
5762 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5763 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5764 if (i && i->local && i->can_change_signature)
5765 return TARGET_SSE2 ? 2 : 1;
5766 }
5767
5768 return 0;
5769 }
5770
5771 /* Return true if EAX is live at the start of the function. Used by
5772 ix86_expand_prologue to determine if we need special help before
5773 calling allocate_stack_worker. */
5774
5775 static bool
5776 ix86_eax_live_at_start_p (void)
5777 {
5778 /* Cheat. Don't bother working forward from ix86_function_regparm
5779 to the function type to whether an actual argument is located in
5780 eax. Instead just look at cfg info, which is still close enough
5781 to correct at this point. This gives false positives for broken
5782 functions that might use uninitialized data that happens to be
5783 allocated in eax, but who cares? */
5784 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5785 }
5786
5787 static bool
5788 ix86_keep_aggregate_return_pointer (tree fntype)
5789 {
5790 tree attr;
5791
5792 if (!TARGET_64BIT)
5793 {
5794 attr = lookup_attribute ("callee_pop_aggregate_return",
5795 TYPE_ATTRIBUTES (fntype));
5796 if (attr)
5797 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5798
5799 /* For 32-bit MS-ABI the default is to keep aggregate
5800 return pointer. */
5801 if (ix86_function_type_abi (fntype) == MS_ABI)
5802 return true;
5803 }
5804 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5805 }
5806
5807 /* Value is the number of bytes of arguments automatically
5808 popped when returning from a subroutine call.
5809 FUNDECL is the declaration node of the function (as a tree),
5810 FUNTYPE is the data type of the function (as a tree),
5811 or for a library call it is an identifier node for the subroutine name.
5812 SIZE is the number of bytes of arguments passed on the stack.
5813
5814 On the 80386, the RTD insn may be used to pop them if the number
5815 of args is fixed, but if the number is variable then the caller
5816 must pop them all. RTD can't be used for library calls now
5817 because the library is compiled with the Unix compiler.
5818 Use of RTD is a selectable option, since it is incompatible with
5819 standard Unix calling sequences. If the option is not selected,
5820 the caller must always pop the args.
5821
5822 The attribute stdcall is equivalent to RTD on a per module basis. */
5823
5824 static int
5825 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5826 {
5827 unsigned int ccvt;
5828
5829 /* None of the 64-bit ABIs pop arguments. */
5830 if (TARGET_64BIT)
5831 return 0;
5832
5833 ccvt = ix86_get_callcvt (funtype);
5834
5835 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5836 | IX86_CALLCVT_THISCALL)) != 0
5837 && ! stdarg_p (funtype))
5838 return size;
5839
5840 /* Lose any fake structure return argument if it is passed on the stack. */
5841 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5842 && !ix86_keep_aggregate_return_pointer (funtype))
5843 {
5844 int nregs = ix86_function_regparm (funtype, fundecl);
5845 if (nregs == 0)
5846 return GET_MODE_SIZE (Pmode);
5847 }
5848
5849 return 0;
5850 }
5851
5852 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5853
5854 static bool
5855 ix86_legitimate_combined_insn (rtx_insn *insn)
5856 {
5857 /* Check operand constraints in case hard registers were propagated
5858 into insn pattern. This check prevents combine pass from
5859 generating insn patterns with invalid hard register operands.
5860 These invalid insns can eventually confuse reload to error out
5861 with a spill failure. See also PRs 46829 and 46843. */
5862 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5863 {
5864 int i;
5865
5866 extract_insn (insn);
5867 preprocess_constraints (insn);
5868
5869 int n_operands = recog_data.n_operands;
5870 int n_alternatives = recog_data.n_alternatives;
5871 for (i = 0; i < n_operands; i++)
5872 {
5873 rtx op = recog_data.operand[i];
5874 enum machine_mode mode = GET_MODE (op);
5875 const operand_alternative *op_alt;
5876 int offset = 0;
5877 bool win;
5878 int j;
5879
5880 /* For pre-AVX disallow unaligned loads/stores where the
5881 instructions don't support it. */
5882 if (!TARGET_AVX
5883 && VECTOR_MODE_P (GET_MODE (op))
5884 && misaligned_operand (op, GET_MODE (op)))
5885 {
5886 int min_align = get_attr_ssememalign (insn);
5887 if (min_align == 0)
5888 return false;
5889 }
5890
5891 /* A unary operator may be accepted by the predicate, but it
5892 is irrelevant for matching constraints. */
5893 if (UNARY_P (op))
5894 op = XEXP (op, 0);
5895
5896 if (GET_CODE (op) == SUBREG)
5897 {
5898 if (REG_P (SUBREG_REG (op))
5899 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5900 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5901 GET_MODE (SUBREG_REG (op)),
5902 SUBREG_BYTE (op),
5903 GET_MODE (op));
5904 op = SUBREG_REG (op);
5905 }
5906
5907 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5908 continue;
5909
5910 op_alt = recog_op_alt;
5911
5912 /* Operand has no constraints, anything is OK. */
5913 win = !n_alternatives;
5914
5915 alternative_mask preferred = get_preferred_alternatives (insn);
5916 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5917 {
5918 if (!TEST_BIT (preferred, j))
5919 continue;
5920 if (op_alt[i].anything_ok
5921 || (op_alt[i].matches != -1
5922 && operands_match_p
5923 (recog_data.operand[i],
5924 recog_data.operand[op_alt[i].matches]))
5925 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5926 {
5927 win = true;
5928 break;
5929 }
5930 }
5931
5932 if (!win)
5933 return false;
5934 }
5935 }
5936
5937 return true;
5938 }
5939 \f
5940 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
5941
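/* The value returned here has to match the shadow offset used by the
   libsanitizer run time on the target.  */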
5942 static unsigned HOST_WIDE_INT
5943 ix86_asan_shadow_offset (void)
5944 {
5945 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5946 : HOST_WIDE_INT_C (0x7fff8000))
5947 : (HOST_WIDE_INT_1 << 29);
5948 }
5949 \f
5950 /* Argument support functions. */
5951
5952 /* Return true when register may be used to pass function parameters. */
5953 bool
5954 ix86_function_arg_regno_p (int regno)
5955 {
5956 int i;
5957 const int *parm_regs;
5958
5959 if (!TARGET_64BIT)
5960 {
5961 if (TARGET_MACHO)
5962 return (regno < REGPARM_MAX
5963 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5964 else
5965 return (regno < REGPARM_MAX
5966 || (TARGET_MMX && MMX_REGNO_P (regno)
5967 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5968 || (TARGET_SSE && SSE_REGNO_P (regno)
5969 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5970 }
5971
5972 if (TARGET_SSE && SSE_REGNO_P (regno)
5973 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5974 return true;
5975
5976 /* TODO: The function should depend on current function ABI but
5977 builtins.c would need updating then. Therefore we use the
5978 default ABI. */
5979
5980 /* RAX is used as hidden argument to va_arg functions. */
5981 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5982 return true;
5983
5984 if (ix86_abi == MS_ABI)
5985 parm_regs = x86_64_ms_abi_int_parameter_registers;
5986 else
5987 parm_regs = x86_64_int_parameter_registers;
5988 for (i = 0; i < (ix86_abi == MS_ABI
5989 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5990 if (regno == parm_regs[i])
5991 return true;
5992 return false;
5993 }
5994
5995 /* Return true if we do not know how to pass TYPE solely in registers. */
5996
5997 static bool
5998 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5999 {
6000 if (must_pass_in_stack_var_size_or_pad (mode, type))
6001 return true;
6002
6003 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
6004 The layout_type routine is crafty and tries to trick us into passing
6005 currently unsupported vector types on the stack by using TImode. */
6006 return (!TARGET_64BIT && mode == TImode
6007 && type && TREE_CODE (type) != VECTOR_TYPE);
6008 }
6009
6010 /* Return the size, in bytes, of the area reserved for arguments passed
6011 in registers for the function represented by FNDECL, depending on the
6012 ABI used. */
6013 int
6014 ix86_reg_parm_stack_space (const_tree fndecl)
6015 {
6016 enum calling_abi call_abi = SYSV_ABI;
6017 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6018 call_abi = ix86_function_abi (fndecl);
6019 else
6020 call_abi = ix86_function_type_abi (fndecl);
6021 if (TARGET_64BIT && call_abi == MS_ABI)
6022 return 32;
6023 return 0;
6024 }
6025
6026 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6027 call ABI used. */
6028 enum calling_abi
6029 ix86_function_type_abi (const_tree fntype)
6030 {
6031 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6032 {
6033 enum calling_abi abi = ix86_abi;
6034 if (abi == SYSV_ABI)
6035 {
6036 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6037 abi = MS_ABI;
6038 }
6039 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6040 abi = SYSV_ABI;
6041 return abi;
6042 }
6043 return ix86_abi;
6044 }
6045
6046 /* We add this as a workaround in order to use the libc_has_function
6047 hook in i386.md. */
6048 bool
6049 ix86_libc_has_function (enum function_class fn_class)
6050 {
6051 return targetm.libc_has_function (fn_class);
6052 }
6053
6054 static bool
6055 ix86_function_ms_hook_prologue (const_tree fn)
6056 {
6057 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6058 {
6059 if (decl_function_context (fn) != NULL_TREE)
6060 error_at (DECL_SOURCE_LOCATION (fn),
6061 "ms_hook_prologue is not compatible with nested function");
6062 else
6063 return true;
6064 }
6065 return false;
6066 }
6067
6068 static enum calling_abi
6069 ix86_function_abi (const_tree fndecl)
6070 {
6071 if (! fndecl)
6072 return ix86_abi;
6073 return ix86_function_type_abi (TREE_TYPE (fndecl));
6074 }
6075
6076 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6077 call ABI used. */
6078 enum calling_abi
6079 ix86_cfun_abi (void)
6080 {
6081 if (! cfun)
6082 return ix86_abi;
6083 return cfun->machine->call_abi;
6084 }
6085
6086 /* Write the extra assembler code needed to declare a function properly. */
6087
6088 void
6089 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6090 tree decl)
6091 {
6092 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6093
6094 if (is_ms_hook)
6095 {
6096 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6097 unsigned int filler_cc = 0xcccccccc;
6098
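/* Emit a run of 0xcc (int3) filler bytes immediately before the
   function label; this forms the patch area used by hot-patching.  */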
6099 for (i = 0; i < filler_count; i += 4)
6100 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6101 }
6102
6103 #ifdef SUBTARGET_ASM_UNWIND_INIT
6104 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6105 #endif
6106
6107 ASM_OUTPUT_LABEL (asm_out_file, fname);
6108
6109 /* Output magic byte marker, if hot-patch attribute is set. */
6110 if (is_ms_hook)
6111 {
6112 if (TARGET_64BIT)
6113 {
6114 /* leaq [%rsp + 0], %rsp */
6115 asm_fprintf (asm_out_file, ASM_BYTE
6116 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6117 }
6118 else
6119 {
6120 /* movl.s %edi, %edi
6121 push %ebp
6122 movl.s %esp, %ebp */
6123 asm_fprintf (asm_out_file, ASM_BYTE
6124 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6125 }
6126 }
6127 }
6128
6129 /* regclass.c */
6130 extern void init_regs (void);
6131
6132 /* Implementation of the call ABI switching target hook. The call-used
6133 register set appropriate for FNDECL is selected. See also
6134 ix86_conditional_register_usage for more details. */
6135 void
6136 ix86_call_abi_override (const_tree fndecl)
6137 {
6138 if (fndecl == NULL_TREE)
6139 cfun->machine->call_abi = ix86_abi;
6140 else
6141 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6142 }
6143
6144 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers.
6145 Avoid expensive re-initialization of init_regs each time we switch
6146 function context, since this is needed only during RTL expansion. */
6147 static void
6148 ix86_maybe_switch_abi (void)
6149 {
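/* SI_REG is call-used under the 64-bit SYSV ABI but call-saved under
   the MS ABI, so its current setting tells us which ABI init_regs last
   configured; reinitialize only on a mismatch with this function's
   ABI.  */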
6150 if (TARGET_64BIT &&
6151 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6152 reinit_regs ();
6153 }
6154
6155 /* Return true if a pseudo register should be created and used to hold
6156 the GOT address for PIC code. */
6157 static bool
6158 ix86_use_pseudo_pic_reg (void)
6159 {
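/* No PIC register is needed when not generating PIC code; 64-bit
   small-model PIC and PE-COFF targets use RIP-relative addressing
   instead of a GOT pointer.  */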
6160 if ((TARGET_64BIT
6161 && (ix86_cmodel == CM_SMALL_PIC
6162 || TARGET_PECOFF))
6163 || !flag_pic)
6164 return false;
6165 return true;
6166 }
6167
6168 /* Create and initialize PIC register if required. */
6169 static void
6170 ix86_init_pic_reg (void)
6171 {
6172 edge entry_edge;
6173 rtx_insn *seq;
6174
6175 if (!ix86_use_pseudo_pic_reg ())
6176 return;
6177
6178 start_sequence ();
6179
6180 if (TARGET_64BIT)
6181 {
6182 if (ix86_cmodel == CM_LARGE_PIC)
6183 {
6184 rtx_code_label *label;
6185 rtx tmp_reg;
6186
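/* For the large PIC model the GOT may not be reachable with a 32-bit
   displacement: take the address of LABEL RIP-relatively, load the
   64-bit offset from LABEL to the GOT into TMP_REG, and add the two to
   form the GOT pointer.  */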
6187 gcc_assert (Pmode == DImode);
6188 label = gen_label_rtx ();
6189 emit_label (label);
6190 LABEL_PRESERVE_P (label) = 1;
6191 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
6192 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6193 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6194 label));
6195 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6196 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6197 pic_offset_table_rtx, tmp_reg));
6198 }
6199 else
6200 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6201 }
6202 else
6203 {
6204 rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6205 RTX_FRAME_RELATED_P (insn) = 1;
6206 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6207 }
6208
6209 seq = get_insns ();
6210 end_sequence ();
6211
6212 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6213 insert_insn_on_edge (seq, entry_edge);
6214 commit_one_edge_insertion (entry_edge);
6215 }
6216
6217 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6218 for a call to a function whose data type is FNTYPE.
6219 For a library call, FNTYPE is 0. */
6220
6221 void
6222 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6223 tree fntype, /* tree ptr for function decl */
6224 rtx libname, /* SYMBOL_REF of library name or 0 */
6225 tree fndecl,
6226 int caller)
6227 {
6228 struct cgraph_local_info *i;
6229
6230 memset (cum, 0, sizeof (*cum));
6231
6232 if (fndecl)
6233 {
6234 i = cgraph_node::local_info (fndecl);
6235 cum->call_abi = ix86_function_abi (fndecl);
6236 }
6237 else
6238 {
6239 i = NULL;
6240 cum->call_abi = ix86_function_type_abi (fntype);
6241 }
6242
6243 cum->caller = caller;
6244
6245 /* Set up the number of registers to use for passing arguments. */
6246 cum->nregs = ix86_regparm;
6247 if (TARGET_64BIT)
6248 {
6249 cum->nregs = (cum->call_abi == SYSV_ABI
6250 ? X86_64_REGPARM_MAX
6251 : X86_64_MS_REGPARM_MAX);
6252 }
6253 if (TARGET_SSE)
6254 {
6255 cum->sse_nregs = SSE_REGPARM_MAX;
6256 if (TARGET_64BIT)
6257 {
6258 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6259 ? X86_64_SSE_REGPARM_MAX
6260 : X86_64_MS_SSE_REGPARM_MAX);
6261 }
6262 }
6263 if (TARGET_MMX)
6264 cum->mmx_nregs = MMX_REGPARM_MAX;
6265 cum->warn_avx512f = true;
6266 cum->warn_avx = true;
6267 cum->warn_sse = true;
6268 cum->warn_mmx = true;
6269
6270   /* Because the type might mismatch between caller and callee, we need to
6271      use the actual type of the function for local calls.
6272      FIXME: cgraph_analyze can be told to actually record if a function uses
6273      va_start, so for local functions maybe_vaarg can be made more aggressive,
6274      helping K&R code.
6275      FIXME: once the type system is fixed, we won't need this code anymore.  */
6276 if (i && i->local && i->can_change_signature)
6277 fntype = TREE_TYPE (fndecl);
6278 cum->maybe_vaarg = (fntype
6279 ? (!prototype_p (fntype) || stdarg_p (fntype))
6280 : !libname);
6281
6282 if (!TARGET_64BIT)
6283 {
6284 /* If there are variable arguments, then we won't pass anything
6285 in registers in 32-bit mode. */
6286 if (stdarg_p (fntype))
6287 {
6288 cum->nregs = 0;
6289 cum->sse_nregs = 0;
6290 cum->mmx_nregs = 0;
6291 cum->warn_avx512f = false;
6292 cum->warn_avx = false;
6293 cum->warn_sse = false;
6294 cum->warn_mmx = false;
6295 return;
6296 }
6297
6298 /* Use ecx and edx registers if function has fastcall attribute,
6299 else look for regparm information. */
6300 if (fntype)
6301 {
6302 unsigned int ccvt = ix86_get_callcvt (fntype);
6303 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6304 {
6305 cum->nregs = 1;
6306 cum->fastcall = 1; /* Same first register as in fastcall. */
6307 }
6308 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6309 {
6310 cum->nregs = 2;
6311 cum->fastcall = 1;
6312 }
6313 else
6314 cum->nregs = ix86_function_regparm (fntype, fndecl);
6315 }
6316
6317 /* Set up the number of SSE registers used for passing SFmode
6318 and DFmode arguments. Warn for mismatching ABI. */
6319 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6320 }
6321 }
6322
6323 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6324 But in the case of vector types, it is some vector mode.
6325
6326 When we have only some of our vector isa extensions enabled, then there
6327 are some modes for which vector_mode_supported_p is false. For these
6328 modes, the generic vector support in gcc will choose some non-vector mode
6329 in order to implement the type. By computing the natural mode, we'll
6330 select the proper ABI location for the operand and not depend on whatever
6331 the middle-end decides to do with these vector types.
6332
6333    The middle-end can't deal with vector types larger than 16 bytes.  In this
6334    case, we return the original mode and warn about an ABI change if CUM isn't
6335    NULL.
6336
6337    If IN_RETURN is true, warn about an ABI change if the vector mode isn't
6338    available for the function return value.  */
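/* For example, given

     typedef int v4si __attribute__ ((vector_size (16)));

   compiled with -mno-sse, TYPE_MODE is not a vector mode, yet the psABI
   still classifies the value as V4SImode; this function recovers V4SImode
   and emits the -Wpsabi note once, while vectors wider than 16 bytes keep
   their original mode as described above.  */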
6339
6340 static enum machine_mode
6341 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6342 bool in_return)
6343 {
6344 enum machine_mode mode = TYPE_MODE (type);
6345
6346 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6347 {
6348 HOST_WIDE_INT size = int_size_in_bytes (type);
6349 if ((size == 8 || size == 16 || size == 32 || size == 64)
6350 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6351 && TYPE_VECTOR_SUBPARTS (type) > 1)
6352 {
6353 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6354
6355 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6356 mode = MIN_MODE_VECTOR_FLOAT;
6357 else
6358 mode = MIN_MODE_VECTOR_INT;
6359
6360 /* Get the mode which has this inner mode and number of units. */
6361 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6362 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6363 && GET_MODE_INNER (mode) == innermode)
6364 {
6365 if (size == 64 && !TARGET_AVX512F)
6366 {
6367 static bool warnedavx512f;
6368 static bool warnedavx512f_ret;
6369
6370 if (cum && cum->warn_avx512f && !warnedavx512f)
6371 {
6372 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6373 "without AVX512F enabled changes the ABI"))
6374 warnedavx512f = true;
6375 }
6376 else if (in_return && !warnedavx512f_ret)
6377 {
6378 if (warning (OPT_Wpsabi, "AVX512F vector return "
6379 "without AVX512F enabled changes the ABI"))
6380 warnedavx512f_ret = true;
6381 }
6382
6383 return TYPE_MODE (type);
6384 }
6385 else if (size == 32 && !TARGET_AVX)
6386 {
6387 static bool warnedavx;
6388 static bool warnedavx_ret;
6389
6390 if (cum && cum->warn_avx && !warnedavx)
6391 {
6392 if (warning (OPT_Wpsabi, "AVX vector argument "
6393 "without AVX enabled changes the ABI"))
6394 warnedavx = true;
6395 }
6396 else if (in_return && !warnedavx_ret)
6397 {
6398 if (warning (OPT_Wpsabi, "AVX vector return "
6399 "without AVX enabled changes the ABI"))
6400 warnedavx_ret = true;
6401 }
6402
6403 return TYPE_MODE (type);
6404 }
6405 else if (((size == 8 && TARGET_64BIT) || size == 16)
6406 && !TARGET_SSE)
6407 {
6408 static bool warnedsse;
6409 static bool warnedsse_ret;
6410
6411 if (cum && cum->warn_sse && !warnedsse)
6412 {
6413 if (warning (OPT_Wpsabi, "SSE vector argument "
6414 "without SSE enabled changes the ABI"))
6415 warnedsse = true;
6416 }
6417 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6418 {
6419 if (warning (OPT_Wpsabi, "SSE vector return "
6420 "without SSE enabled changes the ABI"))
6421 warnedsse_ret = true;
6422 }
6423 }
6424 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6425 {
6426 static bool warnedmmx;
6427 static bool warnedmmx_ret;
6428
6429 if (cum && cum->warn_mmx && !warnedmmx)
6430 {
6431 if (warning (OPT_Wpsabi, "MMX vector argument "
6432 "without MMX enabled changes the ABI"))
6433 warnedmmx = true;
6434 }
6435 else if (in_return && !warnedmmx_ret)
6436 {
6437 if (warning (OPT_Wpsabi, "MMX vector return "
6438 "without MMX enabled changes the ABI"))
6439 warnedmmx_ret = true;
6440 }
6441 }
6442 return mode;
6443 }
6444
6445 gcc_unreachable ();
6446 }
6447 }
6448
6449 return mode;
6450 }
6451
6452 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6453 this may not agree with the mode that the type system has chosen for the
6454 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6455 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6456
6457 static rtx
6458 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6459 unsigned int regno)
6460 {
6461 rtx tmp;
6462
6463 if (orig_mode != BLKmode)
6464 tmp = gen_rtx_REG (orig_mode, regno);
6465 else
6466 {
6467 tmp = gen_rtx_REG (mode, regno);
6468 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6469 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6470 }
6471
6472 return tmp;
6473 }
6474
6475 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
6476    The goal of this code is to classify each 8-byte chunk of the incoming
6477    argument by register class and assign registers accordingly.  */
6478
6479 /* Return the union class of CLASS1 and CLASS2.
6480 See the x86-64 PS ABI for details. */
6481
6482 static enum x86_64_reg_class
6483 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6484 {
6485 /* Rule #1: If both classes are equal, this is the resulting class. */
6486 if (class1 == class2)
6487 return class1;
6488
6489 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6490 the other class. */
6491 if (class1 == X86_64_NO_CLASS)
6492 return class2;
6493 if (class2 == X86_64_NO_CLASS)
6494 return class1;
6495
6496 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6497 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6498 return X86_64_MEMORY_CLASS;
6499
6500 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6501 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6502 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6503 return X86_64_INTEGERSI_CLASS;
6504 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6505 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6506 return X86_64_INTEGER_CLASS;
6507
6508 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6509 MEMORY is used. */
6510 if (class1 == X86_64_X87_CLASS
6511 || class1 == X86_64_X87UP_CLASS
6512 || class1 == X86_64_COMPLEX_X87_CLASS
6513 || class2 == X86_64_X87_CLASS
6514 || class2 == X86_64_X87UP_CLASS
6515 || class2 == X86_64_COMPLEX_X87_CLASS)
6516 return X86_64_MEMORY_CLASS;
6517
6518 /* Rule #6: Otherwise class SSE is used. */
6519 return X86_64_SSE_CLASS;
6520 }
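/* For instance, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS
   yields X86_64_INTEGERSI_CLASS (rule #4), while merging two different SSE
   subclasses such as X86_64_SSESF_CLASS and X86_64_SSEDF_CLASS falls
   through to rule #6 and yields X86_64_SSE_CLASS.  */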
6521
6522 /* Classify the argument of type TYPE and mode MODE.
6523 CLASSES will be filled by the register class used to pass each word
6524 of the operand. The number of words is returned. In case the parameter
6525 should be passed in memory, 0 is returned. As a special case for zero
6526 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6527
6528    BIT_OFFSET is used internally for handling records and specifies the
6529    offset in bits modulo 512 to avoid overflow cases.
6530
6531 See the x86-64 PS ABI for details.
6532 */
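/* For example, under the 64-bit SysV ABI

     struct s { double d; long l; };

   occupies two eightbytes; classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS,
   so examine_argument below counts one SSE and one integer register and
   construct_container builds a two-element PARALLEL for it.  */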
6533
6534 static int
6535 classify_argument (enum machine_mode mode, const_tree type,
6536 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6537 {
6538 HOST_WIDE_INT bytes =
6539 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6540 int words
6541 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6542
6543 /* Variable sized entities are always passed/returned in memory. */
6544 if (bytes < 0)
6545 return 0;
6546
6547 if (mode != VOIDmode
6548 && targetm.calls.must_pass_in_stack (mode, type))
6549 return 0;
6550
6551 if (type && AGGREGATE_TYPE_P (type))
6552 {
6553 int i;
6554 tree field;
6555 enum x86_64_reg_class subclasses[MAX_CLASSES];
6556
6557 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6558 if (bytes > 64)
6559 return 0;
6560
6561 for (i = 0; i < words; i++)
6562 classes[i] = X86_64_NO_CLASS;
6563
6564       /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
6565	  signal the memory class, so handle them as a special case.  */
6566 if (!words)
6567 {
6568 classes[0] = X86_64_NO_CLASS;
6569 return 1;
6570 }
6571
6572 /* Classify each field of record and merge classes. */
6573 switch (TREE_CODE (type))
6574 {
6575 case RECORD_TYPE:
6576 /* And now merge the fields of structure. */
6577 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6578 {
6579 if (TREE_CODE (field) == FIELD_DECL)
6580 {
6581 int num;
6582
6583 if (TREE_TYPE (field) == error_mark_node)
6584 continue;
6585
6586 /* Bitfields are always classified as integer. Handle them
6587 early, since later code would consider them to be
6588 misaligned integers. */
6589 if (DECL_BIT_FIELD (field))
6590 {
6591 for (i = (int_bit_position (field)
6592 + (bit_offset % 64)) / 8 / 8;
6593 i < ((int_bit_position (field) + (bit_offset % 64))
6594 + tree_to_shwi (DECL_SIZE (field))
6595 + 63) / 8 / 8; i++)
6596 classes[i] =
6597 merge_classes (X86_64_INTEGER_CLASS,
6598 classes[i]);
6599 }
6600 else
6601 {
6602 int pos;
6603
6604 type = TREE_TYPE (field);
6605
6606 /* Flexible array member is ignored. */
6607 if (TYPE_MODE (type) == BLKmode
6608 && TREE_CODE (type) == ARRAY_TYPE
6609 && TYPE_SIZE (type) == NULL_TREE
6610 && TYPE_DOMAIN (type) != NULL_TREE
6611 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6612 == NULL_TREE))
6613 {
6614 static bool warned;
6615
6616 if (!warned && warn_psabi)
6617 {
6618 warned = true;
6619 inform (input_location,
6620 "the ABI of passing struct with"
6621 " a flexible array member has"
6622 " changed in GCC 4.4");
6623 }
6624 continue;
6625 }
6626 num = classify_argument (TYPE_MODE (type), type,
6627 subclasses,
6628 (int_bit_position (field)
6629 + bit_offset) % 512);
6630 if (!num)
6631 return 0;
6632 pos = (int_bit_position (field)
6633 + (bit_offset % 64)) / 8 / 8;
6634 for (i = 0; i < num && (i + pos) < words; i++)
6635 classes[i + pos] =
6636 merge_classes (subclasses[i], classes[i + pos]);
6637 }
6638 }
6639 }
6640 break;
6641
6642 case ARRAY_TYPE:
6643 /* Arrays are handled as small records. */
6644 {
6645 int num;
6646 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6647 TREE_TYPE (type), subclasses, bit_offset);
6648 if (!num)
6649 return 0;
6650
6651 /* The partial classes are now full classes. */
6652 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6653 subclasses[0] = X86_64_SSE_CLASS;
6654 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6655 && !((bit_offset % 64) == 0 && bytes == 4))
6656 subclasses[0] = X86_64_INTEGER_CLASS;
6657
6658 for (i = 0; i < words; i++)
6659 classes[i] = subclasses[i % num];
6660
6661 break;
6662 }
6663 case UNION_TYPE:
6664 case QUAL_UNION_TYPE:
6665     case QUAL_UNION_TYPE:
6667 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6668 {
6669 if (TREE_CODE (field) == FIELD_DECL)
6670 {
6671 int num;
6672
6673 if (TREE_TYPE (field) == error_mark_node)
6674 continue;
6675
6676 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6677 TREE_TYPE (field), subclasses,
6678 bit_offset);
6679 if (!num)
6680 return 0;
6681 for (i = 0; i < num && i < words; i++)
6682 classes[i] = merge_classes (subclasses[i], classes[i]);
6683 }
6684 }
6685 break;
6686
6687 default:
6688 gcc_unreachable ();
6689 }
6690
6691 if (words > 2)
6692 {
6693	  /* When the size exceeds 16 bytes, if the first class isn't
6694	     X86_64_SSE_CLASS or any of the remaining classes isn't
6695	     X86_64_SSEUP_CLASS, everything should be passed in
6696	     memory.  */
6697 if (classes[0] != X86_64_SSE_CLASS)
6698 return 0;
6699
6700 for (i = 1; i < words; i++)
6701 if (classes[i] != X86_64_SSEUP_CLASS)
6702 return 0;
6703 }
6704
6705 /* Final merger cleanup. */
6706 for (i = 0; i < words; i++)
6707 {
6708 /* If one class is MEMORY, everything should be passed in
6709 memory. */
6710 if (classes[i] == X86_64_MEMORY_CLASS)
6711 return 0;
6712
6713	  /* X86_64_SSEUP_CLASS should always be preceded by
6714 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6715 if (classes[i] == X86_64_SSEUP_CLASS
6716 && classes[i - 1] != X86_64_SSE_CLASS
6717 && classes[i - 1] != X86_64_SSEUP_CLASS)
6718 {
6719 /* The first one should never be X86_64_SSEUP_CLASS. */
6720 gcc_assert (i != 0);
6721 classes[i] = X86_64_SSE_CLASS;
6722 }
6723
6724 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6725 everything should be passed in memory. */
6726 if (classes[i] == X86_64_X87UP_CLASS
6727 && (classes[i - 1] != X86_64_X87_CLASS))
6728 {
6729 static bool warned;
6730
6731 /* The first one should never be X86_64_X87UP_CLASS. */
6732 gcc_assert (i != 0);
6733 if (!warned && warn_psabi)
6734 {
6735 warned = true;
6736 inform (input_location,
6737 "the ABI of passing union with long double"
6738 " has changed in GCC 4.4");
6739 }
6740 return 0;
6741 }
6742 }
6743 return words;
6744 }
6745
6746   /* Compute the alignment needed.  We align all types to their natural
6747      boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
6748 if (mode != VOIDmode && mode != BLKmode)
6749 {
6750 int mode_alignment = GET_MODE_BITSIZE (mode);
6751
6752 if (mode == XFmode)
6753 mode_alignment = 128;
6754 else if (mode == XCmode)
6755 mode_alignment = 256;
6756 if (COMPLEX_MODE_P (mode))
6757 mode_alignment /= 2;
6758 /* Misaligned fields are always returned in memory. */
6759 if (bit_offset % mode_alignment)
6760 return 0;
6761 }
6762
6763   /* For V1xx modes, just use the base mode.  */
6764 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6765 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6766 mode = GET_MODE_INNER (mode);
6767
6768 /* Classification of atomic types. */
6769 switch (mode)
6770 {
6771 case SDmode:
6772 case DDmode:
6773 classes[0] = X86_64_SSE_CLASS;
6774 return 1;
6775 case TDmode:
6776 classes[0] = X86_64_SSE_CLASS;
6777 classes[1] = X86_64_SSEUP_CLASS;
6778 return 2;
6779 case DImode:
6780 case SImode:
6781 case HImode:
6782 case QImode:
6783 case CSImode:
6784 case CHImode:
6785 case CQImode:
6786 {
6787 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6788
6789 /* Analyze last 128 bits only. */
6790 size = (size - 1) & 0x7f;
6791
6792 if (size < 32)
6793 {
6794 classes[0] = X86_64_INTEGERSI_CLASS;
6795 return 1;
6796 }
6797 else if (size < 64)
6798 {
6799 classes[0] = X86_64_INTEGER_CLASS;
6800 return 1;
6801 }
6802 else if (size < 64+32)
6803 {
6804 classes[0] = X86_64_INTEGER_CLASS;
6805 classes[1] = X86_64_INTEGERSI_CLASS;
6806 return 2;
6807 }
6808 else if (size < 64+64)
6809 {
6810 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6811 return 2;
6812 }
6813 else
6814 gcc_unreachable ();
6815 }
6816 case CDImode:
6817 case TImode:
6818 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6819 return 2;
6820 case COImode:
6821 case OImode:
6822 /* OImode shouldn't be used directly. */
6823 gcc_unreachable ();
6824 case CTImode:
6825 return 0;
6826 case SFmode:
6827 if (!(bit_offset % 64))
6828 classes[0] = X86_64_SSESF_CLASS;
6829 else
6830 classes[0] = X86_64_SSE_CLASS;
6831 return 1;
6832 case DFmode:
6833 classes[0] = X86_64_SSEDF_CLASS;
6834 return 1;
6835 case XFmode:
6836 classes[0] = X86_64_X87_CLASS;
6837 classes[1] = X86_64_X87UP_CLASS;
6838 return 2;
6839 case TFmode:
6840 classes[0] = X86_64_SSE_CLASS;
6841 classes[1] = X86_64_SSEUP_CLASS;
6842 return 2;
6843 case SCmode:
6844 classes[0] = X86_64_SSE_CLASS;
6845 if (!(bit_offset % 64))
6846 return 1;
6847 else
6848 {
6849 static bool warned;
6850
6851 if (!warned && warn_psabi)
6852 {
6853 warned = true;
6854 inform (input_location,
6855 "the ABI of passing structure with complex float"
6856 " member has changed in GCC 4.4");
6857 }
6858 classes[1] = X86_64_SSESF_CLASS;
6859 return 2;
6860 }
6861 case DCmode:
6862 classes[0] = X86_64_SSEDF_CLASS;
6863 classes[1] = X86_64_SSEDF_CLASS;
6864 return 2;
6865 case XCmode:
6866 classes[0] = X86_64_COMPLEX_X87_CLASS;
6867 return 1;
6868 case TCmode:
6869       /* This mode is larger than 16 bytes.  */
6870 return 0;
6871 case V8SFmode:
6872 case V8SImode:
6873 case V32QImode:
6874 case V16HImode:
6875 case V4DFmode:
6876 case V4DImode:
6877 classes[0] = X86_64_SSE_CLASS;
6878 classes[1] = X86_64_SSEUP_CLASS;
6879 classes[2] = X86_64_SSEUP_CLASS;
6880 classes[3] = X86_64_SSEUP_CLASS;
6881 return 4;
6882 case V8DFmode:
6883 case V16SFmode:
6884 case V8DImode:
6885 case V16SImode:
6886 case V32HImode:
6887 case V64QImode:
6888 classes[0] = X86_64_SSE_CLASS;
6889 classes[1] = X86_64_SSEUP_CLASS;
6890 classes[2] = X86_64_SSEUP_CLASS;
6891 classes[3] = X86_64_SSEUP_CLASS;
6892 classes[4] = X86_64_SSEUP_CLASS;
6893 classes[5] = X86_64_SSEUP_CLASS;
6894 classes[6] = X86_64_SSEUP_CLASS;
6895 classes[7] = X86_64_SSEUP_CLASS;
6896 return 8;
6897 case V4SFmode:
6898 case V4SImode:
6899 case V16QImode:
6900 case V8HImode:
6901 case V2DFmode:
6902 case V2DImode:
6903 classes[0] = X86_64_SSE_CLASS;
6904 classes[1] = X86_64_SSEUP_CLASS;
6905 return 2;
6906 case V1TImode:
6907 case V1DImode:
6908 case V2SFmode:
6909 case V2SImode:
6910 case V4HImode:
6911 case V8QImode:
6912 classes[0] = X86_64_SSE_CLASS;
6913 return 1;
6914 case BLKmode:
6915 case VOIDmode:
6916 return 0;
6917 default:
6918 gcc_assert (VECTOR_MODE_P (mode));
6919
6920 if (bytes > 16)
6921 return 0;
6922
6923 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6924
6925 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6926 classes[0] = X86_64_INTEGERSI_CLASS;
6927 else
6928 classes[0] = X86_64_INTEGER_CLASS;
6929 classes[1] = X86_64_INTEGER_CLASS;
6930 return 1 + (bytes > 8);
6931 }
6932 }
6933
6934 /* Examine the argument and set the number of registers required in each
6935    class.  Return true iff the parameter should be passed in memory.  */
6936
6937 static bool
6938 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6939 int *int_nregs, int *sse_nregs)
6940 {
6941 enum x86_64_reg_class regclass[MAX_CLASSES];
6942 int n = classify_argument (mode, type, regclass, 0);
6943
6944 *int_nregs = 0;
6945 *sse_nregs = 0;
6946
6947 if (!n)
6948 return true;
6949 for (n--; n >= 0; n--)
6950 switch (regclass[n])
6951 {
6952 case X86_64_INTEGER_CLASS:
6953 case X86_64_INTEGERSI_CLASS:
6954 (*int_nregs)++;
6955 break;
6956 case X86_64_SSE_CLASS:
6957 case X86_64_SSESF_CLASS:
6958 case X86_64_SSEDF_CLASS:
6959 (*sse_nregs)++;
6960 break;
6961 case X86_64_NO_CLASS:
6962 case X86_64_SSEUP_CLASS:
6963 break;
6964 case X86_64_X87_CLASS:
6965 case X86_64_X87UP_CLASS:
6966 case X86_64_COMPLEX_X87_CLASS:
6967 if (!in_return)
6968 return true;
6969 break;
6970 case X86_64_MEMORY_CLASS:
6971 gcc_unreachable ();
6972 }
6973
6974 return false;
6975 }
6976
6977 /* Construct container for the argument used by GCC interface. See
6978 FUNCTION_ARG for the detailed description. */
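/* For example, a single __m256 argument classifies as
   { SSE, SSEUP, SSEUP, SSEUP } and is handled by the n == 4 fast path
   below, yielding one 256-bit SSE register rather than a PARALLEL; only
   mixed-class cases fall through to the PARALLEL construction at the
   end.  */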
6979
6980 static rtx
6981 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6982 const_tree type, int in_return, int nintregs, int nsseregs,
6983 const int *intreg, int sse_regno)
6984 {
6985 /* The following variables hold the static issued_error state. */
6986 static bool issued_sse_arg_error;
6987 static bool issued_sse_ret_error;
6988 static bool issued_x87_ret_error;
6989
6990 enum machine_mode tmpmode;
6991 int bytes =
6992 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6993 enum x86_64_reg_class regclass[MAX_CLASSES];
6994 int n;
6995 int i;
6996 int nexps = 0;
6997 int needed_sseregs, needed_intregs;
6998 rtx exp[MAX_CLASSES];
6999 rtx ret;
7000
7001 n = classify_argument (mode, type, regclass, 0);
7002 if (!n)
7003 return NULL;
7004 if (examine_argument (mode, type, in_return, &needed_intregs,
7005 &needed_sseregs))
7006 return NULL;
7007 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7008 return NULL;
7009
7010 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7011 some less clueful developer tries to use floating-point anyway. */
7012 if (needed_sseregs && !TARGET_SSE)
7013 {
7014 if (in_return)
7015 {
7016 if (!issued_sse_ret_error)
7017 {
7018 error ("SSE register return with SSE disabled");
7019 issued_sse_ret_error = true;
7020 }
7021 }
7022 else if (!issued_sse_arg_error)
7023 {
7024 error ("SSE register argument with SSE disabled");
7025 issued_sse_arg_error = true;
7026 }
7027 return NULL;
7028 }
7029
7030 /* Likewise, error if the ABI requires us to return values in the
7031 x87 registers and the user specified -mno-80387. */
7032 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7033 for (i = 0; i < n; i++)
7034 if (regclass[i] == X86_64_X87_CLASS
7035 || regclass[i] == X86_64_X87UP_CLASS
7036 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7037 {
7038 if (!issued_x87_ret_error)
7039 {
7040 error ("x87 register return with x87 disabled");
7041 issued_x87_ret_error = true;
7042 }
7043 return NULL;
7044 }
7045
7046 /* First construct simple cases. Avoid SCmode, since we want to use
7047      a single register to pass this type.  */
7048 if (n == 1 && mode != SCmode)
7049 switch (regclass[0])
7050 {
7051 case X86_64_INTEGER_CLASS:
7052 case X86_64_INTEGERSI_CLASS:
7053 return gen_rtx_REG (mode, intreg[0]);
7054 case X86_64_SSE_CLASS:
7055 case X86_64_SSESF_CLASS:
7056 case X86_64_SSEDF_CLASS:
7057 if (mode != BLKmode)
7058 return gen_reg_or_parallel (mode, orig_mode,
7059 SSE_REGNO (sse_regno));
7060 break;
7061 case X86_64_X87_CLASS:
7062 case X86_64_COMPLEX_X87_CLASS:
7063 return gen_rtx_REG (mode, FIRST_STACK_REG);
7064 case X86_64_NO_CLASS:
7065 /* Zero sized array, struct or class. */
7066 return NULL;
7067 default:
7068 gcc_unreachable ();
7069 }
7070 if (n == 2
7071 && regclass[0] == X86_64_SSE_CLASS
7072 && regclass[1] == X86_64_SSEUP_CLASS
7073 && mode != BLKmode)
7074 return gen_reg_or_parallel (mode, orig_mode,
7075 SSE_REGNO (sse_regno));
7076 if (n == 4
7077 && regclass[0] == X86_64_SSE_CLASS
7078 && regclass[1] == X86_64_SSEUP_CLASS
7079 && regclass[2] == X86_64_SSEUP_CLASS
7080 && regclass[3] == X86_64_SSEUP_CLASS
7081 && mode != BLKmode)
7082 return gen_reg_or_parallel (mode, orig_mode,
7083 SSE_REGNO (sse_regno));
7084 if (n == 8
7085 && regclass[0] == X86_64_SSE_CLASS
7086 && regclass[1] == X86_64_SSEUP_CLASS
7087 && regclass[2] == X86_64_SSEUP_CLASS
7088 && regclass[3] == X86_64_SSEUP_CLASS
7089 && regclass[4] == X86_64_SSEUP_CLASS
7090 && regclass[5] == X86_64_SSEUP_CLASS
7091 && regclass[6] == X86_64_SSEUP_CLASS
7092 && regclass[7] == X86_64_SSEUP_CLASS
7093 && mode != BLKmode)
7094 return gen_reg_or_parallel (mode, orig_mode,
7095 SSE_REGNO (sse_regno));
7096 if (n == 2
7097 && regclass[0] == X86_64_X87_CLASS
7098 && regclass[1] == X86_64_X87UP_CLASS)
7099 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7100
7101 if (n == 2
7102 && regclass[0] == X86_64_INTEGER_CLASS
7103 && regclass[1] == X86_64_INTEGER_CLASS
7104 && (mode == CDImode || mode == TImode)
7105 && intreg[0] + 1 == intreg[1])
7106 return gen_rtx_REG (mode, intreg[0]);
7107
7108 /* Otherwise figure out the entries of the PARALLEL. */
7109 for (i = 0; i < n; i++)
7110 {
7111 int pos;
7112
7113 switch (regclass[i])
7114 {
7115 case X86_64_NO_CLASS:
7116 break;
7117 case X86_64_INTEGER_CLASS:
7118 case X86_64_INTEGERSI_CLASS:
7119 /* Merge TImodes on aligned occasions here too. */
7120 if (i * 8 + 8 > bytes)
7121 tmpmode
7122 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7123 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7124 tmpmode = SImode;
7125 else
7126 tmpmode = DImode;
7127 /* We've requested 24 bytes we
7128	     don't have a mode for.  Use DImode.  */
7129 if (tmpmode == BLKmode)
7130 tmpmode = DImode;
7131 exp [nexps++]
7132 = gen_rtx_EXPR_LIST (VOIDmode,
7133 gen_rtx_REG (tmpmode, *intreg),
7134 GEN_INT (i*8));
7135 intreg++;
7136 break;
7137 case X86_64_SSESF_CLASS:
7138 exp [nexps++]
7139 = gen_rtx_EXPR_LIST (VOIDmode,
7140 gen_rtx_REG (SFmode,
7141 SSE_REGNO (sse_regno)),
7142 GEN_INT (i*8));
7143 sse_regno++;
7144 break;
7145 case X86_64_SSEDF_CLASS:
7146 exp [nexps++]
7147 = gen_rtx_EXPR_LIST (VOIDmode,
7148 gen_rtx_REG (DFmode,
7149 SSE_REGNO (sse_regno)),
7150 GEN_INT (i*8));
7151 sse_regno++;
7152 break;
7153 case X86_64_SSE_CLASS:
7154 pos = i;
7155 switch (n)
7156 {
7157 case 1:
7158 tmpmode = DImode;
7159 break;
7160 case 2:
7161 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7162 {
7163 tmpmode = TImode;
7164 i++;
7165 }
7166 else
7167 tmpmode = DImode;
7168 break;
7169 case 4:
7170 gcc_assert (i == 0
7171 && regclass[1] == X86_64_SSEUP_CLASS
7172 && regclass[2] == X86_64_SSEUP_CLASS
7173 && regclass[3] == X86_64_SSEUP_CLASS);
7174 tmpmode = OImode;
7175 i += 3;
7176 break;
7177 case 8:
7178 gcc_assert (i == 0
7179 && regclass[1] == X86_64_SSEUP_CLASS
7180 && regclass[2] == X86_64_SSEUP_CLASS
7181 && regclass[3] == X86_64_SSEUP_CLASS
7182 && regclass[4] == X86_64_SSEUP_CLASS
7183 && regclass[5] == X86_64_SSEUP_CLASS
7184 && regclass[6] == X86_64_SSEUP_CLASS
7185 && regclass[7] == X86_64_SSEUP_CLASS);
7186 tmpmode = XImode;
7187 i += 7;
7188 break;
7189 default:
7190 gcc_unreachable ();
7191 }
7192 exp [nexps++]
7193 = gen_rtx_EXPR_LIST (VOIDmode,
7194 gen_rtx_REG (tmpmode,
7195 SSE_REGNO (sse_regno)),
7196 GEN_INT (pos*8));
7197 sse_regno++;
7198 break;
7199 default:
7200 gcc_unreachable ();
7201 }
7202 }
7203
7204 /* Empty aligned struct, union or class. */
7205 if (nexps == 0)
7206 return NULL;
7207
7208 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7209 for (i = 0; i < nexps; i++)
7210 XVECEXP (ret, 0, i) = exp [i];
7211 return ret;
7212 }
7213
7214 /* Update the data in CUM to advance over an argument of mode MODE
7215 and data type TYPE. (TYPE is null for libcalls where that information
7216 may not be available.) */
7217
7218 static void
7219 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7220 const_tree type, HOST_WIDE_INT bytes,
7221 HOST_WIDE_INT words)
7222 {
7223 switch (mode)
7224 {
7225 default:
7226 break;
7227
7228 case BLKmode:
7229 if (bytes < 0)
7230 break;
7231 /* FALLTHRU */
7232
7233 case DImode:
7234 case SImode:
7235 case HImode:
7236 case QImode:
7237 cum->words += words;
7238 cum->nregs -= words;
7239 cum->regno += words;
7240
7241 if (cum->nregs <= 0)
7242 {
7243 cum->nregs = 0;
7244 cum->regno = 0;
7245 }
7246 break;
7247
7248 case OImode:
7249 /* OImode shouldn't be used directly. */
7250 gcc_unreachable ();
7251
7252 case DFmode:
7253 if (cum->float_in_sse < 2)
7254 break;
7255 case SFmode:
7256 if (cum->float_in_sse < 1)
7257 break;
7258 /* FALLTHRU */
7259
7260 case V8SFmode:
7261 case V8SImode:
7262 case V64QImode:
7263 case V32HImode:
7264 case V16SImode:
7265 case V8DImode:
7266 case V16SFmode:
7267 case V8DFmode:
7268 case V32QImode:
7269 case V16HImode:
7270 case V4DFmode:
7271 case V4DImode:
7272 case TImode:
7273 case V16QImode:
7274 case V8HImode:
7275 case V4SImode:
7276 case V2DImode:
7277 case V4SFmode:
7278 case V2DFmode:
7279 if (!type || !AGGREGATE_TYPE_P (type))
7280 {
7281 cum->sse_words += words;
7282 cum->sse_nregs -= 1;
7283 cum->sse_regno += 1;
7284 if (cum->sse_nregs <= 0)
7285 {
7286 cum->sse_nregs = 0;
7287 cum->sse_regno = 0;
7288 }
7289 }
7290 break;
7291
7292 case V8QImode:
7293 case V4HImode:
7294 case V2SImode:
7295 case V2SFmode:
7296 case V1TImode:
7297 case V1DImode:
7298 if (!type || !AGGREGATE_TYPE_P (type))
7299 {
7300 cum->mmx_words += words;
7301 cum->mmx_nregs -= 1;
7302 cum->mmx_regno += 1;
7303 if (cum->mmx_nregs <= 0)
7304 {
7305 cum->mmx_nregs = 0;
7306 cum->mmx_regno = 0;
7307 }
7308 }
7309 break;
7310 }
7311 }
7312
7313 static void
7314 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7315 const_tree type, HOST_WIDE_INT words, bool named)
7316 {
7317 int int_nregs, sse_nregs;
7318
7319   /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
7320 if (!named && (VALID_AVX512F_REG_MODE (mode)
7321 || VALID_AVX256_REG_MODE (mode)))
7322 return;
7323
7324 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7325 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7326 {
7327 cum->nregs -= int_nregs;
7328 cum->sse_nregs -= sse_nregs;
7329 cum->regno += int_nregs;
7330 cum->sse_regno += sse_nregs;
7331 }
7332 else
7333 {
7334 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7335 cum->words = (cum->words + align - 1) & ~(align - 1);
7336 cum->words += words;
7337 }
7338 }
7339
7340 static void
7341 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7342 HOST_WIDE_INT words)
7343 {
7344   /* Otherwise, this should have been passed indirectly.  */
7345 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7346
7347 cum->words += words;
7348 if (cum->nregs > 0)
7349 {
7350 cum->nregs -= 1;
7351 cum->regno += 1;
7352 }
7353 }
7354
7355 /* Update the data in CUM to advance over an argument of mode MODE and
7356 data type TYPE. (TYPE is null for libcalls where that information
7357 may not be available.) */
7358
7359 static void
7360 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7361 const_tree type, bool named)
7362 {
7363 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7364 HOST_WIDE_INT bytes, words;
7365
7366 if (mode == BLKmode)
7367 bytes = int_size_in_bytes (type);
7368 else
7369 bytes = GET_MODE_SIZE (mode);
7370 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7371
7372 if (type)
7373 mode = type_natural_mode (type, NULL, false);
7374
7375 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7376 function_arg_advance_ms_64 (cum, bytes, words);
7377 else if (TARGET_64BIT)
7378 function_arg_advance_64 (cum, mode, type, words, named);
7379 else
7380 function_arg_advance_32 (cum, mode, type, bytes, words);
7381 }
7382
7383 /* Define where to put the arguments to a function.
7384 Value is zero to push the argument on the stack,
7385 or a hard register in which to store the argument.
7386
7387 MODE is the argument's machine mode.
7388 TYPE is the data type of the argument (as a tree).
7389 This is null for libcalls where that information may
7390 not be available.
7391 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7392 the preceding args and about the function being called.
7393 NAMED is nonzero if this argument is a named parameter
7394 (otherwise it is an extra parameter matching an ellipsis). */
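/* For example, a 32-bit fastcall function such as

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   starts with cum->nregs == 2 and cum->fastcall == 1, so A is assigned
   to %ecx, B to %edx, and C is pushed on the stack by the caller.  */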
7395
7396 static rtx
7397 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7398 enum machine_mode orig_mode, const_tree type,
7399 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7400 {
7401 /* Avoid the AL settings for the Unix64 ABI. */
7402 if (mode == VOIDmode)
7403 return constm1_rtx;
7404
7405 switch (mode)
7406 {
7407 default:
7408 break;
7409
7410 case BLKmode:
7411 if (bytes < 0)
7412 break;
7413 /* FALLTHRU */
7414 case DImode:
7415 case SImode:
7416 case HImode:
7417 case QImode:
7418 if (words <= cum->nregs)
7419 {
7420 int regno = cum->regno;
7421
7422	  /* Fastcall allocates the first two DWORD-sized (SImode) or
7423	     smaller arguments to ECX and EDX if the argument isn't an
7424	     aggregate type.  */
7425 if (cum->fastcall)
7426 {
7427 if (mode == BLKmode
7428 || mode == DImode
7429 || (type && AGGREGATE_TYPE_P (type)))
7430 break;
7431
7432 /* ECX not EAX is the first allocated register. */
7433 if (regno == AX_REG)
7434 regno = CX_REG;
7435 }
7436 return gen_rtx_REG (mode, regno);
7437 }
7438 break;
7439
7440 case DFmode:
7441 if (cum->float_in_sse < 2)
7442 break;
7443 case SFmode:
7444 if (cum->float_in_sse < 1)
7445 break;
7446 /* FALLTHRU */
7447 case TImode:
7448 /* In 32bit, we pass TImode in xmm registers. */
7449 case V16QImode:
7450 case V8HImode:
7451 case V4SImode:
7452 case V2DImode:
7453 case V4SFmode:
7454 case V2DFmode:
7455 if (!type || !AGGREGATE_TYPE_P (type))
7456 {
7457 if (cum->sse_nregs)
7458 return gen_reg_or_parallel (mode, orig_mode,
7459 cum->sse_regno + FIRST_SSE_REG);
7460 }
7461 break;
7462
7463 case OImode:
7464 case XImode:
7465 /* OImode and XImode shouldn't be used directly. */
7466 gcc_unreachable ();
7467
7468 case V64QImode:
7469 case V32HImode:
7470 case V16SImode:
7471 case V8DImode:
7472 case V16SFmode:
7473 case V8DFmode:
7474 case V8SFmode:
7475 case V8SImode:
7476 case V32QImode:
7477 case V16HImode:
7478 case V4DFmode:
7479 case V4DImode:
7480 if (!type || !AGGREGATE_TYPE_P (type))
7481 {
7482 if (cum->sse_nregs)
7483 return gen_reg_or_parallel (mode, orig_mode,
7484 cum->sse_regno + FIRST_SSE_REG);
7485 }
7486 break;
7487
7488 case V8QImode:
7489 case V4HImode:
7490 case V2SImode:
7491 case V2SFmode:
7492 case V1TImode:
7493 case V1DImode:
7494 if (!type || !AGGREGATE_TYPE_P (type))
7495 {
7496 if (cum->mmx_nregs)
7497 return gen_reg_or_parallel (mode, orig_mode,
7498 cum->mmx_regno + FIRST_MMX_REG);
7499 }
7500 break;
7501 }
7502
7503 return NULL_RTX;
7504 }
7505
7506 static rtx
7507 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7508 enum machine_mode orig_mode, const_tree type, bool named)
7509 {
7510 /* Handle a hidden AL argument containing number of registers
7511 for varargs x86-64 functions. */
7512 if (mode == VOIDmode)
7513 return GEN_INT (cum->maybe_vaarg
7514 ? (cum->sse_nregs < 0
7515 ? X86_64_SSE_REGPARM_MAX
7516 : cum->sse_regno)
7517 : -1);
7518
7519 switch (mode)
7520 {
7521 default:
7522 break;
7523
7524 case V8SFmode:
7525 case V8SImode:
7526 case V32QImode:
7527 case V16HImode:
7528 case V4DFmode:
7529 case V4DImode:
7530 case V16SFmode:
7531 case V16SImode:
7532 case V64QImode:
7533 case V32HImode:
7534 case V8DFmode:
7535 case V8DImode:
7536       /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
7537 if (!named)
7538 return NULL;
7539 break;
7540 }
7541
7542 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7543 cum->sse_nregs,
7544 &x86_64_int_parameter_registers [cum->regno],
7545 cum->sse_regno);
7546 }
7547
7548 static rtx
7549 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7550 enum machine_mode orig_mode, bool named,
7551 HOST_WIDE_INT bytes)
7552 {
7553 unsigned int regno;
7554
7555   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7556      We use a value of -2 to specify that the current function call is MS ABI.  */
7557 if (mode == VOIDmode)
7558 return GEN_INT (-2);
7559
7560 /* If we've run out of registers, it goes on the stack. */
7561 if (cum->nregs == 0)
7562 return NULL_RTX;
7563
7564 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7565
7566 /* Only floating point modes are passed in anything but integer regs. */
7567 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7568 {
7569 if (named)
7570 regno = cum->regno + FIRST_SSE_REG;
7571 else
7572 {
7573 rtx t1, t2;
7574
7575 /* Unnamed floating parameters are passed in both the
7576 SSE and integer registers. */
7577 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7578 t2 = gen_rtx_REG (mode, regno);
7579 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7580 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7581 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7582 }
7583 }
7584   /* Handle aggregate types passed in registers.  */
7585 if (orig_mode == BLKmode)
7586 {
7587 if (bytes > 0 && bytes <= 8)
7588 mode = (bytes > 4 ? DImode : SImode);
7589 if (mode == BLKmode)
7590 mode = DImode;
7591 }
7592
7593 return gen_reg_or_parallel (mode, orig_mode, regno);
7594 }
7595
7596 /* Return where to put the arguments to a function.
7597 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7598
7599 MODE is the argument's machine mode. TYPE is the data type of the
7600 argument. It is null for libcalls where that information may not be
7601 available. CUM gives information about the preceding args and about
7602 the function being called. NAMED is nonzero if this argument is a
7603 named parameter (otherwise it is an extra parameter matching an
7604 ellipsis). */
7605
7606 static rtx
7607 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7608 const_tree type, bool named)
7609 {
7610 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7611 enum machine_mode mode = omode;
7612 HOST_WIDE_INT bytes, words;
7613 rtx arg;
7614
7615 if (mode == BLKmode)
7616 bytes = int_size_in_bytes (type);
7617 else
7618 bytes = GET_MODE_SIZE (mode);
7619 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7620
7621 /* To simplify the code below, represent vector types with a vector mode
7622 even if MMX/SSE are not active. */
7623 if (type && TREE_CODE (type) == VECTOR_TYPE)
7624 mode = type_natural_mode (type, cum, false);
7625
7626 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7627 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7628 else if (TARGET_64BIT)
7629 arg = function_arg_64 (cum, mode, omode, type, named);
7630 else
7631 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7632
7633 return arg;
7634 }
7635
7636 /* A C expression that indicates when an argument must be passed by
7637 reference. If nonzero for an argument, a copy of that argument is
7638 made in memory and a pointer to the argument is passed instead of
7639 the argument itself. The pointer is passed in whatever way is
7640 appropriate for passing a pointer to that type. */
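/* For example, under the 64-bit MS ABI an __m128 argument or a 12-byte
   struct is passed by reference because its size is not 1, 2, 4, or 8
   bytes, while an 8-byte struct is passed by value; under the SysV ABI
   only variable-sized objects take this path.  */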
7641
7642 static bool
7643 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7644 const_tree type, bool)
7645 {
7646 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7647
7648 /* See Windows x64 Software Convention. */
7649 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7650 {
7651 int msize = (int) GET_MODE_SIZE (mode);
7652 if (type)
7653 {
7654 /* Arrays are passed by reference. */
7655 if (TREE_CODE (type) == ARRAY_TYPE)
7656 return true;
7657
7658 if (AGGREGATE_TYPE_P (type))
7659 {
7660 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7661 are passed by reference. */
7662 msize = int_size_in_bytes (type);
7663 }
7664 }
7665
7666 /* __m128 is passed by reference. */
7667 switch (msize) {
7668 case 1: case 2: case 4: case 8:
7669 break;
7670 default:
7671 return true;
7672 }
7673 }
7674 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7675     return true;
7676
7677   return false;
7678 }
7679
7680 /* Return true when TYPE should be 128bit aligned for 32bit argument
7681 passing ABI. XXX: This function is obsolete and is only used for
7682 checking psABI compatibility with previous versions of GCC. */
7683
7684 static bool
7685 ix86_compat_aligned_value_p (const_tree type)
7686 {
7687 enum machine_mode mode = TYPE_MODE (type);
7688 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7689 || mode == TDmode
7690 || mode == TFmode
7691 || mode == TCmode)
7692 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7693 return true;
7694 if (TYPE_ALIGN (type) < 128)
7695 return false;
7696
7697 if (AGGREGATE_TYPE_P (type))
7698 {
7699 /* Walk the aggregates recursively. */
7700 switch (TREE_CODE (type))
7701 {
7702 case RECORD_TYPE:
7703 case UNION_TYPE:
7704 case QUAL_UNION_TYPE:
7705 {
7706 tree field;
7707
7708 /* Walk all the structure fields. */
7709 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7710 {
7711 if (TREE_CODE (field) == FIELD_DECL
7712 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7713 return true;
7714 }
7715 break;
7716 }
7717
7718 case ARRAY_TYPE:
7719	/* Just for use if some languages pass arrays by value.  */
7720 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7721 return true;
7722 break;
7723
7724 default:
7725 gcc_unreachable ();
7726 }
7727 }
7728 return false;
7729 }
7730
7731 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7732 XXX: This function is obsolete and is only used for checking psABI
7733 compatibility with previous versions of GCC. */
7734
7735 static unsigned int
7736 ix86_compat_function_arg_boundary (enum machine_mode mode,
7737 const_tree type, unsigned int align)
7738 {
7739 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7740 natural boundaries. */
7741 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7742 {
7743 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7744 make an exception for SSE modes since these require 128bit
7745 alignment.
7746
7747 The handling here differs from field_alignment. ICC aligns MMX
7748 arguments to 4 byte boundaries, while structure fields are aligned
7749 to 8 byte boundaries. */
7750 if (!type)
7751 {
7752 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7753 align = PARM_BOUNDARY;
7754 }
7755 else
7756 {
7757 if (!ix86_compat_aligned_value_p (type))
7758 align = PARM_BOUNDARY;
7759 }
7760 }
7761 if (align > BIGGEST_ALIGNMENT)
7762 align = BIGGEST_ALIGNMENT;
7763 return align;
7764 }
7765
7766 /* Return true when TYPE should be 128bit aligned for 32bit argument
7767 passing ABI. */
7768
7769 static bool
7770 ix86_contains_aligned_value_p (const_tree type)
7771 {
7772 enum machine_mode mode = TYPE_MODE (type);
7773
7774 if (mode == XFmode || mode == XCmode)
7775 return false;
7776
7777 if (TYPE_ALIGN (type) < 128)
7778 return false;
7779
7780 if (AGGREGATE_TYPE_P (type))
7781 {
7782 /* Walk the aggregates recursively. */
7783 switch (TREE_CODE (type))
7784 {
7785 case RECORD_TYPE:
7786 case UNION_TYPE:
7787 case QUAL_UNION_TYPE:
7788 {
7789 tree field;
7790
7791 /* Walk all the structure fields. */
7792 for (field = TYPE_FIELDS (type);
7793 field;
7794 field = DECL_CHAIN (field))
7795 {
7796 if (TREE_CODE (field) == FIELD_DECL
7797 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7798 return true;
7799 }
7800 break;
7801 }
7802
7803 case ARRAY_TYPE:
7804	/* Just for use if some languages pass arrays by value.  */
7805 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7806 return true;
7807 break;
7808
7809 default:
7810 gcc_unreachable ();
7811 }
7812 }
7813 else
7814 return TYPE_ALIGN (type) >= 128;
7815
7816 return false;
7817 }
7818
7819 /* Gives the alignment boundary, in bits, of an argument with the
7820 specified mode and type. */
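/* For example, on ia32 a double argument is only aligned to PARM_BOUNDARY
   (4 bytes) on the stack even though TYPE_ALIGN is 64, while an __m128
   argument keeps its 128-bit alignment; if the result differs from the
   pre-GCC 4.6 rules computed by ix86_compat_function_arg_boundary above,
   a single -Wpsabi note is emitted.  */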
7821
7822 static unsigned int
7823 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7824 {
7825 unsigned int align;
7826 if (type)
7827 {
7828       /* Since the main variant type is used for the call, convert the type
7829	  to its main variant.  */
7830 type = TYPE_MAIN_VARIANT (type);
7831 align = TYPE_ALIGN (type);
7832 }
7833 else
7834 align = GET_MODE_ALIGNMENT (mode);
7835 if (align < PARM_BOUNDARY)
7836 align = PARM_BOUNDARY;
7837 else
7838 {
7839 static bool warned;
7840 unsigned int saved_align = align;
7841
7842 if (!TARGET_64BIT)
7843 {
7844 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7845 if (!type)
7846 {
7847 if (mode == XFmode || mode == XCmode)
7848 align = PARM_BOUNDARY;
7849 }
7850 else if (!ix86_contains_aligned_value_p (type))
7851 align = PARM_BOUNDARY;
7852
7853 if (align < 128)
7854 align = PARM_BOUNDARY;
7855 }
7856
7857 if (warn_psabi
7858 && !warned
7859 && align != ix86_compat_function_arg_boundary (mode, type,
7860 saved_align))
7861 {
7862 warned = true;
7863 inform (input_location,
7864 "The ABI for passing parameters with %d-byte"
7865 " alignment has changed in GCC 4.6",
7866 align / BITS_PER_UNIT);
7867 }
7868 }
7869
7870 return align;
7871 }
7872
7873 /* Return true if N is a possible register number of function value. */
7874
7875 static bool
7876 ix86_function_value_regno_p (const unsigned int regno)
7877 {
7878 switch (regno)
7879 {
7880 case AX_REG:
7881 return true;
7882 case DX_REG:
7883 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7884 case DI_REG:
7885 case SI_REG:
7886 return TARGET_64BIT && ix86_abi != MS_ABI;
7887
7888 /* Complex values are returned in %st(0)/%st(1) pair. */
7889 case ST0_REG:
7890 case ST1_REG:
7891 /* TODO: The function should depend on current function ABI but
7892 builtins.c would need updating then. Therefore we use the
7893 default ABI. */
7894 if (TARGET_64BIT && ix86_abi == MS_ABI)
7895 return false;
7896 return TARGET_FLOAT_RETURNS_IN_80387;
7897
7898 /* Complex values are returned in %xmm0/%xmm1 pair. */
7899 case XMM0_REG:
7900 case XMM1_REG:
7901 return TARGET_SSE;
7902
7903 case MM0_REG:
7904 if (TARGET_MACHO || TARGET_64BIT)
7905 return false;
7906 return TARGET_MMX;
7907 }
7908
7909 return false;
7910 }
7911
7912 /* Define how to find the value returned by a function.
7913 VALTYPE is the data type of the value (as a tree).
7914 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7915 otherwise, FUNC is 0. */
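/* For example, on ia32 a double return value lands in %st(0), or in %xmm0
   for local functions when SSE math is enabled or when the sseregparm
   attribute is used, whereas both 64-bit ABIs return it in %xmm0.  */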
7916
7917 static rtx
7918 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7919 const_tree fntype, const_tree fn)
7920 {
7921 unsigned int regno;
7922
7923 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7924 we normally prevent this case when mmx is not available. However
7925 some ABIs may require the result to be returned like DImode. */
7926 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7927 regno = FIRST_MMX_REG;
7928
7929 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7930 we prevent this case when sse is not available. However some ABIs
7931 may require the result to be returned like integer TImode. */
7932 else if (mode == TImode
7933 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7934 regno = FIRST_SSE_REG;
7935
7936 /* 32-byte vector modes in %ymm0. */
7937 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7938 regno = FIRST_SSE_REG;
7939
7940 /* 64-byte vector modes in %zmm0. */
7941 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7942 regno = FIRST_SSE_REG;
7943
7944 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7945 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7946 regno = FIRST_FLOAT_REG;
7947 else
7948 /* Most things go in %eax. */
7949 regno = AX_REG;
7950
7951 /* Override FP return register with %xmm0 for local functions when
7952 SSE math is enabled or for functions with sseregparm attribute. */
7953 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7954 {
7955 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7956 if ((sse_level >= 1 && mode == SFmode)
7957 || (sse_level == 2 && mode == DFmode))
7958 regno = FIRST_SSE_REG;
7959 }
7960
7961 /* OImode shouldn't be used directly. */
7962 gcc_assert (mode != OImode);
7963
7964 return gen_rtx_REG (orig_mode, regno);
7965 }
7966
7967 static rtx
7968 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7969 const_tree valtype)
7970 {
7971 rtx ret;
7972
7973 /* Handle libcalls, which don't provide a type node. */
7974 if (valtype == NULL)
7975 {
7976 unsigned int regno;
7977
7978 switch (mode)
7979 {
7980 case SFmode:
7981 case SCmode:
7982 case DFmode:
7983 case DCmode:
7984 case TFmode:
7985 case SDmode:
7986 case DDmode:
7987 case TDmode:
7988 regno = FIRST_SSE_REG;
7989 break;
7990 case XFmode:
7991 case XCmode:
7992 regno = FIRST_FLOAT_REG;
7993 break;
7994 case TCmode:
7995 return NULL;
7996 default:
7997 regno = AX_REG;
7998 }
7999
8000 return gen_rtx_REG (mode, regno);
8001 }
8002 else if (POINTER_TYPE_P (valtype))
8003 {
8004 /* Pointers are always returned in word_mode. */
8005 mode = word_mode;
8006 }
8007
8008 ret = construct_container (mode, orig_mode, valtype, 1,
8009 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8010 x86_64_int_return_registers, 0);
8011
8012   /* For zero-sized structures, construct_container returns NULL, but we need
8013      to keep the rest of the compiler happy by returning a meaningful value.  */
8014 if (!ret)
8015 ret = gen_rtx_REG (orig_mode, AX_REG);
8016
8017 return ret;
8018 }
8019
8020 static rtx
8021 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
8022 const_tree valtype)
8023 {
8024 unsigned int regno = AX_REG;
8025
8026 if (TARGET_SSE)
8027 {
8028 switch (GET_MODE_SIZE (mode))
8029 {
8030 case 16:
8031 if (valtype != NULL_TREE
8032 && !VECTOR_INTEGER_TYPE_P (valtype)
8034 && !INTEGRAL_TYPE_P (valtype)
8035 && !VECTOR_FLOAT_TYPE_P (valtype))
8036 break;
8037 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8038 && !COMPLEX_MODE_P (mode))
8039 regno = FIRST_SSE_REG;
8040 break;
8041 case 8:
8042 case 4:
8043 if (mode == SFmode || mode == DFmode)
8044 regno = FIRST_SSE_REG;
8045 break;
8046 default:
8047 break;
8048 }
8049 }
8050 return gen_rtx_REG (orig_mode, regno);
8051 }
8052
8053 static rtx
8054 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8055 enum machine_mode orig_mode, enum machine_mode mode)
8056 {
8057 const_tree fn, fntype;
8058
8059 fn = NULL_TREE;
8060 if (fntype_or_decl && DECL_P (fntype_or_decl))
8061 fn = fntype_or_decl;
8062 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8063
8064 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8065 return function_value_ms_64 (orig_mode, mode, valtype);
8066 else if (TARGET_64BIT)
8067 return function_value_64 (orig_mode, mode, valtype);
8068 else
8069 return function_value_32 (orig_mode, mode, fntype, fn);
8070 }
8071
8072 static rtx
8073 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8074 {
8075 enum machine_mode mode, orig_mode;
8076
8077 orig_mode = TYPE_MODE (valtype);
8078 mode = type_natural_mode (valtype, NULL, true);
8079 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8080 }
8081
8082 /* Pointer function arguments and return values are promoted to
8083 word_mode. */
8084
8085 static enum machine_mode
8086 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8087 int *punsignedp, const_tree fntype,
8088 int for_return)
8089 {
8090 if (type != NULL_TREE && POINTER_TYPE_P (type))
8091 {
8092 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8093 return word_mode;
8094 }
8095 return default_promote_function_mode (type, mode, punsignedp, fntype,
8096 for_return);
8097 }
8098
8099 /* Return true if a structure, union or array with MODE containing FIELD
8100 should be accessed using BLKmode. */
8101
8102 static bool
8103 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8104 {
8105 /* Union with XFmode must be in BLKmode. */
8106 return (mode == XFmode
8107 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8108 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8109 }
8110
8111 rtx
8112 ix86_libcall_value (enum machine_mode mode)
8113 {
8114 return ix86_function_value_1 (NULL, NULL, mode, mode);
8115 }
8116
8117 /* Return true iff type is returned in memory. */
8118
8119 static bool
8120 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8121 {
8122 #ifdef SUBTARGET_RETURN_IN_MEMORY
8123 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8124 #else
8125 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8126 HOST_WIDE_INT size;
8127
8128 if (TARGET_64BIT)
8129 {
8130 if (ix86_function_type_abi (fntype) == MS_ABI)
8131 {
8132 size = int_size_in_bytes (type);
8133
8134 /* __m128 is returned in xmm0. */
8135 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8136 || INTEGRAL_TYPE_P (type)
8137 || VECTOR_FLOAT_TYPE_P (type))
8138 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8139 && !COMPLEX_MODE_P (mode)
8140 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8141 return false;
8142
8143 	  /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
8144 return size != 1 && size != 2 && size != 4 && size != 8;
8145 }
8146 else
8147 {
8148 int needed_intregs, needed_sseregs;
8149
8150 return examine_argument (mode, type, 1,
8151 &needed_intregs, &needed_sseregs);
8152 }
8153 }
8154 else
8155 {
8156 if (mode == BLKmode)
8157 return true;
8158
8159 size = int_size_in_bytes (type);
8160
8161 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8162 return false;
8163
8164 if (VECTOR_MODE_P (mode) || mode == TImode)
8165 {
8166 /* User-created vectors small enough to fit in EAX. */
8167 if (size < 8)
8168 return false;
8169
8170 	  /* Unless the ABI prescribes otherwise,
8171 MMX/3dNow values are returned in MM0 if available. */
8172
8173 if (size == 8)
8174 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8175
8176 /* SSE values are returned in XMM0 if available. */
8177 if (size == 16)
8178 return !TARGET_SSE;
8179
8180 /* AVX values are returned in YMM0 if available. */
8181 if (size == 32)
8182 return !TARGET_AVX;
8183
8184 /* AVX512F values are returned in ZMM0 if available. */
8185 if (size == 64)
8186 return !TARGET_AVX512F;
8187 }
8188
8189 if (mode == XFmode)
8190 return false;
8191
8192 if (size > 12)
8193 return true;
8194
8195 /* OImode shouldn't be used directly. */
8196 gcc_assert (mode != OImode);
8197
8198 return false;
8199 }
8200 #endif
8201 }
8202
8203 \f
8204 /* Create the va_list data type. */
8205
8206 /* Returns the calling-convention-specific va_list data type.
8207 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
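/* For SYSV_ABI the record built below corresponds to the layout mandated
   by the psABI, roughly

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag, va_list[1];

   while the 32-bit and MS_ABI variants are a plain character pointer.  */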
8208
8209 static tree
8210 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8211 {
8212 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8213
8214 /* For i386 we use a plain pointer to the argument area. */
8215 if (!TARGET_64BIT || abi == MS_ABI)
8216 return build_pointer_type (char_type_node);
8217
8218 record = lang_hooks.types.make_type (RECORD_TYPE);
8219 type_decl = build_decl (BUILTINS_LOCATION,
8220 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8221
8222 f_gpr = build_decl (BUILTINS_LOCATION,
8223 FIELD_DECL, get_identifier ("gp_offset"),
8224 unsigned_type_node);
8225 f_fpr = build_decl (BUILTINS_LOCATION,
8226 FIELD_DECL, get_identifier ("fp_offset"),
8227 unsigned_type_node);
8228 f_ovf = build_decl (BUILTINS_LOCATION,
8229 FIELD_DECL, get_identifier ("overflow_arg_area"),
8230 ptr_type_node);
8231 f_sav = build_decl (BUILTINS_LOCATION,
8232 FIELD_DECL, get_identifier ("reg_save_area"),
8233 ptr_type_node);
8234
8235 va_list_gpr_counter_field = f_gpr;
8236 va_list_fpr_counter_field = f_fpr;
8237
8238 DECL_FIELD_CONTEXT (f_gpr) = record;
8239 DECL_FIELD_CONTEXT (f_fpr) = record;
8240 DECL_FIELD_CONTEXT (f_ovf) = record;
8241 DECL_FIELD_CONTEXT (f_sav) = record;
8242
8243 TYPE_STUB_DECL (record) = type_decl;
8244 TYPE_NAME (record) = type_decl;
8245 TYPE_FIELDS (record) = f_gpr;
8246 DECL_CHAIN (f_gpr) = f_fpr;
8247 DECL_CHAIN (f_fpr) = f_ovf;
8248 DECL_CHAIN (f_ovf) = f_sav;
8249
8250 layout_type (record);
8251
8252 /* The correct type is an array type of one element. */
8253 return build_array_type (record, build_index_type (size_zero_node));
8254 }
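/* For reference, the record built above corresponds roughly to the C-level
   type mandated by the x86-64 SysV psABI (field names match the FIELD_DECLs
   created in this function):

       typedef struct __va_list_tag {
         unsigned int gp_offset;        // offset into reg_save_area for GPRs
         unsigned int fp_offset;        // offset into reg_save_area for SSE regs
         void *overflow_arg_area;       // stack-passed arguments
         void *reg_save_area;           // register save area from the prologue
       } __builtin_va_list[1];

   The array-of-one-element wrapper matches the build_array_type call
   directly above.  */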
8255
8256 /* Setup the builtin va_list data type and for 64-bit the additional
8257 calling convention specific va_list data types. */
8258
8259 static tree
8260 ix86_build_builtin_va_list (void)
8261 {
8262 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8263
8264 /* Initialize abi specific va_list builtin types. */
8265 if (TARGET_64BIT)
8266 {
8267 tree t;
8268 if (ix86_abi == MS_ABI)
8269 {
8270 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8271 if (TREE_CODE (t) != RECORD_TYPE)
8272 t = build_variant_type_copy (t);
8273 sysv_va_list_type_node = t;
8274 }
8275 else
8276 {
8277 t = ret;
8278 if (TREE_CODE (t) != RECORD_TYPE)
8279 t = build_variant_type_copy (t);
8280 sysv_va_list_type_node = t;
8281 }
8282 if (ix86_abi != MS_ABI)
8283 {
8284 t = ix86_build_builtin_va_list_abi (MS_ABI);
8285 if (TREE_CODE (t) != RECORD_TYPE)
8286 t = build_variant_type_copy (t);
8287 ms_va_list_type_node = t;
8288 }
8289 else
8290 {
8291 t = ret;
8292 if (TREE_CODE (t) != RECORD_TYPE)
8293 t = build_variant_type_copy (t);
8294 ms_va_list_type_node = t;
8295 }
8296 }
8297
8298 return ret;
8299 }
8300
8301 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8302
8303 static void
8304 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8305 {
8306 rtx save_area, mem;
8307 alias_set_type set;
8308 int i, max;
8309
8310 /* GPR size of varargs save area. */
8311 if (cfun->va_list_gpr_size)
8312 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8313 else
8314 ix86_varargs_gpr_size = 0;
8315
8316 /* FPR size of varargs save area. We don't need it if we don't pass
8317 anything in SSE registers. */
8318 if (TARGET_SSE && cfun->va_list_fpr_size)
8319 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8320 else
8321 ix86_varargs_fpr_size = 0;
8322
8323 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8324 return;
8325
8326 save_area = frame_pointer_rtx;
8327 set = get_varargs_alias_set ();
8328
8329 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8330 if (max > X86_64_REGPARM_MAX)
8331 max = X86_64_REGPARM_MAX;
8332
8333 for (i = cum->regno; i < max; i++)
8334 {
8335 mem = gen_rtx_MEM (word_mode,
8336 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8337 MEM_NOTRAP_P (mem) = 1;
8338 set_mem_alias_set (mem, set);
8339 emit_move_insn (mem,
8340 gen_rtx_REG (word_mode,
8341 x86_64_int_parameter_registers[i]));
8342 }
8343
8344 if (ix86_varargs_fpr_size)
8345 {
8346 enum machine_mode smode;
8347 rtx_code_label *label;
8348 rtx test;
8349
8350 /* Now emit code to save SSE registers. The AX parameter contains number
8351 of SSE parameter registers used to call this function, though all we
8352 actually check here is the zero/non-zero status. */
8353
8354 label = gen_label_rtx ();
8355 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8356 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8357 label));
8358
8359 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8360 we used movdqa (i.e. TImode) instead? Perhaps even better would
8361 be if we could determine the real mode of the data, via a hook
8362 into pass_stdarg. Ignore all that for now. */
8363 smode = V4SFmode;
8364 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8365 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8366
8367 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8368 if (max > X86_64_SSE_REGPARM_MAX)
8369 max = X86_64_SSE_REGPARM_MAX;
8370
8371 for (i = cum->sse_regno; i < max; ++i)
8372 {
8373 mem = plus_constant (Pmode, save_area,
8374 i * 16 + ix86_varargs_gpr_size);
8375 mem = gen_rtx_MEM (smode, mem);
8376 MEM_NOTRAP_P (mem) = 1;
8377 set_mem_alias_set (mem, set);
8378 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8379
8380 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8381 }
8382
8383 emit_label (label);
8384 }
8385 }
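/* Rough picture of the SysV register save area laid out above, starting at
   frame_pointer_rtx: six 8-byte GPR slots (rdi, rsi, rdx, rcx, r8, r9, i.e.
   X86_64_REGPARM_MAX * UNITS_PER_WORD bytes), followed by up to eight
   16-byte XMM slots.  The SSE stores are skipped at run time when AL is
   zero, via the cbranch on AX_REG emitted above.  */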
8386
8387 static void
8388 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8389 {
8390 alias_set_type set = get_varargs_alias_set ();
8391 int i;
8392
8393 /* Reset to zero, as a SysV va_arg may have been
8394 used before. */
8395 ix86_varargs_gpr_size = 0;
8396 ix86_varargs_fpr_size = 0;
8397
8398 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8399 {
8400 rtx reg, mem;
8401
8402 mem = gen_rtx_MEM (Pmode,
8403 plus_constant (Pmode, virtual_incoming_args_rtx,
8404 i * UNITS_PER_WORD));
8405 MEM_NOTRAP_P (mem) = 1;
8406 set_mem_alias_set (mem, set);
8407
8408 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8409 emit_move_insn (mem, reg);
8410 }
8411 }
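/* Sketch of the effect for the MS ABI: the four possible register parameters
   (rcx, rdx, r8, r9) are spilled into their stack home slots at
   virtual_incoming_args_rtx + i * UNITS_PER_WORD (the 32-byte shadow area the
   caller allocates), so a char * style va_list can then walk all arguments
   linearly on the stack.  */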
8412
8413 static void
8414 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8415 tree type, int *, int no_rtl)
8416 {
8417 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8418 CUMULATIVE_ARGS next_cum;
8419 tree fntype;
8420
8421 /* This argument doesn't appear to be used anymore. Which is good,
8422 because the old code here didn't suppress rtl generation. */
8423 gcc_assert (!no_rtl);
8424
8425 if (!TARGET_64BIT)
8426 return;
8427
8428 fntype = TREE_TYPE (current_function_decl);
8429
8430 /* For varargs, we do not want to skip the dummy va_dcl argument.
8431 For stdargs, we do want to skip the last named argument. */
8432 next_cum = *cum;
8433 if (stdarg_p (fntype))
8434 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8435 true);
8436
8437 if (cum->call_abi == MS_ABI)
8438 setup_incoming_varargs_ms_64 (&next_cum);
8439 else
8440 setup_incoming_varargs_64 (&next_cum);
8441 }
8442
8443 /* Check whether TYPE is a char * style va_list. */
8444
8445 static bool
8446 is_va_list_char_pointer (tree type)
8447 {
8448 tree canonic;
8449
8450 /* For 32-bit it is always true. */
8451 if (!TARGET_64BIT)
8452 return true;
8453 canonic = ix86_canonical_va_list_type (type);
8454 return (canonic == ms_va_list_type_node
8455 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8456 }
8457
8458 /* Implement va_start. */
8459
8460 static void
8461 ix86_va_start (tree valist, rtx nextarg)
8462 {
8463 HOST_WIDE_INT words, n_gpr, n_fpr;
8464 tree f_gpr, f_fpr, f_ovf, f_sav;
8465 tree gpr, fpr, ovf, sav, t;
8466 tree type;
8467 rtx ovf_rtx;
8468
8469 if (flag_split_stack
8470 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8471 {
8472 unsigned int scratch_regno;
8473
8474 /* When we are splitting the stack, we can't refer to the stack
8475 arguments using internal_arg_pointer, because they may be on
8476 the old stack. The split stack prologue will arrange to
8477 leave a pointer to the old stack arguments in a scratch
8478 register, which we here copy to a pseudo-register. The split
8479 stack prologue can't set the pseudo-register directly because
8480 it (the prologue) runs before any registers have been saved. */
8481
8482 scratch_regno = split_stack_prologue_scratch_regno ();
8483 if (scratch_regno != INVALID_REGNUM)
8484 {
8485 rtx reg;
8486 rtx_insn *seq;
8487
8488 reg = gen_reg_rtx (Pmode);
8489 cfun->machine->split_stack_varargs_pointer = reg;
8490
8491 start_sequence ();
8492 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8493 seq = get_insns ();
8494 end_sequence ();
8495
8496 push_topmost_sequence ();
8497 emit_insn_after (seq, entry_of_function ());
8498 pop_topmost_sequence ();
8499 }
8500 }
8501
8502 /* Only the 64-bit target needs something special. */
8503 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8504 {
8505 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8506 std_expand_builtin_va_start (valist, nextarg);
8507 else
8508 {
8509 rtx va_r, next;
8510
8511 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8512 next = expand_binop (ptr_mode, add_optab,
8513 cfun->machine->split_stack_varargs_pointer,
8514 crtl->args.arg_offset_rtx,
8515 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8516 convert_move (va_r, next, 0);
8517 }
8518 return;
8519 }
8520
8521 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8522 f_fpr = DECL_CHAIN (f_gpr);
8523 f_ovf = DECL_CHAIN (f_fpr);
8524 f_sav = DECL_CHAIN (f_ovf);
8525
8526 valist = build_simple_mem_ref (valist);
8527 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8528 /* The following should be folded into the MEM_REF offset. */
8529 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8530 f_gpr, NULL_TREE);
8531 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8532 f_fpr, NULL_TREE);
8533 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8534 f_ovf, NULL_TREE);
8535 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8536 f_sav, NULL_TREE);
8537
8538 /* Count number of gp and fp argument registers used. */
8539 words = crtl->args.info.words;
8540 n_gpr = crtl->args.info.regno;
8541 n_fpr = crtl->args.info.sse_regno;
8542
8543 if (cfun->va_list_gpr_size)
8544 {
8545 type = TREE_TYPE (gpr);
8546 t = build2 (MODIFY_EXPR, type,
8547 gpr, build_int_cst (type, n_gpr * 8));
8548 TREE_SIDE_EFFECTS (t) = 1;
8549 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8550 }
8551
8552 if (TARGET_SSE && cfun->va_list_fpr_size)
8553 {
8554 type = TREE_TYPE (fpr);
8555 t = build2 (MODIFY_EXPR, type, fpr,
8556 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8557 TREE_SIDE_EFFECTS (t) = 1;
8558 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8559 }
8560
8561 /* Find the overflow area. */
8562 type = TREE_TYPE (ovf);
8563 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8564 ovf_rtx = crtl->args.internal_arg_pointer;
8565 else
8566 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8567 t = make_tree (type, ovf_rtx);
8568 if (words != 0)
8569 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8570 t = build2 (MODIFY_EXPR, type, ovf, t);
8571 TREE_SIDE_EFFECTS (t) = 1;
8572 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8573
8574 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8575 {
8576 /* Find the register save area.
8577 The function prologue saves it right above the stack frame. */
8578 type = TREE_TYPE (sav);
8579 t = make_tree (type, frame_pointer_rtx);
8580 if (!ix86_varargs_gpr_size)
8581 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8582 t = build2 (MODIFY_EXPR, type, sav, t);
8583 TREE_SIDE_EFFECTS (t) = 1;
8584 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8585 }
8586 }
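/* In effect, for the 64-bit SysV case the code above initializes the va_list
   roughly as follows (a sketch; n_gpr/n_fpr are the named register arguments
   already consumed, words the named stack words):

       gp_offset         = n_gpr * 8;
       fp_offset         = X86_64_REGPARM_MAX * 8 + n_fpr * 16;
       overflow_arg_area = incoming argument pointer + words * UNITS_PER_WORD;
       reg_save_area     = frame pointer (biased downward when no GPR save
                           area was allocated);  */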
8587
8588 /* Implement va_arg. */
8589
8590 static tree
8591 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8592 gimple_seq *post_p)
8593 {
8594 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8595 tree f_gpr, f_fpr, f_ovf, f_sav;
8596 tree gpr, fpr, ovf, sav, t;
8597 int size, rsize;
8598 tree lab_false, lab_over = NULL_TREE;
8599 tree addr, t2;
8600 rtx container;
8601 int indirect_p = 0;
8602 tree ptrtype;
8603 enum machine_mode nat_mode;
8604 unsigned int arg_boundary;
8605
8606 /* Only the 64-bit target needs something special. */
8607 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8608 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8609
8610 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8611 f_fpr = DECL_CHAIN (f_gpr);
8612 f_ovf = DECL_CHAIN (f_fpr);
8613 f_sav = DECL_CHAIN (f_ovf);
8614
8615 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8616 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8617 valist = build_va_arg_indirect_ref (valist);
8618 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8619 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8620 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8621
8622 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8623 if (indirect_p)
8624 type = build_pointer_type (type);
8625 size = int_size_in_bytes (type);
8626 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8627
8628 nat_mode = type_natural_mode (type, NULL, false);
8629 switch (nat_mode)
8630 {
8631 case V8SFmode:
8632 case V8SImode:
8633 case V32QImode:
8634 case V16HImode:
8635 case V4DFmode:
8636 case V4DImode:
8637 case V16SFmode:
8638 case V16SImode:
8639 case V64QImode:
8640 case V32HImode:
8641 case V8DFmode:
8642 case V8DImode:
8643 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8644 if (!TARGET_64BIT_MS_ABI)
8645 {
8646 container = NULL;
8647 break;
8648 }
8649
8650 default:
8651 container = construct_container (nat_mode, TYPE_MODE (type),
8652 type, 0, X86_64_REGPARM_MAX,
8653 X86_64_SSE_REGPARM_MAX, intreg,
8654 0);
8655 break;
8656 }
8657
8658 /* Pull the value out of the saved registers. */
8659
8660 addr = create_tmp_var (ptr_type_node, "addr");
8661
8662 if (container)
8663 {
8664 int needed_intregs, needed_sseregs;
8665 bool need_temp;
8666 tree int_addr, sse_addr;
8667
8668 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8669 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8670
8671 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8672
8673 need_temp = (!REG_P (container)
8674 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8675 || TYPE_ALIGN (type) > 128));
8676
8677 /* In case we are passing a structure, verify that it is a consecutive block
8678 in the register save area. If not, we need to do moves. */
8679 if (!need_temp && !REG_P (container))
8680 {
8681 /* Verify that all registers are strictly consecutive */
8682 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8683 {
8684 int i;
8685
8686 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8687 {
8688 rtx slot = XVECEXP (container, 0, i);
8689 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8690 || INTVAL (XEXP (slot, 1)) != i * 16)
8691 need_temp = 1;
8692 }
8693 }
8694 else
8695 {
8696 int i;
8697
8698 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8699 {
8700 rtx slot = XVECEXP (container, 0, i);
8701 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8702 || INTVAL (XEXP (slot, 1)) != i * 8)
8703 need_temp = 1;
8704 }
8705 }
8706 }
8707 if (!need_temp)
8708 {
8709 int_addr = addr;
8710 sse_addr = addr;
8711 }
8712 else
8713 {
8714 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8715 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8716 }
8717
8718 /* First ensure that we fit completely in registers. */
8719 if (needed_intregs)
8720 {
8721 t = build_int_cst (TREE_TYPE (gpr),
8722 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8723 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8724 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8725 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8726 gimplify_and_add (t, pre_p);
8727 }
8728 if (needed_sseregs)
8729 {
8730 t = build_int_cst (TREE_TYPE (fpr),
8731 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8732 + X86_64_REGPARM_MAX * 8);
8733 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8734 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8735 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8736 gimplify_and_add (t, pre_p);
8737 }
8738
8739 /* Compute index to start of area used for integer regs. */
8740 if (needed_intregs)
8741 {
8742 /* int_addr = gpr + sav; */
8743 t = fold_build_pointer_plus (sav, gpr);
8744 gimplify_assign (int_addr, t, pre_p);
8745 }
8746 if (needed_sseregs)
8747 {
8748 /* sse_addr = fpr + sav; */
8749 t = fold_build_pointer_plus (sav, fpr);
8750 gimplify_assign (sse_addr, t, pre_p);
8751 }
8752 if (need_temp)
8753 {
8754 int i, prev_size = 0;
8755 tree temp = create_tmp_var (type, "va_arg_tmp");
8756
8757 /* addr = &temp; */
8758 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8759 gimplify_assign (addr, t, pre_p);
8760
8761 for (i = 0; i < XVECLEN (container, 0); i++)
8762 {
8763 rtx slot = XVECEXP (container, 0, i);
8764 rtx reg = XEXP (slot, 0);
8765 enum machine_mode mode = GET_MODE (reg);
8766 tree piece_type;
8767 tree addr_type;
8768 tree daddr_type;
8769 tree src_addr, src;
8770 int src_offset;
8771 tree dest_addr, dest;
8772 int cur_size = GET_MODE_SIZE (mode);
8773
8774 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8775 prev_size = INTVAL (XEXP (slot, 1));
8776 if (prev_size + cur_size > size)
8777 {
8778 cur_size = size - prev_size;
8779 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8780 if (mode == BLKmode)
8781 mode = QImode;
8782 }
8783 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8784 if (mode == GET_MODE (reg))
8785 addr_type = build_pointer_type (piece_type);
8786 else
8787 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8788 true);
8789 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8790 true);
8791
8792 if (SSE_REGNO_P (REGNO (reg)))
8793 {
8794 src_addr = sse_addr;
8795 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8796 }
8797 else
8798 {
8799 src_addr = int_addr;
8800 src_offset = REGNO (reg) * 8;
8801 }
8802 src_addr = fold_convert (addr_type, src_addr);
8803 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8804
8805 dest_addr = fold_convert (daddr_type, addr);
8806 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8807 if (cur_size == GET_MODE_SIZE (mode))
8808 {
8809 src = build_va_arg_indirect_ref (src_addr);
8810 dest = build_va_arg_indirect_ref (dest_addr);
8811
8812 gimplify_assign (dest, src, pre_p);
8813 }
8814 else
8815 {
8816 tree copy
8817 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8818 3, dest_addr, src_addr,
8819 size_int (cur_size));
8820 gimplify_and_add (copy, pre_p);
8821 }
8822 prev_size += cur_size;
8823 }
8824 }
8825
8826 if (needed_intregs)
8827 {
8828 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8829 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8830 gimplify_assign (gpr, t, pre_p);
8831 }
8832
8833 if (needed_sseregs)
8834 {
8835 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8836 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8837 gimplify_assign (fpr, t, pre_p);
8838 }
8839
8840 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8841
8842 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8843 }
8844
8845 /* ... otherwise out of the overflow area. */
8846
8847 /* When we align a parameter on the stack for the caller, if its
8848 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8849 aligned to MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8850 here with the caller. */
8851 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8852 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8853 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8854
8855 /* Care for on-stack alignment if needed. */
8856 if (arg_boundary <= 64 || size == 0)
8857 t = ovf;
8858 else
8859 {
8860 HOST_WIDE_INT align = arg_boundary / 8;
8861 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8862 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8863 build_int_cst (TREE_TYPE (t), -align));
8864 }
8865
8866 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8867 gimplify_assign (addr, t, pre_p);
8868
8869 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8870 gimplify_assign (unshare_expr (ovf), t, pre_p);
8871
8872 if (container)
8873 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8874
8875 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8876 addr = fold_convert (ptrtype, addr);
8877
8878 if (indirect_p)
8879 addr = build_va_arg_indirect_ref (addr);
8880 return build_va_arg_indirect_ref (addr);
8881 }
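/* A condensed sketch of the GIMPLE generated above for an argument needing
   NEEDED_INTREGS general registers (the SSE path is analogous with 16-byte
   slots), following the checks and assignments emitted by this function:

       if (gp_offset >= (X86_64_REGPARM_MAX - needed_intregs + 1) * 8)
         goto lab_false;                      // not enough registers left
       addr = reg_save_area + gp_offset;      // pull value from save area
       gp_offset += needed_intregs * 8;
       goto lab_over;
     lab_false:
       addr = overflow_arg_area;              // rounded up to arg_boundary / 8
                                              // when more than word alignment
                                              // is required
       overflow_arg_area = addr + rsize * UNITS_PER_WORD;
     lab_over:
       return *(type *) addr;  */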
8882 \f
8883 /* Return true if OPNUM's MEM should be matched
8884 in movabs* patterns. */
8885
8886 bool
8887 ix86_check_movabs (rtx insn, int opnum)
8888 {
8889 rtx set, mem;
8890
8891 set = PATTERN (insn);
8892 if (GET_CODE (set) == PARALLEL)
8893 set = XVECEXP (set, 0, 0);
8894 gcc_assert (GET_CODE (set) == SET);
8895 mem = XEXP (set, opnum);
8896 while (GET_CODE (mem) == SUBREG)
8897 mem = SUBREG_REG (mem);
8898 gcc_assert (MEM_P (mem));
8899 return volatile_ok || !MEM_VOLATILE_P (mem);
8900 }
8901 \f
8902 /* Initialize the table of extra 80387 mathematical constants. */
8903
8904 static void
8905 init_ext_80387_constants (void)
8906 {
8907 static const char * cst[5] =
8908 {
8909 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8910 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8911 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8912 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8913 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8914 };
8915 int i;
8916
8917 for (i = 0; i < 5; i++)
8918 {
8919 real_from_string (&ext_80387_constants_table[i], cst[i]);
8920 /* Ensure each constant is rounded to XFmode precision. */
8921 real_convert (&ext_80387_constants_table[i],
8922 XFmode, &ext_80387_constants_table[i]);
8923 }
8924
8925 ext_80387_constants_init = 1;
8926 }
8927
8928 /* Return a non-zero code (used by standard_80387_constant_opcode) if the constant
8929 can be loaded with a special instruction, 0 if it cannot, -1 if X is not an x87 constant. */
8930
8931 int
8932 standard_80387_constant_p (rtx x)
8933 {
8934 enum machine_mode mode = GET_MODE (x);
8935
8936 REAL_VALUE_TYPE r;
8937
8938 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8939 return -1;
8940
8941 if (x == CONST0_RTX (mode))
8942 return 1;
8943 if (x == CONST1_RTX (mode))
8944 return 2;
8945
8946 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8947
8948 /* For XFmode constants, try to find a special 80387 instruction when
8949 optimizing for size or on those CPUs that benefit from them. */
8950 if (mode == XFmode
8951 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8952 {
8953 int i;
8954
8955 if (! ext_80387_constants_init)
8956 init_ext_80387_constants ();
8957
8958 for (i = 0; i < 5; i++)
8959 if (real_identical (&r, &ext_80387_constants_table[i]))
8960 return i + 3;
8961 }
8962
8963 /* Load of the constant -0.0 or -1.0 will be split as
8964 fldz;fchs or fld1;fchs sequence. */
8965 if (real_isnegzero (&r))
8966 return 8;
8967 if (real_identical (&r, &dconstm1))
8968 return 9;
8969
8970 return 0;
8971 }
8972
8973 /* Return the opcode of the special instruction to be used to load
8974 the constant X. */
8975
8976 const char *
8977 standard_80387_constant_opcode (rtx x)
8978 {
8979 switch (standard_80387_constant_p (x))
8980 {
8981 case 1:
8982 return "fldz";
8983 case 2:
8984 return "fld1";
8985 case 3:
8986 return "fldlg2";
8987 case 4:
8988 return "fldln2";
8989 case 5:
8990 return "fldl2e";
8991 case 6:
8992 return "fldl2t";
8993 case 7:
8994 return "fldpi";
8995 case 8:
8996 case 9:
8997 return "#";
8998 default:
8999 gcc_unreachable ();
9000 }
9001 }
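/* Summary of the mapping between standard_80387_constant_p and the opcodes
   above: 1 -> fldz (0.0), 2 -> fld1 (1.0), 3..7 -> the ext_80387_constants_table
   entries (log10(2), ln(2), log2(e), log2(10), pi), and 8/9 ("#") are -0.0 and
   -1.0, which are split later into fldz;fchs and fld1;fchs.  */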
9002
9003 /* Return the CONST_DOUBLE representing the 80387 constant that is
9004 loaded by the specified special instruction. The argument IDX
9005 matches the return value from standard_80387_constant_p. */
9006
9007 rtx
9008 standard_80387_constant_rtx (int idx)
9009 {
9010 int i;
9011
9012 if (! ext_80387_constants_init)
9013 init_ext_80387_constants ();
9014
9015 switch (idx)
9016 {
9017 case 3:
9018 case 4:
9019 case 5:
9020 case 6:
9021 case 7:
9022 i = idx - 3;
9023 break;
9024
9025 default:
9026 gcc_unreachable ();
9027 }
9028
9029 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9030 XFmode);
9031 }
9032
9033 /* Return 1 if X is all 0s and 2 if X is all 1s
9034 in a supported SSE/AVX vector mode; return 0 otherwise. */
9035
9036 int
9037 standard_sse_constant_p (rtx x)
9038 {
9039 enum machine_mode mode = GET_MODE (x);
9040
9041 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9042 return 1;
9043 if (vector_all_ones_operand (x, mode))
9044 switch (mode)
9045 {
9046 case V16QImode:
9047 case V8HImode:
9048 case V4SImode:
9049 case V2DImode:
9050 if (TARGET_SSE2)
9051 return 2;
9052 case V32QImode:
9053 case V16HImode:
9054 case V8SImode:
9055 case V4DImode:
9056 if (TARGET_AVX2)
9057 return 2;
9058 case V64QImode:
9059 case V32HImode:
9060 case V16SImode:
9061 case V8DImode:
9062 if (TARGET_AVX512F)
9063 return 2;
9064 default:
9065 break;
9066 }
9067
9068 return 0;
9069 }
9070
9071 /* Return the opcode of the special instruction to be used to load
9072 the constant X. */
9073
9074 const char *
9075 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9076 {
9077 switch (standard_sse_constant_p (x))
9078 {
9079 case 1:
9080 switch (get_attr_mode (insn))
9081 {
9082 case MODE_XI:
9083 return "vpxord\t%g0, %g0, %g0";
9084 case MODE_V16SF:
9085 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9086 : "vpxord\t%g0, %g0, %g0";
9087 case MODE_V8DF:
9088 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9089 : "vpxorq\t%g0, %g0, %g0";
9090 case MODE_TI:
9091 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9092 : "%vpxor\t%0, %d0";
9093 case MODE_V2DF:
9094 return "%vxorpd\t%0, %d0";
9095 case MODE_V4SF:
9096 return "%vxorps\t%0, %d0";
9097
9098 case MODE_OI:
9099 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9100 : "vpxor\t%x0, %x0, %x0";
9101 case MODE_V4DF:
9102 return "vxorpd\t%x0, %x0, %x0";
9103 case MODE_V8SF:
9104 return "vxorps\t%x0, %x0, %x0";
9105
9106 default:
9107 break;
9108 }
9109
9110 case 2:
9111 if (TARGET_AVX512VL
9112 || get_attr_mode (insn) == MODE_XI
9113 || get_attr_mode (insn) == MODE_V8DF
9114 || get_attr_mode (insn) == MODE_V16SF)
9115 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9116 if (TARGET_AVX)
9117 return "vpcmpeqd\t%0, %0, %0";
9118 else
9119 return "pcmpeqd\t%0, %0";
9120
9121 default:
9122 break;
9123 }
9124 gcc_unreachable ();
9125 }
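/* In other words: an all-zeros constant is materialized with a mode-sized xor
   of a register with itself (pxor/xorps/xorpd and their VEX/EVEX forms), and
   an all-ones constant with pcmpeqd of a register against itself, or
   vpternlogd with immediate 0xFF in the 512-bit and AVX512VL cases.  Both
   forms avoid a constant-pool load.  */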
9126
9127 /* Return true if OP contains a symbol reference. */
9128
9129 bool
9130 symbolic_reference_mentioned_p (rtx op)
9131 {
9132 const char *fmt;
9133 int i;
9134
9135 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9136 return true;
9137
9138 fmt = GET_RTX_FORMAT (GET_CODE (op));
9139 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9140 {
9141 if (fmt[i] == 'E')
9142 {
9143 int j;
9144
9145 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9146 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9147 return true;
9148 }
9149
9150 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9151 return true;
9152 }
9153
9154 return false;
9155 }
9156
9157 /* Return true if it is appropriate to emit `ret' instructions in the
9158 body of a function. Do this only if the epilogue is simple, needing a
9159 couple of insns. Prior to reloading, we can't tell how many registers
9160 must be saved, so return false then. Return false if there is no frame
9161 marker to de-allocate. */
9162
9163 bool
9164 ix86_can_use_return_insn_p (void)
9165 {
9166 struct ix86_frame frame;
9167
9168 if (! reload_completed || frame_pointer_needed)
9169 return 0;
9170
9171 /* Don't allow more than 32k pop, since that's all we can do
9172 with one instruction. */
9173 if (crtl->args.pops_args && crtl->args.size >= 32768)
9174 return 0;
9175
9176 ix86_compute_frame_layout (&frame);
9177 return (frame.stack_pointer_offset == UNITS_PER_WORD
9178 && (frame.nregs + frame.nsseregs) == 0);
9179 }
9180 \f
9181 /* Value should be nonzero if functions must have frame pointers.
9182 Zero means the frame pointer need not be set up (and parms may
9183 be accessed via the stack pointer) in functions that seem suitable. */
9184
9185 static bool
9186 ix86_frame_pointer_required (void)
9187 {
9188 /* If we accessed previous frames, then the generated code expects
9189 to be able to access the saved ebp value in our frame. */
9190 if (cfun->machine->accesses_prev_frame)
9191 return true;
9192
9193 /* Several x86 OSes need a frame pointer for other reasons,
9194 usually pertaining to setjmp. */
9195 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9196 return true;
9197
9198 /* For older 32-bit runtimes setjmp requires a valid frame pointer. */
9199 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9200 return true;
9201
9202 /* For Win64 SEH, very large frames need a frame pointer, as the maximum stack
9203 allocation is 4GB. */
9204 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9205 return true;
9206
9207 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9208 turns off the frame pointer by default. Turn it back on now if
9209 we've not got a leaf function. */
9210 if (TARGET_OMIT_LEAF_FRAME_POINTER
9211 && (!crtl->is_leaf
9212 || ix86_current_function_calls_tls_descriptor))
9213 return true;
9214
9215 if (crtl->profile && !flag_fentry)
9216 return true;
9217
9218 return false;
9219 }
9220
9221 /* Record that the current function accesses previous call frames. */
9222
9223 void
9224 ix86_setup_frame_addresses (void)
9225 {
9226 cfun->machine->accesses_prev_frame = 1;
9227 }
9228 \f
9229 #ifndef USE_HIDDEN_LINKONCE
9230 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9231 # define USE_HIDDEN_LINKONCE 1
9232 # else
9233 # define USE_HIDDEN_LINKONCE 0
9234 # endif
9235 #endif
9236
9237 static int pic_labels_used;
9238
9239 /* Fills in the label name that should be used for a pc thunk for
9240 the given register. */
9241
9242 static void
9243 get_pc_thunk_name (char name[32], unsigned int regno)
9244 {
9245 gcc_assert (!TARGET_64BIT);
9246
9247 if (USE_HIDDEN_LINKONCE)
9248 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9249 else
9250 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9251 }
9252
9253
9254 /* This function generates the pc thunks used by -fpic code: each thunk
9255 loads its register with the return address of the caller and then returns. */
9256
9257 static void
9258 ix86_code_end (void)
9259 {
9260 rtx xops[2];
9261 int regno;
9262
9263 for (regno = AX_REG; regno <= SP_REG; regno++)
9264 {
9265 char name[32];
9266 tree decl;
9267
9268 if (!(pic_labels_used & (1 << regno)))
9269 continue;
9270
9271 get_pc_thunk_name (name, regno);
9272
9273 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9274 get_identifier (name),
9275 build_function_type_list (void_type_node, NULL_TREE));
9276 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9277 NULL_TREE, void_type_node);
9278 TREE_PUBLIC (decl) = 1;
9279 TREE_STATIC (decl) = 1;
9280 DECL_IGNORED_P (decl) = 1;
9281
9282 #if TARGET_MACHO
9283 if (TARGET_MACHO)
9284 {
9285 switch_to_section (darwin_sections[text_coal_section]);
9286 fputs ("\t.weak_definition\t", asm_out_file);
9287 assemble_name (asm_out_file, name);
9288 fputs ("\n\t.private_extern\t", asm_out_file);
9289 assemble_name (asm_out_file, name);
9290 putc ('\n', asm_out_file);
9291 ASM_OUTPUT_LABEL (asm_out_file, name);
9292 DECL_WEAK (decl) = 1;
9293 }
9294 else
9295 #endif
9296 if (USE_HIDDEN_LINKONCE)
9297 {
9298 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9299
9300 targetm.asm_out.unique_section (decl, 0);
9301 switch_to_section (get_named_section (decl, NULL, 0));
9302
9303 targetm.asm_out.globalize_label (asm_out_file, name);
9304 fputs ("\t.hidden\t", asm_out_file);
9305 assemble_name (asm_out_file, name);
9306 putc ('\n', asm_out_file);
9307 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9308 }
9309 else
9310 {
9311 switch_to_section (text_section);
9312 ASM_OUTPUT_LABEL (asm_out_file, name);
9313 }
9314
9315 DECL_INITIAL (decl) = make_node (BLOCK);
9316 current_function_decl = decl;
9317 init_function_start (decl);
9318 first_function_block_is_cold = false;
9319 /* Make sure unwind info is emitted for the thunk if needed. */
9320 final_start_function (emit_barrier (), asm_out_file, 1);
9321
9322 /* Pad stack IP move with 4 instructions (two NOPs count
9323 as one instruction). */
9324 if (TARGET_PAD_SHORT_FUNCTION)
9325 {
9326 int i = 8;
9327
9328 while (i--)
9329 fputs ("\tnop\n", asm_out_file);
9330 }
9331
9332 xops[0] = gen_rtx_REG (Pmode, regno);
9333 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9334 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9335 fputs ("\tret\n", asm_out_file);
9336 final_end_function ();
9337 init_insn_lengths ();
9338 free_after_compilation (cfun);
9339 set_cfun (NULL);
9340 current_function_decl = NULL;
9341 }
9342
9343 if (flag_split_stack)
9344 file_end_indicate_split_stack ();
9345 }
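/* For reference, each emitted thunk is just (shown here for %ebx, i.e.
   __x86.get_pc_thunk.bx):

       movl (%esp), %ebx
       ret

   i.e. it copies its own return address, which is the address of the
   instruction following the call, into the target register.  */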
9346
9347 /* Emit code for the SET_GOT patterns. */
9348
9349 const char *
9350 output_set_got (rtx dest, rtx label)
9351 {
9352 rtx xops[3];
9353
9354 xops[0] = dest;
9355
9356 if (TARGET_VXWORKS_RTP && flag_pic)
9357 {
9358 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9359 xops[2] = gen_rtx_MEM (Pmode,
9360 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9361 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9362
9363 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9364 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9365 an unadorned address. */
9366 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9367 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9368 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9369 return "";
9370 }
9371
9372 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9373
9374 if (!flag_pic)
9375 {
9376 if (TARGET_MACHO)
9377 /* We don't need a pic base, we're not producing pic. */
9378 gcc_unreachable ();
9379
9380 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9381 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9382 targetm.asm_out.internal_label (asm_out_file, "L",
9383 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9384 }
9385 else
9386 {
9387 char name[32];
9388 get_pc_thunk_name (name, REGNO (dest));
9389 pic_labels_used |= 1 << REGNO (dest);
9390
9391 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9392 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9393 output_asm_insn ("call\t%X2", xops);
9394
9395 #if TARGET_MACHO
9396 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9397 This is what will be referenced by the Mach-O PIC subsystem. */
9398 if (machopic_should_output_picbase_label () || !label)
9399 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9400
9401 /* When we are restoring the pic base at the site of a nonlocal label,
9402 and we decided to emit the pic base above, we will still output a
9403 local label used for calculating the correction offset (even though
9404 the offset will be 0 in that case). */
9405 if (label)
9406 targetm.asm_out.internal_label (asm_out_file, "L",
9407 CODE_LABEL_NUMBER (label));
9408 #endif
9409 }
9410
9411 if (!TARGET_MACHO)
9412 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9413
9414 return "";
9415 }
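/* In the common flag_pic case on ELF targets this emits, for %ebx:

       call  __x86.get_pc_thunk.bx
       addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   so that %ebx ends up pointing at the GOT (the add is skipped for Mach-O,
   which uses the picbase label scheme instead).  */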
9416
9417 /* Generate a "push" pattern for input ARG. */
9418
9419 static rtx
9420 gen_push (rtx arg)
9421 {
9422 struct machine_function *m = cfun->machine;
9423
9424 if (m->fs.cfa_reg == stack_pointer_rtx)
9425 m->fs.cfa_offset += UNITS_PER_WORD;
9426 m->fs.sp_offset += UNITS_PER_WORD;
9427
9428 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9429 arg = gen_rtx_REG (word_mode, REGNO (arg));
9430
9431 return gen_rtx_SET (VOIDmode,
9432 gen_rtx_MEM (word_mode,
9433 gen_rtx_PRE_DEC (Pmode,
9434 stack_pointer_rtx)),
9435 arg);
9436 }
9437
9438 /* Generate a "pop" pattern for input ARG. */
9439
9440 static rtx
9441 gen_pop (rtx arg)
9442 {
9443 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9444 arg = gen_rtx_REG (word_mode, REGNO (arg));
9445
9446 return gen_rtx_SET (VOIDmode,
9447 arg,
9448 gen_rtx_MEM (word_mode,
9449 gen_rtx_POST_INC (Pmode,
9450 stack_pointer_rtx)));
9451 }
9452
9453 /* Return the number of an unused call-clobbered register if one is
9454 available for the entire function, otherwise return INVALID_REGNUM. */
9455
9456 static unsigned int
9457 ix86_select_alt_pic_regnum (void)
9458 {
9459 if (ix86_use_pseudo_pic_reg ())
9460 return INVALID_REGNUM;
9461
9462 if (crtl->is_leaf
9463 && !crtl->profile
9464 && !ix86_current_function_calls_tls_descriptor)
9465 {
9466 int i, drap;
9467 /* Can't use the same register for both PIC and DRAP. */
9468 if (crtl->drap_reg)
9469 drap = REGNO (crtl->drap_reg);
9470 else
9471 drap = -1;
9472 for (i = 2; i >= 0; --i)
9473 if (i != drap && !df_regs_ever_live_p (i))
9474 return i;
9475 }
9476
9477 return INVALID_REGNUM;
9478 }
9479
9480 /* Return TRUE if we need to save REGNO. */
9481
9482 static bool
9483 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9484 {
9485 if (pic_offset_table_rtx
9486 && !ix86_use_pseudo_pic_reg ()
9487 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9488 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9489 || crtl->profile
9490 || crtl->calls_eh_return
9491 || crtl->uses_const_pool
9492 || cfun->has_nonlocal_label))
9493 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9494
9495 if (crtl->calls_eh_return && maybe_eh_return)
9496 {
9497 unsigned i;
9498 for (i = 0; ; i++)
9499 {
9500 unsigned test = EH_RETURN_DATA_REGNO (i);
9501 if (test == INVALID_REGNUM)
9502 break;
9503 if (test == regno)
9504 return true;
9505 }
9506 }
9507
9508 if (crtl->drap_reg
9509 && regno == REGNO (crtl->drap_reg)
9510 && !cfun->machine->no_drap_save_restore)
9511 return true;
9512
9513 return (df_regs_ever_live_p (regno)
9514 && !call_used_regs[regno]
9515 && !fixed_regs[regno]
9516 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9517 }
9518
9519 /* Return the number of saved general purpose registers. */
9520
9521 static int
9522 ix86_nsaved_regs (void)
9523 {
9524 int nregs = 0;
9525 int regno;
9526
9527 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9528 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9529 nregs ++;
9530 return nregs;
9531 }
9532
9533 /* Return the number of saved SSE registers. */
9534
9535 static int
9536 ix86_nsaved_sseregs (void)
9537 {
9538 int nregs = 0;
9539 int regno;
9540
9541 if (!TARGET_64BIT_MS_ABI)
9542 return 0;
9543 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9544 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9545 nregs ++;
9546 return nregs;
9547 }
9548
9549 /* Given FROM and TO register numbers, say whether this elimination is
9550 allowed. If stack alignment is needed, we can only replace argument
9551 pointer with hard frame pointer, or replace frame pointer with stack
9552 pointer. Otherwise, frame pointer elimination is automatically
9553 handled and all other eliminations are valid. */
9554
9555 static bool
9556 ix86_can_eliminate (const int from, const int to)
9557 {
9558 if (stack_realign_fp)
9559 return ((from == ARG_POINTER_REGNUM
9560 && to == HARD_FRAME_POINTER_REGNUM)
9561 || (from == FRAME_POINTER_REGNUM
9562 && to == STACK_POINTER_REGNUM));
9563 else
9564 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9565 }
9566
9567 /* Return the offset between two registers, one to be eliminated, and the other
9568 its replacement, at the start of a routine. */
9569
9570 HOST_WIDE_INT
9571 ix86_initial_elimination_offset (int from, int to)
9572 {
9573 struct ix86_frame frame;
9574 ix86_compute_frame_layout (&frame);
9575
9576 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9577 return frame.hard_frame_pointer_offset;
9578 else if (from == FRAME_POINTER_REGNUM
9579 && to == HARD_FRAME_POINTER_REGNUM)
9580 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9581 else
9582 {
9583 gcc_assert (to == STACK_POINTER_REGNUM);
9584
9585 if (from == ARG_POINTER_REGNUM)
9586 return frame.stack_pointer_offset;
9587
9588 gcc_assert (from == FRAME_POINTER_REGNUM);
9589 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9590 }
9591 }
9592
9593 /* In a dynamically-aligned function, we can't know the offset from
9594 stack pointer to frame pointer, so we must ensure that setjmp
9595 eliminates fp against the hard fp (%ebp) rather than trying to
9596 index from %esp up to the top of the frame across a gap that is
9597 of unknown (at compile-time) size. */
9598 static rtx
9599 ix86_builtin_setjmp_frame_value (void)
9600 {
9601 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9602 }
9603
9604 /* When using -fsplit-stack, the allocation routines set a field in
9605 the TCB to the bottom of the stack plus this much space, measured
9606 in bytes. */
9607
9608 #define SPLIT_STACK_AVAILABLE 256
9609
9610 /* Fill in the ix86_frame structure describing the frame of the currently compiled function. */
9611
9612 static void
9613 ix86_compute_frame_layout (struct ix86_frame *frame)
9614 {
9615 unsigned HOST_WIDE_INT stack_alignment_needed;
9616 HOST_WIDE_INT offset;
9617 unsigned HOST_WIDE_INT preferred_alignment;
9618 HOST_WIDE_INT size = get_frame_size ();
9619 HOST_WIDE_INT to_allocate;
9620
9621 frame->nregs = ix86_nsaved_regs ();
9622 frame->nsseregs = ix86_nsaved_sseregs ();
9623
9624 /* The 64-bit MS ABI seems to require the stack alignment to always be 16,
9625 except in function prologues and leaf functions. */
9626 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9627 && (!crtl->is_leaf || cfun->calls_alloca != 0
9628 || ix86_current_function_calls_tls_descriptor))
9629 {
9630 crtl->preferred_stack_boundary = 128;
9631 crtl->stack_alignment_needed = 128;
9632 }
9633 /* preferred_stack_boundary is never updated for calls
9634 expanded from a TLS descriptor. Update it here. We don't update it in
9635 the expand stage because, according to the comments before
9636 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9637 away. */
9638 else if (ix86_current_function_calls_tls_descriptor
9639 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9640 {
9641 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9642 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9643 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9644 }
9645
9646 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9647 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9648
9649 gcc_assert (!size || stack_alignment_needed);
9650 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9651 gcc_assert (preferred_alignment <= stack_alignment_needed);
9652
9653 /* For SEH we have to limit the amount of code movement into the prologue.
9654 At present we do this via a BLOCKAGE, at which point there's very little
9655 scheduling that can be done, which means that there's very little point
9656 in doing anything except PUSHs. */
9657 if (TARGET_SEH)
9658 cfun->machine->use_fast_prologue_epilogue = false;
9659
9660 /* During reload iteration the number of registers saved can change.
9661 Recompute the value as needed. Do not recompute when the number of registers
9662 didn't change, as reload does multiple calls to the function and does not
9663 expect the decision to change within a single iteration. */
9664 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9665 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9666 {
9667 int count = frame->nregs;
9668 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9669
9670 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9671
9672 /* The fast prologue uses move instead of push to save registers. This
9673 is significantly longer, but also executes faster as modern hardware
9674 can execute the moves in parallel, but can't do that for push/pop.
9675
9676 Be careful about choosing which prologue to emit: when the function takes
9677 many instructions to execute we may use the slow version, and likewise when
9678 the function is known to be outside a hot spot (this is known with
9679 feedback only). Weight the size of the function by the number of registers
9680 to save, as it is cheap to use one or two push instructions but very
9681 slow to use many of them. */
9682 if (count)
9683 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9684 if (node->frequency < NODE_FREQUENCY_NORMAL
9685 || (flag_branch_probabilities
9686 && node->frequency < NODE_FREQUENCY_HOT))
9687 cfun->machine->use_fast_prologue_epilogue = false;
9688 else
9689 cfun->machine->use_fast_prologue_epilogue
9690 = !expensive_function_p (count);
9691 }
9692
9693 frame->save_regs_using_mov
9694 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9695 /* If static stack checking is enabled and done with probes,
9696 the registers need to be saved before allocating the frame. */
9697 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9698
9699 /* Skip return address. */
9700 offset = UNITS_PER_WORD;
9701
9702 /* Skip pushed static chain. */
9703 if (ix86_static_chain_on_stack)
9704 offset += UNITS_PER_WORD;
9705
9706 /* Skip saved base pointer. */
9707 if (frame_pointer_needed)
9708 offset += UNITS_PER_WORD;
9709 frame->hfp_save_offset = offset;
9710
9711 /* The traditional frame pointer location is at the top of the frame. */
9712 frame->hard_frame_pointer_offset = offset;
9713
9714 /* Register save area */
9715 offset += frame->nregs * UNITS_PER_WORD;
9716 frame->reg_save_offset = offset;
9717
9718 /* On SEH target, registers are pushed just before the frame pointer
9719 location. */
9720 if (TARGET_SEH)
9721 frame->hard_frame_pointer_offset = offset;
9722
9723 /* Align and set SSE register save area. */
9724 if (frame->nsseregs)
9725 {
9726 /* The only ABI that has saved SSE registers (Win64) also has a
9727 16-byte aligned default stack, and thus we don't need to be
9728 within the re-aligned local stack frame to save them. */
9729 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9730 offset = (offset + 16 - 1) & -16;
9731 offset += frame->nsseregs * 16;
9732 }
9733 frame->sse_reg_save_offset = offset;
9734
9735 /* The re-aligned stack starts here. Values before this point are not
9736 directly comparable with values below this point. In order to make
9737 sure that no value happens to be the same before and after, force
9738 the alignment computation below to add a non-zero value. */
9739 if (stack_realign_fp)
9740 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9741
9742 /* Va-arg area */
9743 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9744 offset += frame->va_arg_size;
9745
9746 /* Align start of frame for local function. */
9747 if (stack_realign_fp
9748 || offset != frame->sse_reg_save_offset
9749 || size != 0
9750 || !crtl->is_leaf
9751 || cfun->calls_alloca
9752 || ix86_current_function_calls_tls_descriptor)
9753 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9754
9755 /* Frame pointer points here. */
9756 frame->frame_pointer_offset = offset;
9757
9758 offset += size;
9759
9760 /* Add the outgoing arguments area. It can be skipped if we eliminated
9761 all the function calls as dead code.
9762 Skipping is however impossible when the function calls alloca: the alloca
9763 expander assumes that the last crtl->outgoing_args_size bytes
9764 of the stack frame are unused. */
9765 if (ACCUMULATE_OUTGOING_ARGS
9766 && (!crtl->is_leaf || cfun->calls_alloca
9767 || ix86_current_function_calls_tls_descriptor))
9768 {
9769 offset += crtl->outgoing_args_size;
9770 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9771 }
9772 else
9773 frame->outgoing_arguments_size = 0;
9774
9775 /* Align stack boundary. Only needed if we're calling another function
9776 or using alloca. */
9777 if (!crtl->is_leaf || cfun->calls_alloca
9778 || ix86_current_function_calls_tls_descriptor)
9779 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9780
9781 /* We've reached end of stack frame. */
9782 frame->stack_pointer_offset = offset;
9783
9784 /* Size prologue needs to allocate. */
9785 to_allocate = offset - frame->sse_reg_save_offset;
9786
9787 if ((!to_allocate && frame->nregs <= 1)
9788 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9789 frame->save_regs_using_mov = false;
9790
9791 if (ix86_using_red_zone ()
9792 && crtl->sp_is_unchanging
9793 && crtl->is_leaf
9794 && !ix86_current_function_calls_tls_descriptor)
9795 {
9796 frame->red_zone_size = to_allocate;
9797 if (frame->save_regs_using_mov)
9798 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9799 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9800 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9801 }
9802 else
9803 frame->red_zone_size = 0;
9804 frame->stack_pointer_offset -= frame->red_zone_size;
9805
9806 /* The SEH frame pointer location is near the bottom of the frame.
9807 This is enforced by the fact that the difference between the
9808 stack pointer and the frame pointer is limited to 240 bytes in
9809 the unwind data structure. */
9810 if (TARGET_SEH)
9811 {
9812 HOST_WIDE_INT diff;
9813
9814 /* If we can leave the frame pointer where it is, do so. Also, returns
9815 the establisher frame for __builtin_frame_address (0). */
9816 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9817 if (diff <= SEH_MAX_FRAME_SIZE
9818 && (diff > 240 || (diff & 15) != 0)
9819 && !crtl->accesses_prior_frames)
9820 {
9821 /* Ideally we'd determine what portion of the local stack frame
9822 (within the constraint of the lowest 240) is most heavily used.
9823 But without that complication, simply bias the frame pointer
9824 by 128 bytes so as to maximize the amount of the local stack
9825 frame that is addressable with 8-bit offsets. */
9826 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9827 }
9828 }
9829 }
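/* Rough picture of the layout described by the offsets computed above,
   from high addresses to low (an informal sketch, not additional code):

       return address
       pushed static chain              (if ix86_static_chain_on_stack)
       saved frame pointer              -> hfp_save_offset
       GPR save area                    -> reg_save_offset
       SSE save area, 16-byte aligned   -> sse_reg_save_offset
       va_arg save area
       local variables                  -> frame_pointer_offset at their top
       outgoing arguments
       end of frame                     -> stack_pointer_offset, reduced by
                                           red_zone_size when the red zone
                                           can be used  */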
9830
9831 /* This is semi-inlined memory_address_length, but simplified
9832 since we know that we're always dealing with reg+offset, and
9833 to avoid having to create and discard all that rtl. */
9834
9835 static inline int
9836 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9837 {
9838 int len = 4;
9839
9840 if (offset == 0)
9841 {
9842 /* EBP and R13 cannot be encoded without an offset. */
9843 len = (regno == BP_REG || regno == R13_REG);
9844 }
9845 else if (IN_RANGE (offset, -128, 127))
9846 len = 1;
9847
9848 /* ESP and R12 must be encoded with a SIB byte. */
9849 if (regno == SP_REG || regno == R12_REG)
9850 len++;
9851
9852 return len;
9853 }
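/* A few examples of the address-size estimate above, using 32-bit register
   names for brevity: 0(%eax) needs no displacement byte (len 0), 0(%ebp)
   needs a disp8 (len 1), 16(%esp) needs a SIB byte plus disp8 (len 2), and
   0x1000(%ecx) needs a 4-byte displacement (len 4).  */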
9854
9855 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9856 The valid base registers are taken from CFUN->MACHINE->FS. */
9857
9858 static rtx
9859 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9860 {
9861 const struct machine_function *m = cfun->machine;
9862 rtx base_reg = NULL;
9863 HOST_WIDE_INT base_offset = 0;
9864
9865 if (m->use_fast_prologue_epilogue)
9866 {
9867 /* Choose the base register most likely to allow the most scheduling
9868 opportunities. Generally FP is valid throughout the function,
9869 while DRAP must be reloaded within the epilogue. But choose either
9870 over the SP due to increased encoding size. */
9871
9872 if (m->fs.fp_valid)
9873 {
9874 base_reg = hard_frame_pointer_rtx;
9875 base_offset = m->fs.fp_offset - cfa_offset;
9876 }
9877 else if (m->fs.drap_valid)
9878 {
9879 base_reg = crtl->drap_reg;
9880 base_offset = 0 - cfa_offset;
9881 }
9882 else if (m->fs.sp_valid)
9883 {
9884 base_reg = stack_pointer_rtx;
9885 base_offset = m->fs.sp_offset - cfa_offset;
9886 }
9887 }
9888 else
9889 {
9890 HOST_WIDE_INT toffset;
9891 int len = 16, tlen;
9892
9893 /* Choose the base register with the smallest address encoding.
9894 With a tie, choose FP > DRAP > SP. */
9895 if (m->fs.sp_valid)
9896 {
9897 base_reg = stack_pointer_rtx;
9898 base_offset = m->fs.sp_offset - cfa_offset;
9899 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9900 }
9901 if (m->fs.drap_valid)
9902 {
9903 toffset = 0 - cfa_offset;
9904 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9905 if (tlen <= len)
9906 {
9907 base_reg = crtl->drap_reg;
9908 base_offset = toffset;
9909 len = tlen;
9910 }
9911 }
9912 if (m->fs.fp_valid)
9913 {
9914 toffset = m->fs.fp_offset - cfa_offset;
9915 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9916 if (tlen <= len)
9917 {
9918 base_reg = hard_frame_pointer_rtx;
9919 base_offset = toffset;
9920 len = tlen;
9921 }
9922 }
9923 }
9924 gcc_assert (base_reg != NULL);
9925
9926 return plus_constant (Pmode, base_reg, base_offset);
9927 }
9928
9929 /* Emit code to save registers in the prologue. */
9930
9931 static void
9932 ix86_emit_save_regs (void)
9933 {
9934 unsigned int regno;
9935 rtx insn;
9936
9937 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9938 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9939 {
9940 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9941 RTX_FRAME_RELATED_P (insn) = 1;
9942 }
9943 }
9944
9945 /* Emit a single register save at CFA - CFA_OFFSET. */
9946
9947 static void
9948 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9949 HOST_WIDE_INT cfa_offset)
9950 {
9951 struct machine_function *m = cfun->machine;
9952 rtx reg = gen_rtx_REG (mode, regno);
9953 rtx mem, addr, base, insn;
9954
9955 addr = choose_baseaddr (cfa_offset);
9956 mem = gen_frame_mem (mode, addr);
9957
9958 /* For SSE saves, we need to indicate the 128-bit alignment. */
9959 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9960
9961 insn = emit_move_insn (mem, reg);
9962 RTX_FRAME_RELATED_P (insn) = 1;
9963
9964 base = addr;
9965 if (GET_CODE (base) == PLUS)
9966 base = XEXP (base, 0);
9967 gcc_checking_assert (REG_P (base));
9968
9969 /* When saving registers into a re-aligned local stack frame, avoid
9970 any tricky guessing by dwarf2out. */
9971 if (m->fs.realigned)
9972 {
9973 gcc_checking_assert (stack_realign_drap);
9974
9975 if (regno == REGNO (crtl->drap_reg))
9976 {
9977 /* A bit of a hack. We force the DRAP register to be saved in
9978 the re-aligned stack frame, which provides us with a copy
9979 of the CFA that will last past the prologue. Install it. */
9980 gcc_checking_assert (cfun->machine->fs.fp_valid);
9981 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9982 cfun->machine->fs.fp_offset - cfa_offset);
9983 mem = gen_rtx_MEM (mode, addr);
9984 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9985 }
9986 else
9987 {
9988 /* The frame pointer is a stable reference within the
9989 aligned frame. Use it. */
9990 gcc_checking_assert (cfun->machine->fs.fp_valid);
9991 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9992 cfun->machine->fs.fp_offset - cfa_offset);
9993 mem = gen_rtx_MEM (mode, addr);
9994 add_reg_note (insn, REG_CFA_EXPRESSION,
9995 gen_rtx_SET (VOIDmode, mem, reg));
9996 }
9997 }
9998
9999 /* The memory may not be relative to the current CFA register,
10000 which means that we may need to generate a new pattern for
10001 use by the unwind info. */
10002 else if (base != m->fs.cfa_reg)
10003 {
10004 addr = plus_constant (Pmode, m->fs.cfa_reg,
10005 m->fs.cfa_offset - cfa_offset);
10006 mem = gen_rtx_MEM (mode, addr);
10007 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10008 }
10009 }
10010
10011 /* Emit code to save registers using MOV insns.
10012 First register is stored at CFA - CFA_OFFSET. */
10013 static void
10014 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10015 {
10016 unsigned int regno;
10017
10018 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10019 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10020 {
10021 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10022 cfa_offset -= UNITS_PER_WORD;
10023 }
10024 }
10025
10026 /* Emit code to save SSE registers using MOV insns.
10027 First register is stored at CFA - CFA_OFFSET. */
10028 static void
10029 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10030 {
10031 unsigned int regno;
10032
10033 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10034 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10035 {
10036 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10037 cfa_offset -= 16;
10038 }
10039 }
10040
10041 static GTY(()) rtx queued_cfa_restores;
10042
10043 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
10044 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10045 Don't add the note if the previously saved value will be left untouched
10046 within the stack red-zone until return, as unwinders can find the same value
10047 in the register and on the stack. */
10048
10049 static void
10050 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10051 {
10052 if (!crtl->shrink_wrapped
10053 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10054 return;
10055
10056 if (insn)
10057 {
10058 add_reg_note (insn, REG_CFA_RESTORE, reg);
10059 RTX_FRAME_RELATED_P (insn) = 1;
10060 }
10061 else
10062 queued_cfa_restores
10063 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10064 }
10065
10066 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
10067
10068 static void
10069 ix86_add_queued_cfa_restore_notes (rtx insn)
10070 {
10071 rtx last;
10072 if (!queued_cfa_restores)
10073 return;
10074 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10075 ;
10076 XEXP (last, 1) = REG_NOTES (insn);
10077 REG_NOTES (insn) = queued_cfa_restores;
10078 queued_cfa_restores = NULL_RTX;
10079 RTX_FRAME_RELATED_P (insn) = 1;
10080 }
10081
10082 /* Expand prologue or epilogue stack adjustment.
10083 The pattern exists to put a dependency on all ebp-based memory accesses.
10084 STYLE should be negative if instructions should be marked as frame related,
10085 zero if the %r11 register is live and cannot be freely used, and positive
10086 otherwise. */
10087
10088 static void
10089 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10090 int style, bool set_cfa)
10091 {
10092 struct machine_function *m = cfun->machine;
10093 rtx insn;
10094 bool add_frame_related_expr = false;
10095
10096 if (Pmode == SImode)
10097 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10098 else if (x86_64_immediate_operand (offset, DImode))
10099 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10100 else
10101 {
10102 rtx tmp;
10103 /* r11 is used by indirect sibcall return as well, set before the
10104 epilogue and used after the epilogue. */
10105 if (style)
10106 tmp = gen_rtx_REG (DImode, R11_REG);
10107 else
10108 {
10109 gcc_assert (src != hard_frame_pointer_rtx
10110 && dest != hard_frame_pointer_rtx);
10111 tmp = hard_frame_pointer_rtx;
10112 }
10113 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10114 if (style < 0)
10115 add_frame_related_expr = true;
10116
10117 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10118 }
10119
10120 insn = emit_insn (insn);
10121 if (style >= 0)
10122 ix86_add_queued_cfa_restore_notes (insn);
10123
10124 if (set_cfa)
10125 {
10126 rtx r;
10127
10128 gcc_assert (m->fs.cfa_reg == src);
10129 m->fs.cfa_offset += INTVAL (offset);
10130 m->fs.cfa_reg = dest;
10131
10132 r = gen_rtx_PLUS (Pmode, src, offset);
10133 r = gen_rtx_SET (VOIDmode, dest, r);
10134 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10135 RTX_FRAME_RELATED_P (insn) = 1;
10136 }
10137 else if (style < 0)
10138 {
10139 RTX_FRAME_RELATED_P (insn) = 1;
10140 if (add_frame_related_expr)
10141 {
10142 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10143 r = gen_rtx_SET (VOIDmode, dest, r);
10144 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10145 }
10146 }
10147
10148 if (dest == stack_pointer_rtx)
10149 {
10150 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10151 bool valid = m->fs.sp_valid;
10152
10153 if (src == hard_frame_pointer_rtx)
10154 {
10155 valid = m->fs.fp_valid;
10156 ooffset = m->fs.fp_offset;
10157 }
10158 else if (src == crtl->drap_reg)
10159 {
10160 valid = m->fs.drap_valid;
10161 ooffset = 0;
10162 }
10163 else
10164 {
10165 /* Else there are two possibilities: SP itself, which we set
10166 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10167 handled by hand along the eh_return path. */
10168 gcc_checking_assert (src == stack_pointer_rtx
10169 || offset == const0_rtx);
10170 }
10171
10172 m->fs.sp_offset = ooffset - INTVAL (offset);
10173 m->fs.sp_valid = valid;
10174 }
10175 }
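/* A representative call (mirroring the prologue code further below), which
   releases ALLOCATE bytes while the stack pointer is still the CFA register:

	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1,
				   m->fs.cfa_reg == stack_pointer_rtx);

   This emits roughly a single stack-pointer adjustment, marks it frame
   related and attaches the REG_CFA_ADJUST_CFA note built above.  */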
10176
10177 /* Find an available register to be used as the dynamic realign argument
10178 pointer register. Such a register will be written in the prologue and
10179 used at the beginning of the body, so it must not be
10180 1. a parameter passing register.
10181 2. the GOT pointer.
10182 We reuse the static-chain register if it is available. Otherwise, we
10183 use DI for i386 and R13 for x86-64. We chose R13 since it has
10184 a shorter encoding.
10185
10186 Return: the regno of chosen register. */
10187
10188 static unsigned int
10189 find_drap_reg (void)
10190 {
10191 tree decl = cfun->decl;
10192
10193 if (TARGET_64BIT)
10194 {
10195 /* Use R13 for a nested function or a function that needs a static
10196 chain. Since a function with a tail call may use any caller-saved
10197 register in its epilogue, DRAP must not use a caller-saved
10198 register in such a case. */
10199 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10200 return R13_REG;
10201
10202 return R10_REG;
10203 }
10204 else
10205 {
10206 /* Use DI for a nested function or a function that needs a static
10207 chain. Since a function with a tail call may use any caller-saved
10208 register in its epilogue, DRAP must not use a caller-saved
10209 register in such a case. */
10210 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10211 return DI_REG;
10212
10213 /* Reuse static chain register if it isn't used for parameter
10214 passing. */
10215 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10216 {
10217 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10218 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10219 return CX_REG;
10220 }
10221 return DI_REG;
10222 }
10223 }
10224
10225 /* Return minimum incoming stack alignment. */
10226
10227 static unsigned int
10228 ix86_minimum_incoming_stack_boundary (bool sibcall)
10229 {
10230 unsigned int incoming_stack_boundary;
10231
10232 /* Prefer the one specified at command line. */
10233 if (ix86_user_incoming_stack_boundary)
10234 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10235 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10236 if -mstackrealign is used, we are not checking for a sibcall, and the
10237 estimated stack alignment is 128 bits. */
10238 else if (!sibcall
10239 && !TARGET_64BIT
10240 && ix86_force_align_arg_pointer
10241 && crtl->stack_alignment_estimated == 128)
10242 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10243 else
10244 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10245
10246 /* Incoming stack alignment can be changed on individual functions
10247 via force_align_arg_pointer attribute. We use the smallest
10248 incoming stack boundary. */
10249 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10250 && lookup_attribute (ix86_force_align_arg_pointer_string,
10251 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10252 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10253
10254 /* The incoming stack frame has to be aligned at least at
10255 parm_stack_boundary. */
10256 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10257 incoming_stack_boundary = crtl->parm_stack_boundary;
10258
10259 /* The stack at the entry of main is aligned by the runtime. We use the
10260 smallest incoming stack boundary. */
10261 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10262 && DECL_NAME (current_function_decl)
10263 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10264 && DECL_FILE_SCOPE_P (current_function_decl))
10265 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10266
10267 return incoming_stack_boundary;
10268 }
10269
10270 /* Update incoming stack boundary and estimated stack alignment. */
10271
10272 static void
10273 ix86_update_stack_boundary (void)
10274 {
10275 ix86_incoming_stack_boundary
10276 = ix86_minimum_incoming_stack_boundary (false);
10277
10278 /* x86_64 varargs need 16-byte stack alignment for the register save
10279 area. */
10280 if (TARGET_64BIT
10281 && cfun->stdarg
10282 && crtl->stack_alignment_estimated < 128)
10283 crtl->stack_alignment_estimated = 128;
10284 }
10285
10286 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10287 needed or an rtx for DRAP otherwise. */
10288
10289 static rtx
10290 ix86_get_drap_rtx (void)
10291 {
10292 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10293 crtl->need_drap = true;
10294
10295 if (stack_realign_drap)
10296 {
10297 /* Assign DRAP to vDRAP and return vDRAP. */
10298 unsigned int regno = find_drap_reg ();
10299 rtx drap_vreg;
10300 rtx arg_ptr;
10301 rtx_insn *seq, *insn;
10302
10303 arg_ptr = gen_rtx_REG (Pmode, regno);
10304 crtl->drap_reg = arg_ptr;
10305
10306 start_sequence ();
10307 drap_vreg = copy_to_reg (arg_ptr);
10308 seq = get_insns ();
10309 end_sequence ();
10310
10311 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10312 if (!optimize)
10313 {
10314 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10315 RTX_FRAME_RELATED_P (insn) = 1;
10316 }
10317 return drap_vreg;
10318 }
10319 else
10320 return NULL;
10321 }
10322
10323 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10324
10325 static rtx
10326 ix86_internal_arg_pointer (void)
10327 {
10328 return virtual_incoming_args_rtx;
10329 }
10330
10331 struct scratch_reg {
10332 rtx reg;
10333 bool saved;
10334 };
10335
10336 /* Return a short-lived scratch register for use on function entry.
10337 In 32-bit mode, it is valid only after the registers are saved
10338 in the prologue. This register must be released by means of
10339 release_scratch_register_on_entry once it is dead. */
10340
10341 static void
10342 get_scratch_register_on_entry (struct scratch_reg *sr)
10343 {
10344 int regno;
10345
10346 sr->saved = false;
10347
10348 if (TARGET_64BIT)
10349 {
10350 /* We always use R11 in 64-bit mode. */
10351 regno = R11_REG;
10352 }
10353 else
10354 {
10355 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10356 bool fastcall_p
10357 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10358 bool thiscall_p
10359 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10360 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10361 int regparm = ix86_function_regparm (fntype, decl);
10362 int drap_regno
10363 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10364
10365 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10366 for the static chain register. */
10367 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10368 && drap_regno != AX_REG)
10369 regno = AX_REG;
10370 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10371 for the static chain register. */
10372 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10373 regno = AX_REG;
10374 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10375 regno = DX_REG;
10376 /* ecx is the static chain register. */
10377 else if (regparm < 3 && !fastcall_p && !thiscall_p
10378 && !static_chain_p
10379 && drap_regno != CX_REG)
10380 regno = CX_REG;
10381 else if (ix86_save_reg (BX_REG, true))
10382 regno = BX_REG;
10383 /* esi is the static chain register. */
10384 else if (!(regparm == 3 && static_chain_p)
10385 && ix86_save_reg (SI_REG, true))
10386 regno = SI_REG;
10387 else if (ix86_save_reg (DI_REG, true))
10388 regno = DI_REG;
10389 else
10390 {
10391 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10392 sr->saved = true;
10393 }
10394 }
10395
10396 sr->reg = gen_rtx_REG (Pmode, regno);
10397 if (sr->saved)
10398 {
10399 rtx insn = emit_insn (gen_push (sr->reg));
10400 RTX_FRAME_RELATED_P (insn) = 1;
10401 }
10402 }
10403
10404 /* Release a scratch register obtained from the preceding function. */
10405
10406 static void
10407 release_scratch_register_on_entry (struct scratch_reg *sr)
10408 {
10409 if (sr->saved)
10410 {
10411 struct machine_function *m = cfun->machine;
10412 rtx x, insn = emit_insn (gen_pop (sr->reg));
10413
10414 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10415 RTX_FRAME_RELATED_P (insn) = 1;
10416 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10417 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10418 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10419 m->fs.sp_offset -= UNITS_PER_WORD;
10420 }
10421 }
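/* Typical usage, as in the stack probing routines below (sketch only):

	struct scratch_reg sr;
	get_scratch_register_on_entry (&sr);
	... emit insns that use sr.reg ...
	release_scratch_register_on_entry (&sr);

   When no suitable register is free, the pair wraps the use in a push/pop
   of the chosen register and keeps the unwind state consistent.  */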
10422
10423 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
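/* With the usual STACK_CHECK_PROBE_INTERVAL_EXP default of 12 this is
   4096 bytes, i.e. one probe per page; the exact value is target
   configurable.  */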
10424
10425 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10426
10427 static void
10428 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10429 {
10430 /* We skip the probe for the first interval + a small dope of 4 words and
10431 probe that many bytes past the specified size to maintain a protection
10432 area at the bottom of the stack. */
10433 const int dope = 4 * UNITS_PER_WORD;
10434 rtx size_rtx = GEN_INT (size), last;
10435
10436 /* See if we have a constant small number of probes to generate. If so,
10437 that's the easy case. The run-time loop is made up of 11 insns in the
10438 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10439 for n # of intervals. */
10440 if (size <= 5 * PROBE_INTERVAL)
10441 {
10442 HOST_WIDE_INT i, adjust;
10443 bool first_probe = true;
10444
10445 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10446 values of N from 1 until it exceeds SIZE. If only one probe is
10447 needed, this will not generate any code. Then adjust and probe
10448 to PROBE_INTERVAL + SIZE. */
10449 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10450 {
10451 if (first_probe)
10452 {
10453 adjust = 2 * PROBE_INTERVAL + dope;
10454 first_probe = false;
10455 }
10456 else
10457 adjust = PROBE_INTERVAL;
10458
10459 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10460 plus_constant (Pmode, stack_pointer_rtx,
10461 -adjust)));
10462 emit_stack_probe (stack_pointer_rtx);
10463 }
10464
10465 if (first_probe)
10466 adjust = size + PROBE_INTERVAL + dope;
10467 else
10468 adjust = size + PROBE_INTERVAL - i;
10469
10470 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10471 plus_constant (Pmode, stack_pointer_rtx,
10472 -adjust)));
10473 emit_stack_probe (stack_pointer_rtx);
10474
10475 /* Adjust back to account for the additional first interval. */
10476 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10477 plus_constant (Pmode, stack_pointer_rtx,
10478 PROBE_INTERVAL + dope)));
10479 }
10480
10481 /* Otherwise, do the same as above, but in a loop. Note that we must be
10482 extra careful with variables wrapping around because we might be at
10483 the very top (or the very bottom) of the address space and we have
10484 to be able to handle this case properly; in particular, we use an
10485 equality test for the loop condition. */
10486 else
10487 {
10488 HOST_WIDE_INT rounded_size;
10489 struct scratch_reg sr;
10490
10491 get_scratch_register_on_entry (&sr);
10492
10493
10494 /* Step 1: round SIZE to the previous multiple of the interval. */
10495
10496 rounded_size = size & -PROBE_INTERVAL;
10497
10498
10499 /* Step 2: compute initial and final value of the loop counter. */
10500
10501 /* SP = SP_0 + PROBE_INTERVAL. */
10502 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10503 plus_constant (Pmode, stack_pointer_rtx,
10504 - (PROBE_INTERVAL + dope))));
10505
10506 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10507 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10508 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10509 gen_rtx_PLUS (Pmode, sr.reg,
10510 stack_pointer_rtx)));
10511
10512
10513 /* Step 3: the loop
10514
10515 while (SP != LAST_ADDR)
10516 {
10517 SP = SP + PROBE_INTERVAL
10518 probe at SP
10519 }
10520
10521 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10522 values of N from 1 until it is equal to ROUNDED_SIZE. */
10523
10524 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10525
10526
10527 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10528 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10529
10530 if (size != rounded_size)
10531 {
10532 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10533 plus_constant (Pmode, stack_pointer_rtx,
10534 rounded_size - size)));
10535 emit_stack_probe (stack_pointer_rtx);
10536 }
10537
10538 /* Adjust back to account for the additional first interval. */
10539 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10540 plus_constant (Pmode, stack_pointer_rtx,
10541 PROBE_INTERVAL + dope)));
10542
10543 release_scratch_register_on_entry (&sr);
10544 }
10545
10546 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10547
10548 /* Even if the stack pointer isn't the CFA register, we need to correctly
10549 describe the adjustments made to it, in particular differentiate the
10550 frame-related ones from the frame-unrelated ones. */
10551 if (size > 0)
10552 {
10553 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10554 XVECEXP (expr, 0, 0)
10555 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10556 plus_constant (Pmode, stack_pointer_rtx, -size));
10557 XVECEXP (expr, 0, 1)
10558 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10559 plus_constant (Pmode, stack_pointer_rtx,
10560 PROBE_INTERVAL + dope + size));
10561 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10562 RTX_FRAME_RELATED_P (last) = 1;
10563
10564 cfun->machine->fs.sp_offset += size;
10565 }
10566
10567 /* Make sure nothing is scheduled before we are done. */
10568 emit_insn (gen_blockage ());
10569 }
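/* Rough illustration of the small-constant case above (assembly is
   approximate, 64-bit, dope == 32 bytes): for SIZE == 2 * PROBE_INTERVAL
   the emitted sequence is along the lines of

	sub	$(2 * PROBE_INTERVAL + 32), %rsp
	or	$0, (%rsp)
	sub	$PROBE_INTERVAL, %rsp
	or	$0, (%rsp)
	add	$(PROBE_INTERVAL + 32), %rsp

   for a net decrement of SIZE, touching each PROBE_INTERVAL chunk.  */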
10570
10571 /* Adjust the stack pointer up to REG while probing it. */
10572
10573 const char *
10574 output_adjust_stack_and_probe (rtx reg)
10575 {
10576 static int labelno = 0;
10577 char loop_lab[32], end_lab[32];
10578 rtx xops[2];
10579
10580 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10581 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10582
10583 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10584
10585 /* Jump to END_LAB if SP == LAST_ADDR. */
10586 xops[0] = stack_pointer_rtx;
10587 xops[1] = reg;
10588 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10589 fputs ("\tje\t", asm_out_file);
10590 assemble_name_raw (asm_out_file, end_lab);
10591 fputc ('\n', asm_out_file);
10592
10593 /* SP = SP + PROBE_INTERVAL. */
10594 xops[1] = GEN_INT (PROBE_INTERVAL);
10595 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10596
10597 /* Probe at SP. */
10598 xops[1] = const0_rtx;
10599 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10600
10601 fprintf (asm_out_file, "\tjmp\t");
10602 assemble_name_raw (asm_out_file, loop_lab);
10603 fputc ('\n', asm_out_file);
10604
10605 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10606
10607 return "";
10608 }
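/* The text printed by the routine above looks roughly like this
   (AT&T syntax, 64-bit, PROBE_INTERVAL == 4096, %r11 holding LAST_ADDR):

   .LPSRL0:
	cmpq	%r11, %rsp
	je	.LPSRE0
	subq	$4096, %rsp
	orq	$0, (%rsp)
	jmp	.LPSRL0
   .LPSRE0:

   Register and label names are illustrative; they depend on the scratch
   register chosen and on the label counter.  */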
10609
10610 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10611 inclusive. These are offsets from the current stack pointer. */
10612
10613 static void
10614 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10615 {
10616 /* See if we have a constant small number of probes to generate. If so,
10617 that's the easy case. The run-time loop is made up of 7 insns in the
10618 generic case while the compile-time loop is made up of n insns for n #
10619 of intervals. */
10620 if (size <= 7 * PROBE_INTERVAL)
10621 {
10622 HOST_WIDE_INT i;
10623
10624 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10625 it exceeds SIZE. If only one probe is needed, this will not
10626 generate any code. Then probe at FIRST + SIZE. */
10627 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10628 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10629 -(first + i)));
10630
10631 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10632 -(first + size)));
10633 }
10634
10635 /* Otherwise, do the same as above, but in a loop. Note that we must be
10636 extra careful with variables wrapping around because we might be at
10637 the very top (or the very bottom) of the address space and we have
10638 to be able to handle this case properly; in particular, we use an
10639 equality test for the loop condition. */
10640 else
10641 {
10642 HOST_WIDE_INT rounded_size, last;
10643 struct scratch_reg sr;
10644
10645 get_scratch_register_on_entry (&sr);
10646
10647
10648 /* Step 1: round SIZE to the previous multiple of the interval. */
10649
10650 rounded_size = size & -PROBE_INTERVAL;
10651
10652
10653 /* Step 2: compute initial and final value of the loop counter. */
10654
10655 /* TEST_OFFSET = FIRST. */
10656 emit_move_insn (sr.reg, GEN_INT (-first));
10657
10658 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10659 last = first + rounded_size;
10660
10661
10662 /* Step 3: the loop
10663
10664 while (TEST_ADDR != LAST_ADDR)
10665 {
10666 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10667 probe at TEST_ADDR
10668 }
10669
10670 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10671 until it is equal to ROUNDED_SIZE. */
10672
10673 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10674
10675
10676 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10677 that SIZE is equal to ROUNDED_SIZE. */
10678
10679 if (size != rounded_size)
10680 emit_stack_probe (plus_constant (Pmode,
10681 gen_rtx_PLUS (Pmode,
10682 stack_pointer_rtx,
10683 sr.reg),
10684 rounded_size - size));
10685
10686 release_scratch_register_on_entry (&sr);
10687 }
10688
10689 /* Make sure nothing is scheduled before we are done. */
10690 emit_insn (gen_blockage ());
10691 }
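/* Worked example for the small-constant case above: with FIRST == F and
   SIZE == 3 * PROBE_INTERVAL + 100 (values illustrative), probes are
   emitted at sp - (F + PROBE_INTERVAL), sp - (F + 2 * PROBE_INTERVAL),
   sp - (F + 3 * PROBE_INTERVAL) and finally at sp - (F + SIZE).  */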
10692
10693 /* Probe a range of stack addresses from REG to END, inclusive. These are
10694 offsets from the current stack pointer. */
10695
10696 const char *
10697 output_probe_stack_range (rtx reg, rtx end)
10698 {
10699 static int labelno = 0;
10700 char loop_lab[32], end_lab[32];
10701 rtx xops[3];
10702
10703 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10704 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10705
10706 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10707
10708 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10709 xops[0] = reg;
10710 xops[1] = end;
10711 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10712 fputs ("\tje\t", asm_out_file);
10713 assemble_name_raw (asm_out_file, end_lab);
10714 fputc ('\n', asm_out_file);
10715
10716 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10717 xops[1] = GEN_INT (PROBE_INTERVAL);
10718 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10719
10720 /* Probe at TEST_ADDR. */
10721 xops[0] = stack_pointer_rtx;
10722 xops[1] = reg;
10723 xops[2] = const0_rtx;
10724 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10725
10726 fprintf (asm_out_file, "\tjmp\t");
10727 assemble_name_raw (asm_out_file, loop_lab);
10728 fputc ('\n', asm_out_file);
10729
10730 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10731
10732 return "";
10733 }
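/* Approximate output of the routine above (AT&T syntax, 64-bit,
   PROBE_INTERVAL == 4096, scratch register %r11 holding the negated test
   offset, END being the immediate -LAST):

   .LPSRL1:
	cmpq	$-LAST, %r11
	je	.LPSRE1
	subq	$4096, %r11
	orq	$0, (%rsp,%r11)
	jmp	.LPSRL1
   .LPSRE1:

   Names are illustrative only.  */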
10734
10735 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10736 to be generated in correct form. */
10737 static void
10738 ix86_finalize_stack_realign_flags (void)
10739 {
10740 /* Check whether stack realignment is really needed after reload, and
10741 store the result in cfun. */
10742 unsigned int incoming_stack_boundary
10743 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10744 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10745 unsigned int stack_realign = (incoming_stack_boundary
10746 < (crtl->is_leaf
10747 ? crtl->max_used_stack_slot_alignment
10748 : crtl->stack_alignment_needed));
10749
10750 if (crtl->stack_realign_finalized)
10751 {
10752 /* After stack_realign_needed is finalized, we can no longer
10753 change it. */
10754 gcc_assert (crtl->stack_realign_needed == stack_realign);
10755 return;
10756 }
10757
10758 /* If the only reason for frame_pointer_needed is that we conservatively
10759 assumed stack realignment might be needed, but in the end nothing that
10760 needed the stack alignment had been spilled, clear frame_pointer_needed
10761 and say we don't need stack realignment. */
10762 if (stack_realign
10763 && frame_pointer_needed
10764 && crtl->is_leaf
10765 && flag_omit_frame_pointer
10766 && crtl->sp_is_unchanging
10767 && !ix86_current_function_calls_tls_descriptor
10768 && !crtl->accesses_prior_frames
10769 && !cfun->calls_alloca
10770 && !crtl->calls_eh_return
10771 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10772 && !ix86_frame_pointer_required ()
10773 && get_frame_size () == 0
10774 && ix86_nsaved_sseregs () == 0
10775 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10776 {
10777 HARD_REG_SET set_up_by_prologue, prologue_used;
10778 basic_block bb;
10779
10780 CLEAR_HARD_REG_SET (prologue_used);
10781 CLEAR_HARD_REG_SET (set_up_by_prologue);
10782 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10783 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10784 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10785 HARD_FRAME_POINTER_REGNUM);
10786 FOR_EACH_BB_FN (bb, cfun)
10787 {
10788 rtx_insn *insn;
10789 FOR_BB_INSNS (bb, insn)
10790 if (NONDEBUG_INSN_P (insn)
10791 && requires_stack_frame_p (insn, prologue_used,
10792 set_up_by_prologue))
10793 {
10794 crtl->stack_realign_needed = stack_realign;
10795 crtl->stack_realign_finalized = true;
10796 return;
10797 }
10798 }
10799
10800 /* If drap has been set, but it actually isn't live at the start
10801 of the function, there is no reason to set it up. */
10802 if (crtl->drap_reg)
10803 {
10804 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10805 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10806 {
10807 crtl->drap_reg = NULL_RTX;
10808 crtl->need_drap = false;
10809 }
10810 }
10811 else
10812 cfun->machine->no_drap_save_restore = true;
10813
10814 frame_pointer_needed = false;
10815 stack_realign = false;
10816 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10817 crtl->stack_alignment_needed = incoming_stack_boundary;
10818 crtl->stack_alignment_estimated = incoming_stack_boundary;
10819 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10820 crtl->preferred_stack_boundary = incoming_stack_boundary;
10821 df_finish_pass (true);
10822 df_scan_alloc (NULL);
10823 df_scan_blocks ();
10824 df_compute_regs_ever_live (true);
10825 df_analyze ();
10826 }
10827
10828 crtl->stack_realign_needed = stack_realign;
10829 crtl->stack_realign_finalized = true;
10830 }
10831
10832 /* Expand the prologue into a bunch of separate insns. */
10833
10834 void
10835 ix86_expand_prologue (void)
10836 {
10837 struct machine_function *m = cfun->machine;
10838 rtx insn, t;
10839 struct ix86_frame frame;
10840 HOST_WIDE_INT allocate;
10841 bool int_registers_saved;
10842 bool sse_registers_saved;
10843
10844 ix86_finalize_stack_realign_flags ();
10845
10846 /* DRAP should not coexist with stack_realign_fp */
10847 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10848
10849 memset (&m->fs, 0, sizeof (m->fs));
10850
10851 /* Initialize CFA state for before the prologue. */
10852 m->fs.cfa_reg = stack_pointer_rtx;
10853 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10854
10855 /* Track SP offset to the CFA. We continue tracking this after we've
10856 swapped the CFA register away from SP. In the case of re-alignment
10857 this is fudged; we're interested in offsets within the local frame. */
10858 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10859 m->fs.sp_valid = true;
10860
10861 ix86_compute_frame_layout (&frame);
10862
10863 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10864 {
10865 /* We should have already generated an error for any use of
10866 ms_hook on a nested function. */
10867 gcc_checking_assert (!ix86_static_chain_on_stack);
10868
10869 /* Check whether profiling is active and we shall use the
10870 profiling-before-prologue variant. If so, issue a sorry. */
10871 if (crtl->profile && flag_fentry != 0)
10872 sorry ("ms_hook_prologue attribute isn%'t compatible "
10873 "with -mfentry for 32-bit");
10874
10875 /* In ix86_asm_output_function_label we emitted:
10876 8b ff movl.s %edi,%edi
10877 55 push %ebp
10878 8b ec movl.s %esp,%ebp
10879
10880 This matches the hookable function prologue in Win32 API
10881 functions in Microsoft Windows XP Service Pack 2 and newer.
10882 Wine uses this to enable Windows apps to hook the Win32 API
10883 functions provided by Wine.
10884
10885 What that means is that we've already set up the frame pointer. */
10886
10887 if (frame_pointer_needed
10888 && !(crtl->drap_reg && crtl->stack_realign_needed))
10889 {
10890 rtx push, mov;
10891
10892 /* We've decided to use the frame pointer already set up.
10893 Describe this to the unwinder by pretending that both
10894 push and mov insns happen right here.
10895
10896 Putting the unwind info here at the end of the ms_hook
10897 is done so that we can make absolutely certain we get
10898 the required byte sequence at the start of the function,
10899 rather than relying on an assembler that can produce
10900 the exact encoding required.
10901
10902 However it does mean (in the unpatched case) that we have
10903 a 1 insn window where the asynchronous unwind info is
10904 incorrect. However, if we placed the unwind info at
10905 its correct location we would have incorrect unwind info
10906 in the patched case. Which is probably all moot since
10907 I don't expect Wine generates dwarf2 unwind info for the
10908 system libraries that use this feature. */
10909
10910 insn = emit_insn (gen_blockage ());
10911
10912 push = gen_push (hard_frame_pointer_rtx);
10913 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10914 stack_pointer_rtx);
10915 RTX_FRAME_RELATED_P (push) = 1;
10916 RTX_FRAME_RELATED_P (mov) = 1;
10917
10918 RTX_FRAME_RELATED_P (insn) = 1;
10919 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10920 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10921
10922 /* Note that gen_push incremented m->fs.cfa_offset, even
10923 though we didn't emit the push insn here. */
10924 m->fs.cfa_reg = hard_frame_pointer_rtx;
10925 m->fs.fp_offset = m->fs.cfa_offset;
10926 m->fs.fp_valid = true;
10927 }
10928 else
10929 {
10930 /* The frame pointer is not needed so pop %ebp again.
10931 This leaves us with a pristine state. */
10932 emit_insn (gen_pop (hard_frame_pointer_rtx));
10933 }
10934 }
10935
10936 /* The first insn of a function that accepts its static chain on the
10937 stack is to push the register that would be filled in by a direct
10938 call. This insn will be skipped by the trampoline. */
10939 else if (ix86_static_chain_on_stack)
10940 {
10941 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10942 emit_insn (gen_blockage ());
10943
10944 /* We don't want to interpret this push insn as a register save,
10945 only as a stack adjustment. The real copy of the register as
10946 a save will be done later, if needed. */
10947 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10948 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10949 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10950 RTX_FRAME_RELATED_P (insn) = 1;
10951 }
10952
10953 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10954 DRAP is needed and stack realignment is really needed after reload. */
10955 if (stack_realign_drap)
10956 {
10957 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10958
10959 /* We only need to push the parameter pointer reg if it is caller-saved. */
10960 if (!call_used_regs[REGNO (crtl->drap_reg)])
10961 {
10962 /* Push arg pointer reg */
10963 insn = emit_insn (gen_push (crtl->drap_reg));
10964 RTX_FRAME_RELATED_P (insn) = 1;
10965 }
10966
10967 /* Grab the argument pointer. */
10968 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10969 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10970 RTX_FRAME_RELATED_P (insn) = 1;
10971 m->fs.cfa_reg = crtl->drap_reg;
10972 m->fs.cfa_offset = 0;
10973
10974 /* Align the stack. */
10975 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10976 stack_pointer_rtx,
10977 GEN_INT (-align_bytes)));
10978 RTX_FRAME_RELATED_P (insn) = 1;
10979
10980 /* Replicate the return address on the stack so that the return
10981 address can be reached via the (argp - 1) slot. This is needed
10982 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10983 expand_builtin_return_addr, etc. */
10984 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10985 t = gen_frame_mem (word_mode, t);
10986 insn = emit_insn (gen_push (t));
10987 RTX_FRAME_RELATED_P (insn) = 1;
10988
10989 /* For the purposes of frame and register save area addressing,
10990 we've started over with a new frame. */
10991 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10992 m->fs.realigned = true;
10993 }
10994
10995 int_registers_saved = (frame.nregs == 0);
10996 sse_registers_saved = (frame.nsseregs == 0);
10997
10998 if (frame_pointer_needed && !m->fs.fp_valid)
10999 {
11000 /* Note: AT&T enter does NOT have reversed args. Enter is probably
11001 slower on all targets. Also sdb doesn't like it. */
11002 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11003 RTX_FRAME_RELATED_P (insn) = 1;
11004
11005 /* Push registers now, before setting the frame pointer
11006 on SEH target. */
11007 if (!int_registers_saved
11008 && TARGET_SEH
11009 && !frame.save_regs_using_mov)
11010 {
11011 ix86_emit_save_regs ();
11012 int_registers_saved = true;
11013 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11014 }
11015
11016 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11017 {
11018 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11019 RTX_FRAME_RELATED_P (insn) = 1;
11020
11021 if (m->fs.cfa_reg == stack_pointer_rtx)
11022 m->fs.cfa_reg = hard_frame_pointer_rtx;
11023 m->fs.fp_offset = m->fs.sp_offset;
11024 m->fs.fp_valid = true;
11025 }
11026 }
11027
11028 if (!int_registers_saved)
11029 {
11030 /* If saving registers via PUSH, do so now. */
11031 if (!frame.save_regs_using_mov)
11032 {
11033 ix86_emit_save_regs ();
11034 int_registers_saved = true;
11035 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11036 }
11037
11038 /* When using the red zone we may start saving registers before allocating
11039 the stack frame, saving one cycle of the prologue. However, avoid
11040 doing this if we have to probe the stack; at least on x86_64 the
11041 stack probe can turn into a call that clobbers a red zone location. */
11042 else if (ix86_using_red_zone ()
11043 && (! TARGET_STACK_PROBE
11044 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11045 {
11046 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11047 int_registers_saved = true;
11048 }
11049 }
11050
11051 if (stack_realign_fp)
11052 {
11053 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11054 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11055
11056 /* The computation of the size of the re-aligned stack frame means
11057 that we must allocate the size of the register save area before
11058 performing the actual alignment. Otherwise we cannot guarantee
11059 that there's enough storage above the realignment point. */
11060 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11061 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11062 GEN_INT (m->fs.sp_offset
11063 - frame.sse_reg_save_offset),
11064 -1, false);
11065
11066 /* Align the stack. */
11067 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11068 stack_pointer_rtx,
11069 GEN_INT (-align_bytes)));
11070
11071 /* For the purposes of register save area addressing, the stack
11072 pointer is no longer valid. As for the value of sp_offset,
11073 see ix86_compute_frame_layout, which we need to match in order
11074 to pass verification of stack_pointer_offset at the end. */
11075 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11076 m->fs.sp_valid = false;
11077 }
11078
11079 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11080
11081 if (flag_stack_usage_info)
11082 {
11083 /* We start to count from ARG_POINTER. */
11084 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11085
11086 /* If it was realigned, take into account the fake frame. */
11087 if (stack_realign_drap)
11088 {
11089 if (ix86_static_chain_on_stack)
11090 stack_size += UNITS_PER_WORD;
11091
11092 if (!call_used_regs[REGNO (crtl->drap_reg)])
11093 stack_size += UNITS_PER_WORD;
11094
11095 /* This over-estimates by one minimal stack alignment unit, but
11096 mitigates that by counting in the new return-address slot. */
11097 current_function_dynamic_stack_size
11098 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11099 }
11100
11101 current_function_static_stack_size = stack_size;
11102 }
11103
11104 /* On SEH target with very large frame size, allocate an area to save
11105 SSE registers (as the very large allocation won't be described). */
11106 if (TARGET_SEH
11107 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11108 && !sse_registers_saved)
11109 {
11110 HOST_WIDE_INT sse_size =
11111 frame.sse_reg_save_offset - frame.reg_save_offset;
11112
11113 gcc_assert (int_registers_saved);
11114
11115 /* No need to do stack checking as the area will be immediately
11116 written. */
11117 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11118 GEN_INT (-sse_size), -1,
11119 m->fs.cfa_reg == stack_pointer_rtx);
11120 allocate -= sse_size;
11121 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11122 sse_registers_saved = true;
11123 }
11124
11125 /* The stack has already been decremented by the instruction calling us
11126 so probe if the size is non-negative to preserve the protection area. */
11127 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11128 {
11129 /* We expect the registers to be saved when probes are used. */
11130 gcc_assert (int_registers_saved);
11131
11132 if (STACK_CHECK_MOVING_SP)
11133 {
11134 if (!(crtl->is_leaf && !cfun->calls_alloca
11135 && allocate <= PROBE_INTERVAL))
11136 {
11137 ix86_adjust_stack_and_probe (allocate);
11138 allocate = 0;
11139 }
11140 }
11141 else
11142 {
11143 HOST_WIDE_INT size = allocate;
11144
11145 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11146 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11147
11148 if (TARGET_STACK_PROBE)
11149 {
11150 if (crtl->is_leaf && !cfun->calls_alloca)
11151 {
11152 if (size > PROBE_INTERVAL)
11153 ix86_emit_probe_stack_range (0, size);
11154 }
11155 else
11156 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11157 }
11158 else
11159 {
11160 if (crtl->is_leaf && !cfun->calls_alloca)
11161 {
11162 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11163 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11164 size - STACK_CHECK_PROTECT);
11165 }
11166 else
11167 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11168 }
11169 }
11170 }
11171
11172 if (allocate == 0)
11173 ;
11174 else if (!ix86_target_stack_probe ()
11175 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11176 {
11177 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11178 GEN_INT (-allocate), -1,
11179 m->fs.cfa_reg == stack_pointer_rtx);
11180 }
11181 else
11182 {
11183 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11184 rtx r10 = NULL;
11185 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11186 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11187 bool eax_live = ix86_eax_live_at_start_p ();
11188 bool r10_live = false;
11189
11190 if (TARGET_64BIT)
11191 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11192
11193 if (eax_live)
11194 {
11195 insn = emit_insn (gen_push (eax));
11196 allocate -= UNITS_PER_WORD;
11197 /* Note that SEH directives need to continue tracking the stack
11198 pointer even after the frame pointer has been set up. */
11199 if (sp_is_cfa_reg || TARGET_SEH)
11200 {
11201 if (sp_is_cfa_reg)
11202 m->fs.cfa_offset += UNITS_PER_WORD;
11203 RTX_FRAME_RELATED_P (insn) = 1;
11204 }
11205 }
11206
11207 if (r10_live)
11208 {
11209 r10 = gen_rtx_REG (Pmode, R10_REG);
11210 insn = emit_insn (gen_push (r10));
11211 allocate -= UNITS_PER_WORD;
11212 if (sp_is_cfa_reg || TARGET_SEH)
11213 {
11214 if (sp_is_cfa_reg)
11215 m->fs.cfa_offset += UNITS_PER_WORD;
11216 RTX_FRAME_RELATED_P (insn) = 1;
11217 }
11218 }
11219
11220 emit_move_insn (eax, GEN_INT (allocate));
11221 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11222
11223 /* Use the fact that AX still contains ALLOCATE. */
11224 adjust_stack_insn = (Pmode == DImode
11225 ? gen_pro_epilogue_adjust_stack_di_sub
11226 : gen_pro_epilogue_adjust_stack_si_sub);
11227
11228 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11229 stack_pointer_rtx, eax));
11230
11231 if (sp_is_cfa_reg || TARGET_SEH)
11232 {
11233 if (sp_is_cfa_reg)
11234 m->fs.cfa_offset += allocate;
11235 RTX_FRAME_RELATED_P (insn) = 1;
11236 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11237 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11238 plus_constant (Pmode, stack_pointer_rtx,
11239 -allocate)));
11240 }
11241 m->fs.sp_offset += allocate;
11242
11243 /* Use stack_pointer_rtx for relative addressing so that code
11244 works for realigned stack, too. */
11245 if (r10_live && eax_live)
11246 {
11247 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11248 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11249 gen_frame_mem (word_mode, t));
11250 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11251 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11252 gen_frame_mem (word_mode, t));
11253 }
11254 else if (eax_live || r10_live)
11255 {
11256 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11257 emit_move_insn (gen_rtx_REG (word_mode,
11258 (eax_live ? AX_REG : R10_REG)),
11259 gen_frame_mem (word_mode, t));
11260 }
11261 }
11262 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11263
11264 /* If we haven't already set up the frame pointer, do so now. */
11265 if (frame_pointer_needed && !m->fs.fp_valid)
11266 {
11267 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11268 GEN_INT (frame.stack_pointer_offset
11269 - frame.hard_frame_pointer_offset));
11270 insn = emit_insn (insn);
11271 RTX_FRAME_RELATED_P (insn) = 1;
11272 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11273
11274 if (m->fs.cfa_reg == stack_pointer_rtx)
11275 m->fs.cfa_reg = hard_frame_pointer_rtx;
11276 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11277 m->fs.fp_valid = true;
11278 }
11279
11280 if (!int_registers_saved)
11281 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11282 if (!sse_registers_saved)
11283 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11284
11285 if (crtl->drap_reg && !crtl->stack_realign_needed)
11286 {
11287 /* vDRAP is set up, but after reload it turns out stack realignment
11288 isn't necessary; here we emit prologue code to set up DRAP
11289 without the stack realignment adjustment. */
11290 t = choose_baseaddr (0);
11291 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11292 }
11293
11294 /* Prevent instructions from being scheduled into the register save push
11295 sequence when access to the red-zone area is done through the frame pointer.
11296 The offset between the frame pointer and the stack pointer is calculated
11297 relative to the value of the stack pointer at the end of the function
11298 prologue, and moving instructions that access the red-zone area via the frame
11299 pointer into the push sequence violates this assumption. */
11300 if (frame_pointer_needed && frame.red_zone_size)
11301 emit_insn (gen_memory_blockage ());
11302
11303 /* Emit cld instruction if stringops are used in the function. */
11304 if (TARGET_CLD && ix86_current_function_needs_cld)
11305 emit_insn (gen_cld ());
11306
11307 /* SEH requires that the prologue end within 256 bytes of the start of
11308 the function. Prevent instruction schedules that would extend that.
11309 Further, prevent alloca modifications to the stack pointer from being
11310 combined with prologue modifications. */
11311 if (TARGET_SEH)
11312 emit_insn (gen_prologue_use (stack_pointer_rtx));
11313 }
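/* For orientation only (not emitted verbatim): a plain 64-bit function that
   needs a frame pointer, saves %rbx with pushes and allocates N bytes of
   locals gets a prologue along the lines of

	push	%rbp
	mov	%rsp, %rbp
	push	%rbx
	sub	$N, %rsp

   with the corresponding CFA notes attached; the DRAP, SEH, stack probing
   and ms_hook variants handled above change this sequence considerably.  */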
11314
11315 /* Emit code to restore REG using a POP insn. */
11316
11317 static void
11318 ix86_emit_restore_reg_using_pop (rtx reg)
11319 {
11320 struct machine_function *m = cfun->machine;
11321 rtx insn = emit_insn (gen_pop (reg));
11322
11323 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11324 m->fs.sp_offset -= UNITS_PER_WORD;
11325
11326 if (m->fs.cfa_reg == crtl->drap_reg
11327 && REGNO (reg) == REGNO (crtl->drap_reg))
11328 {
11329 /* Previously we'd represented the CFA as an expression
11330 like *(%ebp - 8). We've just popped that value from
11331 the stack, which means we need to reset the CFA to
11332 the drap register. This will remain until we restore
11333 the stack pointer. */
11334 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11335 RTX_FRAME_RELATED_P (insn) = 1;
11336
11337 /* This means that the DRAP register is valid for addressing too. */
11338 m->fs.drap_valid = true;
11339 return;
11340 }
11341
11342 if (m->fs.cfa_reg == stack_pointer_rtx)
11343 {
11344 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11345 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11346 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11347 RTX_FRAME_RELATED_P (insn) = 1;
11348
11349 m->fs.cfa_offset -= UNITS_PER_WORD;
11350 }
11351
11352 /* When the frame pointer is the CFA, and we pop it, we are
11353 swapping back to the stack pointer as the CFA. This happens
11354 for stack frames that don't allocate other data, so we assume
11355 the stack pointer is now pointing at the return address, i.e.
11356 the function entry state, which makes the offset be 1 word. */
11357 if (reg == hard_frame_pointer_rtx)
11358 {
11359 m->fs.fp_valid = false;
11360 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11361 {
11362 m->fs.cfa_reg = stack_pointer_rtx;
11363 m->fs.cfa_offset -= UNITS_PER_WORD;
11364
11365 add_reg_note (insn, REG_CFA_DEF_CFA,
11366 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11367 GEN_INT (m->fs.cfa_offset)));
11368 RTX_FRAME_RELATED_P (insn) = 1;
11369 }
11370 }
11371 }
11372
11373 /* Emit code to restore saved registers using POP insns. */
11374
11375 static void
11376 ix86_emit_restore_regs_using_pop (void)
11377 {
11378 unsigned int regno;
11379
11380 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11381 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11382 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11383 }
11384
11385 /* Emit code and notes for the LEAVE instruction. */
11386
11387 static void
11388 ix86_emit_leave (void)
11389 {
11390 struct machine_function *m = cfun->machine;
11391 rtx insn = emit_insn (ix86_gen_leave ());
11392
11393 ix86_add_queued_cfa_restore_notes (insn);
11394
11395 gcc_assert (m->fs.fp_valid);
11396 m->fs.sp_valid = true;
11397 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11398 m->fs.fp_valid = false;
11399
11400 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11401 {
11402 m->fs.cfa_reg = stack_pointer_rtx;
11403 m->fs.cfa_offset = m->fs.sp_offset;
11404
11405 add_reg_note (insn, REG_CFA_DEF_CFA,
11406 plus_constant (Pmode, stack_pointer_rtx,
11407 m->fs.sp_offset));
11408 RTX_FRAME_RELATED_P (insn) = 1;
11409 }
11410 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11411 m->fs.fp_offset);
11412 }
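/* The leave insn is equivalent to "mov %ebp, %esp; pop %ebp" (with 64-bit
   registers in 64-bit mode), which is why the code above marks SP as valid
   again at fp_offset - UNITS_PER_WORD and invalidates the frame pointer.  */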
11413
11414 /* Emit code to restore saved registers using MOV insns.
11415 First register is restored from CFA - CFA_OFFSET. */
11416 static void
11417 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11418 bool maybe_eh_return)
11419 {
11420 struct machine_function *m = cfun->machine;
11421 unsigned int regno;
11422
11423 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11424 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11425 {
11426 rtx reg = gen_rtx_REG (word_mode, regno);
11427 rtx insn, mem;
11428
11429 mem = choose_baseaddr (cfa_offset);
11430 mem = gen_frame_mem (word_mode, mem);
11431 insn = emit_move_insn (reg, mem);
11432
11433 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11434 {
11435 /* Previously we'd represented the CFA as an expression
11436 like *(%ebp - 8). We've just popped that value from
11437 the stack, which means we need to reset the CFA to
11438 the drap register. This will remain until we restore
11439 the stack pointer. */
11440 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11441 RTX_FRAME_RELATED_P (insn) = 1;
11442
11443 /* This means that the DRAP register is valid for addressing. */
11444 m->fs.drap_valid = true;
11445 }
11446 else
11447 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11448
11449 cfa_offset -= UNITS_PER_WORD;
11450 }
11451 }
11452
11453 /* Emit code to restore saved SSE registers using MOV insns.
11454 First register is restored from CFA - CFA_OFFSET. */
11455 static void
11456 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11457 bool maybe_eh_return)
11458 {
11459 unsigned int regno;
11460
11461 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11462 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11463 {
11464 rtx reg = gen_rtx_REG (V4SFmode, regno);
11465 rtx mem;
11466
11467 mem = choose_baseaddr (cfa_offset);
11468 mem = gen_rtx_MEM (V4SFmode, mem);
11469 set_mem_align (mem, 128);
11470 emit_move_insn (reg, mem);
11471
11472 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11473
11474 cfa_offset -= 16;
11475 }
11476 }
11477
11478 /* Restore function stack, frame, and registers. */
11479
11480 void
11481 ix86_expand_epilogue (int style)
11482 {
11483 struct machine_function *m = cfun->machine;
11484 struct machine_frame_state frame_state_save = m->fs;
11485 struct ix86_frame frame;
11486 bool restore_regs_via_mov;
11487 bool using_drap;
11488
11489 ix86_finalize_stack_realign_flags ();
11490 ix86_compute_frame_layout (&frame);
11491
11492 m->fs.sp_valid = (!frame_pointer_needed
11493 || (crtl->sp_is_unchanging
11494 && !stack_realign_fp));
11495 gcc_assert (!m->fs.sp_valid
11496 || m->fs.sp_offset == frame.stack_pointer_offset);
11497
11498 /* The FP must be valid if the frame pointer is present. */
11499 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11500 gcc_assert (!m->fs.fp_valid
11501 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11502
11503 /* We must have *some* valid pointer to the stack frame. */
11504 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11505
11506 /* The DRAP is never valid at this point. */
11507 gcc_assert (!m->fs.drap_valid);
11508
11509 /* See the comment about red zone and frame
11510 pointer usage in ix86_expand_prologue. */
11511 if (frame_pointer_needed && frame.red_zone_size)
11512 emit_insn (gen_memory_blockage ());
11513
11514 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11515 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11516
11517 /* Determine the CFA offset of the end of the red-zone. */
11518 m->fs.red_zone_offset = 0;
11519 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11520 {
11521 /* The red-zone begins below the return address. */
11522 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11523
11524 /* When the register save area is in the aligned portion of
11525 the stack, determine the maximum runtime displacement that
11526 matches up with the aligned frame. */
11527 if (stack_realign_drap)
11528 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11529 + UNITS_PER_WORD);
11530 }
11531
11532 /* Special care must be taken for the normal return case of a function
11533 using eh_return: the eax and edx registers are marked as saved, but
11534 not restored along this path. Adjust the save location to match. */
11535 if (crtl->calls_eh_return && style != 2)
11536 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11537
11538 /* EH_RETURN requires the use of moves to function properly. */
11539 if (crtl->calls_eh_return)
11540 restore_regs_via_mov = true;
11541 /* SEH requires the use of pops to identify the epilogue. */
11542 else if (TARGET_SEH)
11543 restore_regs_via_mov = false;
11544 /* If we're only restoring one register and sp is not valid, then
11545 use a move instruction to restore the register, since it's
11546 less work than reloading sp and popping the register. */
11547 else if (!m->fs.sp_valid && frame.nregs <= 1)
11548 restore_regs_via_mov = true;
11549 else if (TARGET_EPILOGUE_USING_MOVE
11550 && cfun->machine->use_fast_prologue_epilogue
11551 && (frame.nregs > 1
11552 || m->fs.sp_offset != frame.reg_save_offset))
11553 restore_regs_via_mov = true;
11554 else if (frame_pointer_needed
11555 && !frame.nregs
11556 && m->fs.sp_offset != frame.reg_save_offset)
11557 restore_regs_via_mov = true;
11558 else if (frame_pointer_needed
11559 && TARGET_USE_LEAVE
11560 && cfun->machine->use_fast_prologue_epilogue
11561 && frame.nregs == 1)
11562 restore_regs_via_mov = true;
11563 else
11564 restore_regs_via_mov = false;
11565
11566 if (restore_regs_via_mov || frame.nsseregs)
11567 {
11568 /* Ensure that the entire register save area is addressable via
11569 the stack pointer, if we will restore via sp. */
11570 if (TARGET_64BIT
11571 && m->fs.sp_offset > 0x7fffffff
11572 && !(m->fs.fp_valid || m->fs.drap_valid)
11573 && (frame.nsseregs + frame.nregs) != 0)
11574 {
11575 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11576 GEN_INT (m->fs.sp_offset
11577 - frame.sse_reg_save_offset),
11578 style,
11579 m->fs.cfa_reg == stack_pointer_rtx);
11580 }
11581 }
11582
11583 /* If there are any SSE registers to restore, then we have to do it
11584 via moves, since there's obviously no pop for SSE regs. */
11585 if (frame.nsseregs)
11586 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11587 style == 2);
11588
11589 if (restore_regs_via_mov)
11590 {
11591 rtx t;
11592
11593 if (frame.nregs)
11594 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11595
11596 /* eh_return epilogues need %ecx added to the stack pointer. */
11597 if (style == 2)
11598 {
11599 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11600
11601 /* Stack align doesn't work with eh_return. */
11602 gcc_assert (!stack_realign_drap);
11603 /* Neither do regparm nested functions. */
11604 gcc_assert (!ix86_static_chain_on_stack);
11605
11606 if (frame_pointer_needed)
11607 {
11608 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11609 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11610 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11611
11612 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11613 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11614
11615 /* Note that we use SA as a temporary CFA, as the return
11616 address is at the proper place relative to it. We
11617 pretend this happens at the FP restore insn because
11618 prior to this insn the FP would be stored at the wrong
11619 offset relative to SA, and after this insn we have no
11620 other reasonable register to use for the CFA. We don't
11621 bother resetting the CFA to the SP for the duration of
11622 the return insn. */
11623 add_reg_note (insn, REG_CFA_DEF_CFA,
11624 plus_constant (Pmode, sa, UNITS_PER_WORD));
11625 ix86_add_queued_cfa_restore_notes (insn);
11626 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11627 RTX_FRAME_RELATED_P (insn) = 1;
11628
11629 m->fs.cfa_reg = sa;
11630 m->fs.cfa_offset = UNITS_PER_WORD;
11631 m->fs.fp_valid = false;
11632
11633 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11634 const0_rtx, style, false);
11635 }
11636 else
11637 {
11638 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11639 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11640 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11641 ix86_add_queued_cfa_restore_notes (insn);
11642
11643 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11644 if (m->fs.cfa_offset != UNITS_PER_WORD)
11645 {
11646 m->fs.cfa_offset = UNITS_PER_WORD;
11647 add_reg_note (insn, REG_CFA_DEF_CFA,
11648 plus_constant (Pmode, stack_pointer_rtx,
11649 UNITS_PER_WORD));
11650 RTX_FRAME_RELATED_P (insn) = 1;
11651 }
11652 }
11653 m->fs.sp_offset = UNITS_PER_WORD;
11654 m->fs.sp_valid = true;
11655 }
11656 }
11657 else
11658 {
11659 /* SEH requires that the function end with (1) a stack adjustment
11660 if necessary, (2) a sequence of pops, and (3) a return or
11661 jump instruction. Prevent insns from the function body from
11662 being scheduled into this sequence. */
11663 if (TARGET_SEH)
11664 {
11665 /* Prevent a catch region from being adjacent to the standard
11666 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11667 several other flags that would be interesting to test are
11668 set up yet. */
11669 if (flag_non_call_exceptions)
11670 emit_insn (gen_nops (const1_rtx));
11671 else
11672 emit_insn (gen_blockage ());
11673 }
11674
11675 /* The first step is to deallocate the stack frame so that we can
11676 pop the registers. Also do it on the SEH target for a very large
11677 frame, as the emitted instructions aren't allowed by the ABI in
11678 epilogues. */
11679 if (!m->fs.sp_valid
11680 || (TARGET_SEH
11681 && (m->fs.sp_offset - frame.reg_save_offset
11682 >= SEH_MAX_FRAME_SIZE)))
11683 {
11684 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11685 GEN_INT (m->fs.fp_offset
11686 - frame.reg_save_offset),
11687 style, false);
11688 }
11689 else if (m->fs.sp_offset != frame.reg_save_offset)
11690 {
11691 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11692 GEN_INT (m->fs.sp_offset
11693 - frame.reg_save_offset),
11694 style,
11695 m->fs.cfa_reg == stack_pointer_rtx);
11696 }
11697
11698 ix86_emit_restore_regs_using_pop ();
11699 }
11700
11701 /* If we used a frame pointer and haven't already got rid of it,
11702 then do so now. */
11703 if (m->fs.fp_valid)
11704 {
11705 /* If the stack pointer is valid and pointing at the frame
11706 pointer store address, then we only need a pop. */
11707 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11708 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11709 /* The "leave" instruction results in shorter dependency chains on
11710 CPUs that are able to grok it fast. */
11711 else if (TARGET_USE_LEAVE
11712 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11713 || !cfun->machine->use_fast_prologue_epilogue)
11714 ix86_emit_leave ();
11715 else
11716 {
11717 pro_epilogue_adjust_stack (stack_pointer_rtx,
11718 hard_frame_pointer_rtx,
11719 const0_rtx, style, !using_drap);
11720 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11721 }
11722 }
11723
11724 if (using_drap)
11725 {
11726 int param_ptr_offset = UNITS_PER_WORD;
11727 rtx insn;
11728
11729 gcc_assert (stack_realign_drap);
11730
11731 if (ix86_static_chain_on_stack)
11732 param_ptr_offset += UNITS_PER_WORD;
11733 if (!call_used_regs[REGNO (crtl->drap_reg)])
11734 param_ptr_offset += UNITS_PER_WORD;
11735
11736 insn = emit_insn (gen_rtx_SET
11737 (VOIDmode, stack_pointer_rtx,
11738 gen_rtx_PLUS (Pmode,
11739 crtl->drap_reg,
11740 GEN_INT (-param_ptr_offset))));
11741 m->fs.cfa_reg = stack_pointer_rtx;
11742 m->fs.cfa_offset = param_ptr_offset;
11743 m->fs.sp_offset = param_ptr_offset;
11744 m->fs.realigned = false;
11745
11746 add_reg_note (insn, REG_CFA_DEF_CFA,
11747 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11748 GEN_INT (param_ptr_offset)));
11749 RTX_FRAME_RELATED_P (insn) = 1;
11750
11751 if (!call_used_regs[REGNO (crtl->drap_reg)])
11752 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11753 }
11754
11755 /* At this point the stack pointer must be valid, and we must have
11756 restored all of the registers. We may not have deallocated the
11757 entire stack frame. We've delayed this until now because it may
11758 be possible to merge the local stack deallocation with the
11759 deallocation forced by ix86_static_chain_on_stack. */
11760 gcc_assert (m->fs.sp_valid);
11761 gcc_assert (!m->fs.fp_valid);
11762 gcc_assert (!m->fs.realigned);
11763 if (m->fs.sp_offset != UNITS_PER_WORD)
11764 {
11765 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11766 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11767 style, true);
11768 }
11769 else
11770 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11771
11772 /* Sibcall epilogues don't want a return instruction. */
11773 if (style == 0)
11774 {
11775 m->fs = frame_state_save;
11776 return;
11777 }
11778
11779 if (crtl->args.pops_args && crtl->args.size)
11780 {
11781 rtx popc = GEN_INT (crtl->args.pops_args);
11782
11783 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11784 address, do an explicit add, and jump indirectly to the caller. */
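/* (The reason for the 64K limit: the "ret imm16" instruction encodes the
number of bytes to pop as a 16-bit immediate.) */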
11785
11786 if (crtl->args.pops_args >= 65536)
11787 {
11788 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11789 rtx insn;
11790
11791 /* There is no "pascal" calling convention in any 64bit ABI. */
11792 gcc_assert (!TARGET_64BIT);
11793
11794 insn = emit_insn (gen_pop (ecx));
11795 m->fs.cfa_offset -= UNITS_PER_WORD;
11796 m->fs.sp_offset -= UNITS_PER_WORD;
11797
11798 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11799 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11800 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11801 add_reg_note (insn, REG_CFA_REGISTER,
11802 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11803 RTX_FRAME_RELATED_P (insn) = 1;
11804
11805 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11806 popc, -1, true);
11807 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11808 }
11809 else
11810 emit_jump_insn (gen_simple_return_pop_internal (popc));
11811 }
11812 else
11813 emit_jump_insn (gen_simple_return_internal ());
11814
11815 /* Restore the state back to the state from the prologue,
11816 so that it's correct for the next epilogue. */
11817 m->fs = frame_state_save;
11818 }
11819
11820 /* Reset from the function's potential modifications. */
11821
11822 static void
11823 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11824 {
11825 if (pic_offset_table_rtx
11826 && !ix86_use_pseudo_pic_reg ())
11827 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11828 #if TARGET_MACHO
11829 /* Mach-O doesn't support labels at the end of objects, so if
11830 it looks like we might want one, insert a NOP. */
11831 {
11832 rtx_insn *insn = get_last_insn ();
11833 rtx_insn *deleted_debug_label = NULL;
11834 while (insn
11835 && NOTE_P (insn)
11836 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11837 {
11838 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11839 notes only, instead set their CODE_LABEL_NUMBER to -1,
11840 otherwise there would be code generation differences
11841 in between -g and -g0. */
11842 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11843 deleted_debug_label = insn;
11844 insn = PREV_INSN (insn);
11845 }
11846 if (insn
11847 && (LABEL_P (insn)
11848 || (NOTE_P (insn)
11849 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11850 fputs ("\tnop\n", file);
11851 else if (deleted_debug_label)
11852 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11853 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11854 CODE_LABEL_NUMBER (insn) = -1;
11855 }
11856 #endif
11857
11858 }
11859
11860 /* Return a scratch register to use in the split stack prologue. The
11861 split stack prologue is used for -fsplit-stack. It consists of the
11862 first instructions in the function, emitted even before the regular
11863 prologue. The scratch register can be any caller-saved register which
11864 is not used for parameters or for the static chain. */
11865
11866 static unsigned int
11867 split_stack_prologue_scratch_regno (void)
11868 {
11869 if (TARGET_64BIT)
11870 return R11_REG;
11871 else
11872 {
11873 bool is_fastcall, is_thiscall;
11874 int regparm;
11875
11876 is_fastcall = (lookup_attribute ("fastcall",
11877 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11878 != NULL);
11879 is_thiscall = (lookup_attribute ("thiscall",
11880 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11881 != NULL);
11882 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11883
11884 if (is_fastcall)
11885 {
11886 if (DECL_STATIC_CHAIN (cfun->decl))
11887 {
11888 sorry ("-fsplit-stack does not support fastcall with "
11889 "nested function");
11890 return INVALID_REGNUM;
11891 }
11892 return AX_REG;
11893 }
11894 else if (is_thiscall)
11895 {
11896 if (!DECL_STATIC_CHAIN (cfun->decl))
11897 return DX_REG;
11898 return AX_REG;
11899 }
11900 else if (regparm < 3)
11901 {
11902 if (!DECL_STATIC_CHAIN (cfun->decl))
11903 return CX_REG;
11904 else
11905 {
11906 if (regparm >= 2)
11907 {
11908 sorry ("-fsplit-stack does not support 2 register "
11909 "parameters for a nested function");
11910 return INVALID_REGNUM;
11911 }
11912 return DX_REG;
11913 }
11914 }
11915 else
11916 {
11917 /* FIXME: We could make this work by pushing a register
11918 around the addition and comparison. */
11919 sorry ("-fsplit-stack does not support 3 register parameters");
11920 return INVALID_REGNUM;
11921 }
11922 }
11923 }
11924
11925 /* A SYMBOL_REF for the function which allocates new stack space for
11926 -fsplit-stack. */
11927
11928 static GTY(()) rtx split_stack_fn;
11929
11930 /* A SYMBOL_REF for the more stack function when using the large
11931 model. */
11932
11933 static GTY(()) rtx split_stack_fn_large;
11934
11935 /* Handle -fsplit-stack. These are the first instructions in the
11936 function, even before the regular prologue. */
11937
11938 void
11939 ix86_expand_split_stack_prologue (void)
11940 {
11941 struct ix86_frame frame;
11942 HOST_WIDE_INT allocate;
11943 unsigned HOST_WIDE_INT args_size;
11944 rtx_code_label *label;
11945 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11946 rtx scratch_reg = NULL_RTX;
11947 rtx_code_label *varargs_label = NULL;
11948 rtx fn;
11949
11950 gcc_assert (flag_split_stack && reload_completed);
11951
11952 ix86_finalize_stack_realign_flags ();
11953 ix86_compute_frame_layout (&frame);
11954 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11955
11956 /* This is the label we will branch to if we have enough stack
11957 space. We expect the basic block reordering pass to reverse this
11958 branch if optimizing, so that we branch in the unlikely case. */
11959 label = gen_label_rtx ();
11960
11961 /* We need to compare the stack pointer minus the frame size with
11962 the stack boundary in the TCB. The stack boundary always gives
11963 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11964 can compare directly. Otherwise we need to do an addition. */
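/* (A sketch of what this typically becomes, assuming the usual TCB layout
where the stack guard lives at a small offset from the thread pointer:
the UNSPEC_STACK_CHECK address below prints as a %fs:- or %gs:-relative
memory operand, so the comparison is roughly "cmp <guard in TCB>, <sp or
scratch reg>" followed by the conditional branch emitted further down.) */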
11965
11966 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11967 UNSPEC_STACK_CHECK);
11968 limit = gen_rtx_CONST (Pmode, limit);
11969 limit = gen_rtx_MEM (Pmode, limit);
11970 if (allocate < SPLIT_STACK_AVAILABLE)
11971 current = stack_pointer_rtx;
11972 else
11973 {
11974 unsigned int scratch_regno;
11975 rtx offset;
11976
11977 /* We need a scratch register to hold the stack pointer minus
11978 the required frame size. Since this is the very start of the
11979 function, the scratch register can be any caller-saved
11980 register which is not used for parameters. */
11981 offset = GEN_INT (- allocate);
11982 scratch_regno = split_stack_prologue_scratch_regno ();
11983 if (scratch_regno == INVALID_REGNUM)
11984 return;
11985 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11986 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11987 {
11988 /* We don't use ix86_gen_add3 in this case because it will
11989 want to split to lea, but when not optimizing the insn
11990 will not be split after this point. */
11991 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11992 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11993 offset)));
11994 }
11995 else
11996 {
11997 emit_move_insn (scratch_reg, offset);
11998 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11999 stack_pointer_rtx));
12000 }
12001 current = scratch_reg;
12002 }
12003
12004 ix86_expand_branch (GEU, current, limit, label);
12005 jump_insn = get_last_insn ();
12006 JUMP_LABEL (jump_insn) = label;
12007
12008 /* Mark the jump as very likely to be taken. */
12009 add_int_reg_note (jump_insn, REG_BR_PROB,
12010 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
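/* With REG_BR_PROB_BASE == 10000 this note carries a probability of 9900,
i.e. the branch is predicted taken 99% of the time. */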
12011
12012 if (split_stack_fn == NULL_RTX)
12013 {
12014 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12015 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12016 }
12017 fn = split_stack_fn;
12018
12019 /* Get more stack space. We pass in the desired stack space and the
12020 size of the arguments to copy to the new stack. In 32-bit mode
12021 we push the parameters; __morestack will return on a new stack
12022 anyhow. In 64-bit mode we pass the parameters in r10 and
12023 r11. */
12024 allocate_rtx = GEN_INT (allocate);
12025 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12026 call_fusage = NULL_RTX;
12027 if (TARGET_64BIT)
12028 {
12029 rtx reg10, reg11;
12030
12031 reg10 = gen_rtx_REG (Pmode, R10_REG);
12032 reg11 = gen_rtx_REG (Pmode, R11_REG);
12033
12034 /* If this function uses a static chain, it will be in %r10.
12035 Preserve it across the call to __morestack. */
12036 if (DECL_STATIC_CHAIN (cfun->decl))
12037 {
12038 rtx rax;
12039
12040 rax = gen_rtx_REG (word_mode, AX_REG);
12041 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12042 use_reg (&call_fusage, rax);
12043 }
12044
12045 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12046 && !TARGET_PECOFF)
12047 {
12048 HOST_WIDE_INT argval;
12049
12050 gcc_assert (Pmode == DImode);
12051 /* When using the large model we need to load the address
12052 into a register, and we've run out of registers. So we
12053 switch to a different calling convention, and we call a
12054 different function: __morestack_large_model. We pass the
12055 argument size in the upper 32 bits of r10 and pass the
12056 frame size in the lower 32 bits. */
12057 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12058 gcc_assert ((args_size & 0xffffffff) == args_size);
12059
12060 if (split_stack_fn_large == NULL_RTX)
12061 {
12062 split_stack_fn_large =
12063 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12064 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12065 }
12066 if (ix86_cmodel == CM_LARGE_PIC)
12067 {
12068 rtx_code_label *label;
12069 rtx x;
12070
12071 label = gen_label_rtx ();
12072 emit_label (label);
12073 LABEL_PRESERVE_P (label) = 1;
12074 emit_insn (gen_set_rip_rex64 (reg10, label));
12075 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12076 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12077 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12078 UNSPEC_GOT);
12079 x = gen_rtx_CONST (Pmode, x);
12080 emit_move_insn (reg11, x);
12081 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12082 x = gen_const_mem (Pmode, x);
12083 emit_move_insn (reg11, x);
12084 }
12085 else
12086 emit_move_insn (reg11, split_stack_fn_large);
12087
12088 fn = reg11;
12089
12090 argval = ((args_size << 16) << 16) + allocate;
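/* For example (purely illustrative values): args_size == 0x20 and
allocate == 0x1000 give argval == 0x0000002000001000, i.e. the argument
size in bits 63:32 and the frame size in bits 31:0. */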
12091 emit_move_insn (reg10, GEN_INT (argval));
12092 }
12093 else
12094 {
12095 emit_move_insn (reg10, allocate_rtx);
12096 emit_move_insn (reg11, GEN_INT (args_size));
12097 use_reg (&call_fusage, reg11);
12098 }
12099
12100 use_reg (&call_fusage, reg10);
12101 }
12102 else
12103 {
12104 emit_insn (gen_push (GEN_INT (args_size)));
12105 emit_insn (gen_push (allocate_rtx));
12106 }
12107 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12108 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12109 NULL_RTX, false);
12110 add_function_usage_to (call_insn, call_fusage);
12111
12112 /* In order to make call/return prediction work right, we now need
12113 to execute a return instruction. See
12114 libgcc/config/i386/morestack.S for the details on how this works.
12115
12116 For flow purposes gcc must not see this as a return
12117 instruction--we need control flow to continue at the subsequent
12118 label. Therefore, we use an unspec. */
12119 gcc_assert (crtl->args.pops_args < 65536);
12120 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12121
12122 /* If we are in 64-bit mode and this function uses a static chain,
12123 we saved %r10 in %rax before calling __morestack. */
12124 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12125 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12126 gen_rtx_REG (word_mode, AX_REG));
12127
12128 /* If this function calls va_start, we need to store a pointer to
12129 the arguments on the old stack, because they may not have been
12130 all copied to the new stack. At this point the old stack can be
12131 found at the frame pointer value used by __morestack, because
12132 __morestack has set that up before calling back to us. Here we
12133 store that pointer in a scratch register, and in
12134 ix86_expand_prologue we store the scratch register in a stack
12135 slot. */
12136 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12137 {
12138 unsigned int scratch_regno;
12139 rtx frame_reg;
12140 int words;
12141
12142 scratch_regno = split_stack_prologue_scratch_regno ();
12143 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12144 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12145
12146 /* 64-bit:
12147 fp -> old fp value
12148 return address within this function
12149 return address of caller of this function
12150 stack arguments
12151 So we add three words to get to the stack arguments.
12152
12153 32-bit:
12154 fp -> old fp value
12155 return address within this function
12156 first argument to __morestack
12157 second argument to __morestack
12158 return address of caller of this function
12159 stack arguments
12160 So we add five words to get to the stack arguments.
12161 */
12162 words = TARGET_64BIT ? 3 : 5;
12163 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12164 gen_rtx_PLUS (Pmode, frame_reg,
12165 GEN_INT (words * UNITS_PER_WORD))));
12166
12167 varargs_label = gen_label_rtx ();
12168 emit_jump_insn (gen_jump (varargs_label));
12169 JUMP_LABEL (get_last_insn ()) = varargs_label;
12170
12171 emit_barrier ();
12172 }
12173
12174 emit_label (label);
12175 LABEL_NUSES (label) = 1;
12176
12177 /* If this function calls va_start, we now have to set the scratch
12178 register for the case where we do not call __morestack. In this
12179 case we need to set it based on the stack pointer. */
12180 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12181 {
12182 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12183 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12184 GEN_INT (UNITS_PER_WORD))));
12185
12186 emit_label (varargs_label);
12187 LABEL_NUSES (varargs_label) = 1;
12188 }
12189 }
12190
12191 /* We may have to tell the dataflow pass that the split stack prologue
12192 is initializing a scratch register. */
12193
12194 static void
12195 ix86_live_on_entry (bitmap regs)
12196 {
12197 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12198 {
12199 gcc_assert (flag_split_stack);
12200 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12201 }
12202 }
12203 \f
12204 /* Extract the parts of an RTL expression that is a valid memory address
12205 for an instruction. Return 0 if the structure of the address is
12206 grossly off. Return -1 if the address contains ASHIFT, so it is not
12207 strictly valid, but is still used for computing the length of the lea instruction. */
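/* For illustration, an operand such as 8(%ebx,%eax,4) typically arrives
here in canonical form as
(plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 8))
and decomposes into base %ebx, index %eax, scale 4 and displacement 8. */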
12208
12209 int
12210 ix86_decompose_address (rtx addr, struct ix86_address *out)
12211 {
12212 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12213 rtx base_reg, index_reg;
12214 HOST_WIDE_INT scale = 1;
12215 rtx scale_rtx = NULL_RTX;
12216 rtx tmp;
12217 int retval = 1;
12218 enum ix86_address_seg seg = SEG_DEFAULT;
12219
12220 /* Allow zero-extended SImode addresses; they will be
12221 emitted with the addr32 prefix. */
12222 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12223 {
12224 if (GET_CODE (addr) == ZERO_EXTEND
12225 && GET_MODE (XEXP (addr, 0)) == SImode)
12226 {
12227 addr = XEXP (addr, 0);
12228 if (CONST_INT_P (addr))
12229 return 0;
12230 }
12231 else if (GET_CODE (addr) == AND
12232 && const_32bit_mask (XEXP (addr, 1), DImode))
12233 {
12234 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12235 if (addr == NULL_RTX)
12236 return 0;
12237
12238 if (CONST_INT_P (addr))
12239 return 0;
12240 }
12241 }
12242
12243 /* Allow SImode subregs of DImode addresses; they will be
12244 emitted with the addr32 prefix. */
12245 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12246 {
12247 if (GET_CODE (addr) == SUBREG
12248 && GET_MODE (SUBREG_REG (addr)) == DImode)
12249 {
12250 addr = SUBREG_REG (addr);
12251 if (CONST_INT_P (addr))
12252 return 0;
12253 }
12254 }
12255
12256 if (REG_P (addr))
12257 base = addr;
12258 else if (GET_CODE (addr) == SUBREG)
12259 {
12260 if (REG_P (SUBREG_REG (addr)))
12261 base = addr;
12262 else
12263 return 0;
12264 }
12265 else if (GET_CODE (addr) == PLUS)
12266 {
12267 rtx addends[4], op;
12268 int n = 0, i;
12269
12270 op = addr;
12271 do
12272 {
12273 if (n >= 4)
12274 return 0;
12275 addends[n++] = XEXP (op, 1);
12276 op = XEXP (op, 0);
12277 }
12278 while (GET_CODE (op) == PLUS);
12279 if (n >= 4)
12280 return 0;
12281 addends[n] = op;
12282
12283 for (i = n; i >= 0; --i)
12284 {
12285 op = addends[i];
12286 switch (GET_CODE (op))
12287 {
12288 case MULT:
12289 if (index)
12290 return 0;
12291 index = XEXP (op, 0);
12292 scale_rtx = XEXP (op, 1);
12293 break;
12294
12295 case ASHIFT:
12296 if (index)
12297 return 0;
12298 index = XEXP (op, 0);
12299 tmp = XEXP (op, 1);
12300 if (!CONST_INT_P (tmp))
12301 return 0;
12302 scale = INTVAL (tmp);
12303 if ((unsigned HOST_WIDE_INT) scale > 3)
12304 return 0;
12305 scale = 1 << scale;
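/* Shift counts 0..3 correspond to the scale factors 1, 2, 4 and 8
accepted by the hardware addressing modes. */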
12306 break;
12307
12308 case ZERO_EXTEND:
12309 op = XEXP (op, 0);
12310 if (GET_CODE (op) != UNSPEC)
12311 return 0;
12312 /* FALLTHRU */
12313
12314 case UNSPEC:
12315 if (XINT (op, 1) == UNSPEC_TP
12316 && TARGET_TLS_DIRECT_SEG_REFS
12317 && seg == SEG_DEFAULT)
12318 seg = DEFAULT_TLS_SEG_REG;
12319 else
12320 return 0;
12321 break;
12322
12323 case SUBREG:
12324 if (!REG_P (SUBREG_REG (op)))
12325 return 0;
12326 /* FALLTHRU */
12327
12328 case REG:
12329 if (!base)
12330 base = op;
12331 else if (!index)
12332 index = op;
12333 else
12334 return 0;
12335 break;
12336
12337 case CONST:
12338 case CONST_INT:
12339 case SYMBOL_REF:
12340 case LABEL_REF:
12341 if (disp)
12342 return 0;
12343 disp = op;
12344 break;
12345
12346 default:
12347 return 0;
12348 }
12349 }
12350 }
12351 else if (GET_CODE (addr) == MULT)
12352 {
12353 index = XEXP (addr, 0); /* index*scale */
12354 scale_rtx = XEXP (addr, 1);
12355 }
12356 else if (GET_CODE (addr) == ASHIFT)
12357 {
12358 /* We're called for lea too, which implements ashift on occasion. */
12359 index = XEXP (addr, 0);
12360 tmp = XEXP (addr, 1);
12361 if (!CONST_INT_P (tmp))
12362 return 0;
12363 scale = INTVAL (tmp);
12364 if ((unsigned HOST_WIDE_INT) scale > 3)
12365 return 0;
12366 scale = 1 << scale;
12367 retval = -1;
12368 }
12369 else
12370 disp = addr; /* displacement */
12371
12372 if (index)
12373 {
12374 if (REG_P (index))
12375 ;
12376 else if (GET_CODE (index) == SUBREG
12377 && REG_P (SUBREG_REG (index)))
12378 ;
12379 else
12380 return 0;
12381 }
12382
12383 /* Extract the integral value of scale. */
12384 if (scale_rtx)
12385 {
12386 if (!CONST_INT_P (scale_rtx))
12387 return 0;
12388 scale = INTVAL (scale_rtx);
12389 }
12390
12391 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12392 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12393
12394 /* Avoid useless 0 displacement. */
12395 if (disp == const0_rtx && (base || index))
12396 disp = NULL_RTX;
12397
12398 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12399 if (base_reg && index_reg && scale == 1
12400 && (index_reg == arg_pointer_rtx
12401 || index_reg == frame_pointer_rtx
12402 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12403 {
12404 rtx tmp;
12405 tmp = base, base = index, index = tmp;
12406 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12407 }
12408
12409 /* Special case: %ebp cannot be encoded as a base without a displacement.
12410 Similarly %r13. */
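/* In the ModR/M byte, mod=00 with a base encoding of 101 means "disp32,
no base" (or RIP-relative in 64-bit mode), so (%ebp) and (%r13) must
instead be emitted with an explicit zero displacement, e.g. 0(%ebp). */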
12411 if (!disp
12412 && base_reg
12413 && (base_reg == hard_frame_pointer_rtx
12414 || base_reg == frame_pointer_rtx
12415 || base_reg == arg_pointer_rtx
12416 || (REG_P (base_reg)
12417 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12418 || REGNO (base_reg) == R13_REG))))
12419 disp = const0_rtx;
12420
12421 /* Special case: on K6, [%esi] causes the instruction to be vector
12422 decoded. Avoid this by transforming it to [%esi+0].
12423 Reload calls address legitimization without cfun defined, so we need
12424 to test cfun for being non-NULL. */
12425 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12426 && base_reg && !index_reg && !disp
12427 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12428 disp = const0_rtx;
12429
12430 /* Special case: encode reg+reg instead of reg*2. */
12431 if (!base && index && scale == 2)
12432 base = index, base_reg = index_reg, scale = 1;
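/* A scaled index with no base would require the SIB "no base" encoding,
which always carries a 32-bit displacement; base+index with scale 1
encodes the same address without it. */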
12433
12434 /* Special case: scaling cannot be encoded without a base or displacement. */
12435 if (!base && !disp && index && scale != 1)
12436 disp = const0_rtx;
12437
12438 out->base = base;
12439 out->index = index;
12440 out->disp = disp;
12441 out->scale = scale;
12442 out->seg = seg;
12443
12444 return retval;
12445 }
12446 \f
12447 /* Return cost of the memory address x.
12448 For i386, it is better to use a complex address than let gcc copy
12449 the address into a reg and make a new pseudo. But not if the address
12450 requires two regs - that would mean more pseudos with longer
12451 lifetimes. */
12452 static int
12453 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12454 {
12455 struct ix86_address parts;
12456 int cost = 1;
12457 int ok = ix86_decompose_address (x, &parts);
12458
12459 gcc_assert (ok);
12460
12461 if (parts.base && GET_CODE (parts.base) == SUBREG)
12462 parts.base = SUBREG_REG (parts.base);
12463 if (parts.index && GET_CODE (parts.index) == SUBREG)
12464 parts.index = SUBREG_REG (parts.index);
12465
12466 /* Attempt to minimize number of registers in the address. */
12467 if ((parts.base
12468 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12469 || (parts.index
12470 && (!REG_P (parts.index)
12471 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12472 cost++;
12473
12474 /* When the address base or index is "pic_offset_table_rtx" we don't
12475 increase the address cost. When a memory operand involving
12476 "pic_offset_table_rtx" is not invariant itself, it most likely means that
12477 the base or index is not invariant. Therefore only "pic_offset_table_rtx"
12478 could be hoisted out, which is not profitable for x86. */
12479 if (parts.base
12480 && (!pic_offset_table_rtx
12481 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12482 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12483 && parts.index
12484 && (!pic_offset_table_rtx
12485 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12486 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12487 && parts.base != parts.index)
12488 cost++;
12489
12490 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
12491 00_xxx_100b, since its predecode logic can't detect the length of such
12492 instructions and they degenerate to vector decoding. Increase the cost
12493 of such addresses here. The penalty is at least 2 cycles. It may be
12494 worthwhile to split such addresses or even refuse them at all.
12495
12496 Following addressing modes are affected:
12497 [base+scale*index]
12498 [scale*index+disp]
12499 [base+index]
12500
12501 The first and last case may be avoidable by explicitly coding the zero
12502 in the memory address, but I don't have an AMD-K6 machine handy to check
12503 this theory. */
12504
12505 if (TARGET_K6
12506 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12507 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12508 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12509 cost += 10;
12510
12511 return cost;
12512 }
12513 \f
12514 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12515 this is used to form addresses to local data when -fPIC is in
12516 use. */
12517
12518 static bool
12519 darwin_local_data_pic (rtx disp)
12520 {
12521 return (GET_CODE (disp) == UNSPEC
12522 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12523 }
12524
12525 /* Determine if a given RTX is a valid constant. We already know this
12526 satisfies CONSTANT_P. */
12527
12528 static bool
12529 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12530 {
12531 switch (GET_CODE (x))
12532 {
12533 case CONST:
12534 x = XEXP (x, 0);
12535
12536 if (GET_CODE (x) == PLUS)
12537 {
12538 if (!CONST_INT_P (XEXP (x, 1)))
12539 return false;
12540 x = XEXP (x, 0);
12541 }
12542
12543 if (TARGET_MACHO && darwin_local_data_pic (x))
12544 return true;
12545
12546 /* Only some unspecs are valid as "constants". */
12547 if (GET_CODE (x) == UNSPEC)
12548 switch (XINT (x, 1))
12549 {
12550 case UNSPEC_GOT:
12551 case UNSPEC_GOTOFF:
12552 case UNSPEC_PLTOFF:
12553 return TARGET_64BIT;
12554 case UNSPEC_TPOFF:
12555 case UNSPEC_NTPOFF:
12556 x = XVECEXP (x, 0, 0);
12557 return (GET_CODE (x) == SYMBOL_REF
12558 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12559 case UNSPEC_DTPOFF:
12560 x = XVECEXP (x, 0, 0);
12561 return (GET_CODE (x) == SYMBOL_REF
12562 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12563 default:
12564 return false;
12565 }
12566
12567 /* We must have drilled down to a symbol. */
12568 if (GET_CODE (x) == LABEL_REF)
12569 return true;
12570 if (GET_CODE (x) != SYMBOL_REF)
12571 return false;
12572 /* FALLTHRU */
12573
12574 case SYMBOL_REF:
12575 /* TLS symbols are never valid. */
12576 if (SYMBOL_REF_TLS_MODEL (x))
12577 return false;
12578
12579 /* DLLIMPORT symbols are never valid. */
12580 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12581 && SYMBOL_REF_DLLIMPORT_P (x))
12582 return false;
12583
12584 #if TARGET_MACHO
12585 /* mdynamic-no-pic */
12586 if (MACHO_DYNAMIC_NO_PIC_P)
12587 return machopic_symbol_defined_p (x);
12588 #endif
12589 break;
12590
12591 case CONST_DOUBLE:
12592 if (GET_MODE (x) == TImode
12593 && x != CONST0_RTX (TImode)
12594 && !TARGET_64BIT)
12595 return false;
12596 break;
12597
12598 case CONST_VECTOR:
12599 if (!standard_sse_constant_p (x))
12600 return false;
12601
12602 default:
12603 break;
12604 }
12605
12606 /* Otherwise we handle everything else in the move patterns. */
12607 return true;
12608 }
12609
12610 /* Determine if it's legal to put X into the constant pool. This
12611 is not possible for the address of thread-local symbols, which
12612 is checked above. */
12613
12614 static bool
12615 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12616 {
12617 /* We can always put integral constants and vectors in memory. */
12618 switch (GET_CODE (x))
12619 {
12620 case CONST_INT:
12621 case CONST_DOUBLE:
12622 case CONST_VECTOR:
12623 return false;
12624
12625 default:
12626 break;
12627 }
12628 return !ix86_legitimate_constant_p (mode, x);
12629 }
12630
12631 /* Return true if the symbol is marked as dllimport or as a stub
12632 variable, false otherwise. */
12633
12634 static bool
12635 is_imported_p (rtx x)
12636 {
12637 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12638 || GET_CODE (x) != SYMBOL_REF)
12639 return false;
12640
12641 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12642 }
12643
12644
12645 /* Nonzero if the constant value X is a legitimate general operand
12646 when generating PIC code. It is given that flag_pic is on and
12647 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12648
12649 bool
12650 legitimate_pic_operand_p (rtx x)
12651 {
12652 rtx inner;
12653
12654 switch (GET_CODE (x))
12655 {
12656 case CONST:
12657 inner = XEXP (x, 0);
12658 if (GET_CODE (inner) == PLUS
12659 && CONST_INT_P (XEXP (inner, 1)))
12660 inner = XEXP (inner, 0);
12661
12662 /* Only some unspecs are valid as "constants". */
12663 if (GET_CODE (inner) == UNSPEC)
12664 switch (XINT (inner, 1))
12665 {
12666 case UNSPEC_GOT:
12667 case UNSPEC_GOTOFF:
12668 case UNSPEC_PLTOFF:
12669 return TARGET_64BIT;
12670 case UNSPEC_TPOFF:
12671 x = XVECEXP (inner, 0, 0);
12672 return (GET_CODE (x) == SYMBOL_REF
12673 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12674 case UNSPEC_MACHOPIC_OFFSET:
12675 return legitimate_pic_address_disp_p (x);
12676 default:
12677 return false;
12678 }
12679 /* FALLTHRU */
12680
12681 case SYMBOL_REF:
12682 case LABEL_REF:
12683 return legitimate_pic_address_disp_p (x);
12684
12685 default:
12686 return true;
12687 }
12688 }
12689
12690 /* Determine if a given CONST RTX is a valid memory displacement
12691 in PIC mode. */
12692
12693 bool
12694 legitimate_pic_address_disp_p (rtx disp)
12695 {
12696 bool saw_plus;
12697
12698 /* In 64bit mode we can allow direct addresses of symbols and labels
12699 when they are not dynamic symbols. */
12700 if (TARGET_64BIT)
12701 {
12702 rtx op0 = disp, op1;
12703
12704 switch (GET_CODE (disp))
12705 {
12706 case LABEL_REF:
12707 return true;
12708
12709 case CONST:
12710 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12711 break;
12712 op0 = XEXP (XEXP (disp, 0), 0);
12713 op1 = XEXP (XEXP (disp, 0), 1);
12714 if (!CONST_INT_P (op1)
12715 || INTVAL (op1) >= 16*1024*1024
12716 || INTVAL (op1) < -16*1024*1024)
12717 break;
12718 if (GET_CODE (op0) == LABEL_REF)
12719 return true;
12720 if (GET_CODE (op0) == CONST
12721 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12722 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12723 return true;
12724 if (GET_CODE (op0) == UNSPEC
12725 && XINT (op0, 1) == UNSPEC_PCREL)
12726 return true;
12727 if (GET_CODE (op0) != SYMBOL_REF)
12728 break;
12729 /* FALLTHRU */
12730
12731 case SYMBOL_REF:
12732 /* TLS references should always be enclosed in UNSPEC.
12733 A dllimported symbol always needs to be resolved. */
12734 if (SYMBOL_REF_TLS_MODEL (op0)
12735 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12736 return false;
12737
12738 if (TARGET_PECOFF)
12739 {
12740 if (is_imported_p (op0))
12741 return true;
12742
12743 if (SYMBOL_REF_FAR_ADDR_P (op0)
12744 || !SYMBOL_REF_LOCAL_P (op0))
12745 break;
12746
12747 /* Function symbols need to be resolved only for the
12748 large model. For the small model we don't need to
12749 resolve anything here. */
12751 if ((ix86_cmodel != CM_LARGE_PIC
12752 && SYMBOL_REF_FUNCTION_P (op0))
12753 || ix86_cmodel == CM_SMALL_PIC)
12754 return true;
12755 /* Non-external symbols don't need to be resolved for
12756 the large and medium models. */
12757 if ((ix86_cmodel == CM_LARGE_PIC
12758 || ix86_cmodel == CM_MEDIUM_PIC)
12759 && !SYMBOL_REF_EXTERNAL_P (op0))
12760 return true;
12761 }
12762 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12763 && SYMBOL_REF_LOCAL_P (op0)
12764 && ix86_cmodel != CM_LARGE_PIC)
12765 return true;
12766 break;
12767
12768 default:
12769 break;
12770 }
12771 }
12772 if (GET_CODE (disp) != CONST)
12773 return false;
12774 disp = XEXP (disp, 0);
12775
12776 if (TARGET_64BIT)
12777 {
12778 /* It is not safe to allow PLUS expressions here; this limits the allowed
12779 distance of GOT references. We should not need these anyway. */
12780 if (GET_CODE (disp) != UNSPEC
12781 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12782 && XINT (disp, 1) != UNSPEC_GOTOFF
12783 && XINT (disp, 1) != UNSPEC_PCREL
12784 && XINT (disp, 1) != UNSPEC_PLTOFF))
12785 return false;
12786
12787 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12788 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12789 return false;
12790 return true;
12791 }
12792
12793 saw_plus = false;
12794 if (GET_CODE (disp) == PLUS)
12795 {
12796 if (!CONST_INT_P (XEXP (disp, 1)))
12797 return false;
12798 disp = XEXP (disp, 0);
12799 saw_plus = true;
12800 }
12801
12802 if (TARGET_MACHO && darwin_local_data_pic (disp))
12803 return true;
12804
12805 if (GET_CODE (disp) != UNSPEC)
12806 return false;
12807
12808 switch (XINT (disp, 1))
12809 {
12810 case UNSPEC_GOT:
12811 if (saw_plus)
12812 return false;
12813 /* We need to check for both symbols and labels because VxWorks loads
12814 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12815 details. */
12816 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12817 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12818 case UNSPEC_GOTOFF:
12819 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12820 While the ABI also specifies a 32bit relocation, we don't produce it
12821 in the small PIC model at all. */
12822 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12823 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12824 && !TARGET_64BIT)
12825 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12826 return false;
12827 case UNSPEC_GOTTPOFF:
12828 case UNSPEC_GOTNTPOFF:
12829 case UNSPEC_INDNTPOFF:
12830 if (saw_plus)
12831 return false;
12832 disp = XVECEXP (disp, 0, 0);
12833 return (GET_CODE (disp) == SYMBOL_REF
12834 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12835 case UNSPEC_NTPOFF:
12836 disp = XVECEXP (disp, 0, 0);
12837 return (GET_CODE (disp) == SYMBOL_REF
12838 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12839 case UNSPEC_DTPOFF:
12840 disp = XVECEXP (disp, 0, 0);
12841 return (GET_CODE (disp) == SYMBOL_REF
12842 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12843 }
12844
12845 return false;
12846 }
12847
12848 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
12849 replace the input X, or the original X if no replacement is called for.
12850 The output parameter *WIN is 1 if the calling macro should goto WIN,
12851 0 if it should not. */
12852
12853 bool
12854 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12855 int)
12856 {
12857 /* Reload can generate:
12858
12859 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12860 (reg:DI 97))
12861 (reg:DI 2 cx))
12862
12863 This RTX is rejected from ix86_legitimate_address_p due to
12864 non-strictness of base register 97. Following this rejection,
12865 reload pushes all three components into separate registers,
12866 creating invalid memory address RTX.
12867
12868 The following code reloads only the invalid part of the
12869 memory address RTX. */
12870
12871 if (GET_CODE (x) == PLUS
12872 && REG_P (XEXP (x, 1))
12873 && GET_CODE (XEXP (x, 0)) == PLUS
12874 && REG_P (XEXP (XEXP (x, 0), 1)))
12875 {
12876 rtx base, index;
12877 bool something_reloaded = false;
12878
12879 base = XEXP (XEXP (x, 0), 1);
12880 if (!REG_OK_FOR_BASE_STRICT_P (base))
12881 {
12882 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12883 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12884 opnum, (enum reload_type) type);
12885 something_reloaded = true;
12886 }
12887
12888 index = XEXP (x, 1);
12889 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12890 {
12891 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12892 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12893 opnum, (enum reload_type) type);
12894 something_reloaded = true;
12895 }
12896
12897 gcc_assert (something_reloaded);
12898 return true;
12899 }
12900
12901 return false;
12902 }
12903
12904 /* Determine if OP is a suitable RTX for an address register.
12905 Return the naked register if a register or a register subreg is
12906 found, otherwise return NULL_RTX. */
12907
12908 static rtx
12909 ix86_validate_address_register (rtx op)
12910 {
12911 enum machine_mode mode = GET_MODE (op);
12912
12913 /* Only SImode or DImode registers can form the address. */
12914 if (mode != SImode && mode != DImode)
12915 return NULL_RTX;
12916
12917 if (REG_P (op))
12918 return op;
12919 else if (GET_CODE (op) == SUBREG)
12920 {
12921 rtx reg = SUBREG_REG (op);
12922
12923 if (!REG_P (reg))
12924 return NULL_RTX;
12925
12926 mode = GET_MODE (reg);
12927
12928 /* Don't allow SUBREGs that span more than a word. It can
12929 lead to spill failures when the register is one word out
12930 of a two word structure. */
12931 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12932 return NULL_RTX;
12933
12934 /* Allow only SUBREGs of non-eliminable hard registers. */
12935 if (register_no_elim_operand (reg, mode))
12936 return reg;
12937 }
12938
12939 /* Op is not a register. */
12940 return NULL_RTX;
12941 }
12942
12943 /* Recognizes RTL expressions that are valid memory addresses for an
12944 instruction. The MODE argument is the machine mode for the MEM
12945 expression that wants to use this address.
12946
12947 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12948 convert common non-canonical forms to canonical form so that they will
12949 be recognized. */
12950
12951 static bool
12952 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12953 {
12954 struct ix86_address parts;
12955 rtx base, index, disp;
12956 HOST_WIDE_INT scale;
12957 enum ix86_address_seg seg;
12958
12959 if (ix86_decompose_address (addr, &parts) <= 0)
12960 /* Decomposition failed. */
12961 return false;
12962
12963 base = parts.base;
12964 index = parts.index;
12965 disp = parts.disp;
12966 scale = parts.scale;
12967 seg = parts.seg;
12968
12969 /* Validate base register. */
12970 if (base)
12971 {
12972 rtx reg = ix86_validate_address_register (base);
12973
12974 if (reg == NULL_RTX)
12975 return false;
12976
12977 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12978 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12979 /* Base is not valid. */
12980 return false;
12981 }
12982
12983 /* Validate index register. */
12984 if (index)
12985 {
12986 rtx reg = ix86_validate_address_register (index);
12987
12988 if (reg == NULL_RTX)
12989 return false;
12990
12991 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12992 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12993 /* Index is not valid. */
12994 return false;
12995 }
12996
12997 /* Index and base should have the same mode. */
12998 if (base && index
12999 && GET_MODE (base) != GET_MODE (index))
13000 return false;
13001
13002 /* Address override works only on the (%reg) part of %fs:(%reg). */
13003 if (seg != SEG_DEFAULT
13004 && ((base && GET_MODE (base) != word_mode)
13005 || (index && GET_MODE (index) != word_mode)))
13006 return false;
13007
13008 /* Validate scale factor. */
13009 if (scale != 1)
13010 {
13011 if (!index)
13012 /* Scale without index. */
13013 return false;
13014
13015 if (scale != 2 && scale != 4 && scale != 8)
13016 /* Scale is not a valid multiplier. */
13017 return false;
13018 }
13019
13020 /* Validate displacement. */
13021 if (disp)
13022 {
13023 if (GET_CODE (disp) == CONST
13024 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13025 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13026 switch (XINT (XEXP (disp, 0), 1))
13027 {
13028 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13029 used. While the ABI also specifies 32bit relocations, we don't produce
13030 them at all and use IP-relative addressing instead. */
13031 case UNSPEC_GOT:
13032 case UNSPEC_GOTOFF:
13033 gcc_assert (flag_pic);
13034 if (!TARGET_64BIT)
13035 goto is_legitimate_pic;
13036
13037 /* 64bit address unspec. */
13038 return false;
13039
13040 case UNSPEC_GOTPCREL:
13041 case UNSPEC_PCREL:
13042 gcc_assert (flag_pic);
13043 goto is_legitimate_pic;
13044
13045 case UNSPEC_GOTTPOFF:
13046 case UNSPEC_GOTNTPOFF:
13047 case UNSPEC_INDNTPOFF:
13048 case UNSPEC_NTPOFF:
13049 case UNSPEC_DTPOFF:
13050 break;
13051
13052 case UNSPEC_STACK_CHECK:
13053 gcc_assert (flag_split_stack);
13054 break;
13055
13056 default:
13057 /* Invalid address unspec. */
13058 return false;
13059 }
13060
13061 else if (SYMBOLIC_CONST (disp)
13062 && (flag_pic
13063 || (TARGET_MACHO
13064 #if TARGET_MACHO
13065 && MACHOPIC_INDIRECT
13066 && !machopic_operand_p (disp)
13067 #endif
13068 )))
13069 {
13070
13071 is_legitimate_pic:
13072 if (TARGET_64BIT && (index || base))
13073 {
13074 /* foo@dtpoff(%rX) is ok. */
13075 if (GET_CODE (disp) != CONST
13076 || GET_CODE (XEXP (disp, 0)) != PLUS
13077 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13078 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13079 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13080 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13081 /* Non-constant pic memory reference. */
13082 return false;
13083 }
13084 else if ((!TARGET_MACHO || flag_pic)
13085 && ! legitimate_pic_address_disp_p (disp))
13086 /* Displacement is an invalid pic construct. */
13087 return false;
13088 #if TARGET_MACHO
13089 else if (MACHO_DYNAMIC_NO_PIC_P
13090 && !ix86_legitimate_constant_p (Pmode, disp))
13091 /* The displacement must be referenced via a non_lazy_pointer. */
13092 return false;
13093 #endif
13094
13095 /* This code used to verify that a symbolic pic displacement
13096 includes the pic_offset_table_rtx register.
13097
13098 While this is a good idea, unfortunately these constructs may
13099 be created by the "adds using lea" optimization for incorrect
13100 code like:
13101
13102 int a;
13103 int foo(int i)
13104 {
13105 return *(&a+i);
13106 }
13107
13108 This code is nonsensical, but results in addressing the
13109 GOT table with pic_offset_table_rtx as the base. We can't
13110 just refuse it easily, since it gets matched by the
13111 "addsi3" pattern, which later gets split to lea in case
13112 the output register differs from the input. While this
13113 could be handled by a separate addsi pattern for this case
13114 that never results in lea, disabling this test seems to be
13115 the easier and correct fix for the crash. */
13116 }
13117 else if (GET_CODE (disp) != LABEL_REF
13118 && !CONST_INT_P (disp)
13119 && (GET_CODE (disp) != CONST
13120 || !ix86_legitimate_constant_p (Pmode, disp))
13121 && (GET_CODE (disp) != SYMBOL_REF
13122 || !ix86_legitimate_constant_p (Pmode, disp)))
13123 /* Displacement is not constant. */
13124 return false;
13125 else if (TARGET_64BIT
13126 && !x86_64_immediate_operand (disp, VOIDmode))
13127 /* Displacement is out of range. */
13128 return false;
13129 /* In x32 mode, constant addresses are sign extended to 64bit, so
13130 we have to reject addresses in the range 0x80000000 to 0xffffffff. */
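/* For example, the constant address 0x80000000 would be sign-extended to
0xffffffff80000000, which is not the address that was meant. */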
13131 else if (TARGET_X32 && !(index || base)
13132 && CONST_INT_P (disp)
13133 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13134 return false;
13135 }
13136
13137 /* Everything looks valid. */
13138 return true;
13139 }
13140
13141 /* Determine if a given RTX is a valid constant address. */
13142
13143 bool
13144 constant_address_p (rtx x)
13145 {
13146 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13147 }
13148 \f
13149 /* Return a unique alias set for the GOT. */
13150
13151 static alias_set_type
13152 ix86_GOT_alias_set (void)
13153 {
13154 static alias_set_type set = -1;
13155 if (set == -1)
13156 set = new_alias_set ();
13157 return set;
13158 }
13159
13160 /* Set regs_ever_live for PIC base address register
13161 to true if required. */
13162 static void
13163 set_pic_reg_ever_live ()
13164 {
13165 if (reload_in_progress)
13166 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13167 }
13168
13169 /* Return a legitimate reference for ORIG (an address) using the
13170 register REG. If REG is 0, a new pseudo is generated.
13171
13172 There are two types of references that must be handled:
13173
13174 1. Global data references must load the address from the GOT, via
13175 the PIC reg. An insn is emitted to do this load, and the reg is
13176 returned.
13177
13178 2. Static data references, constant pool addresses, and code labels
13179 compute the address as an offset from the GOT, whose base is in
13180 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13181 differentiate them from global data objects. The returned
13182 address is the PIC reg + an unspec constant.
13183
13184 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13185 reg also appears in the address. */
13186
13187 static rtx
13188 legitimize_pic_address (rtx orig, rtx reg)
13189 {
13190 rtx addr = orig;
13191 rtx new_rtx = orig;
13192
13193 #if TARGET_MACHO
13194 if (TARGET_MACHO && !TARGET_64BIT)
13195 {
13196 if (reg == 0)
13197 reg = gen_reg_rtx (Pmode);
13198 /* Use the generic Mach-O PIC machinery. */
13199 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13200 }
13201 #endif
13202
13203 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13204 {
13205 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13206 if (tmp)
13207 return tmp;
13208 }
13209
13210 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13211 new_rtx = addr;
13212 else if (TARGET_64BIT && !TARGET_PECOFF
13213 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13214 {
13215 rtx tmpreg;
13216 /* This symbol may be referenced via a displacement from the PIC
13217 base address (@GOTOFF). */
13218
13219 set_pic_reg_ever_live ();
13220 if (GET_CODE (addr) == CONST)
13221 addr = XEXP (addr, 0);
13222 if (GET_CODE (addr) == PLUS)
13223 {
13224 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13225 UNSPEC_GOTOFF);
13226 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13227 }
13228 else
13229 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13230 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13231 if (!reg)
13232 tmpreg = gen_reg_rtx (Pmode);
13233 else
13234 tmpreg = reg;
13235 emit_move_insn (tmpreg, new_rtx);
13236
13237 if (reg != 0)
13238 {
13239 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13240 tmpreg, 1, OPTAB_DIRECT);
13241 new_rtx = reg;
13242 }
13243 else
13244 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13245 }
13246 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13247 {
13248 /* This symbol may be referenced via a displacement from the PIC
13249 base address (@GOTOFF). */
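/* In ia32 assembly the result typically appears as sym@GOTOFF(%ebx),
assuming %ebx holds the PIC base (the usual choice on ia32). */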
13250
13251 set_pic_reg_ever_live ();
13252 if (GET_CODE (addr) == CONST)
13253 addr = XEXP (addr, 0);
13254 if (GET_CODE (addr) == PLUS)
13255 {
13256 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13257 UNSPEC_GOTOFF);
13258 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13259 }
13260 else
13261 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13262 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13263 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13264
13265 if (reg != 0)
13266 {
13267 emit_move_insn (reg, new_rtx);
13268 new_rtx = reg;
13269 }
13270 }
13271 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13272 /* We can't use @GOTOFF for text labels on VxWorks;
13273 see gotoff_operand. */
13274 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13275 {
13276 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13277 if (tmp)
13278 return tmp;
13279
13280 /* For x64 PE-COFF there is no GOT table, so we use the address
13281 directly. */
13282 if (TARGET_64BIT && TARGET_PECOFF)
13283 {
13284 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13285 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13286
13287 if (reg == 0)
13288 reg = gen_reg_rtx (Pmode);
13289 emit_move_insn (reg, new_rtx);
13290 new_rtx = reg;
13291 }
13292 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13293 {
13294 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13295 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13296 new_rtx = gen_const_mem (Pmode, new_rtx);
13297 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13298
13299 if (reg == 0)
13300 reg = gen_reg_rtx (Pmode);
13301 /* Use gen_movsi directly, otherwise the address is loaded
13302 into a register for CSE. We don't want to CSE these addresses;
13303 instead we CSE addresses from the GOT table, so skip this. */
13304 emit_insn (gen_movsi (reg, new_rtx));
13305 new_rtx = reg;
13306 }
13307 else
13308 {
13309 /* This symbol must be referenced via a load from the
13310 Global Offset Table (@GOT). */
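/* On ia32 this typically ends up as a load of the form
movl sym@GOT(%ebx), %reg, which fetches the symbol's address
from the GOT. */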
13311
13312 set_pic_reg_ever_live ();
13313 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13314 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13315 if (TARGET_64BIT)
13316 new_rtx = force_reg (Pmode, new_rtx);
13317 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13318 new_rtx = gen_const_mem (Pmode, new_rtx);
13319 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13320
13321 if (reg == 0)
13322 reg = gen_reg_rtx (Pmode);
13323 emit_move_insn (reg, new_rtx);
13324 new_rtx = reg;
13325 }
13326 }
13327 else
13328 {
13329 if (CONST_INT_P (addr)
13330 && !x86_64_immediate_operand (addr, VOIDmode))
13331 {
13332 if (reg)
13333 {
13334 emit_move_insn (reg, addr);
13335 new_rtx = reg;
13336 }
13337 else
13338 new_rtx = force_reg (Pmode, addr);
13339 }
13340 else if (GET_CODE (addr) == CONST)
13341 {
13342 addr = XEXP (addr, 0);
13343
13344 /* We must match what we generated before. Assume the only
13345 unspecs that can get here are ours; not that we could do
13346 anything with them anyway.... */
13347 if (GET_CODE (addr) == UNSPEC
13348 || (GET_CODE (addr) == PLUS
13349 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13350 return orig;
13351 gcc_assert (GET_CODE (addr) == PLUS);
13352 }
13353 if (GET_CODE (addr) == PLUS)
13354 {
13355 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13356
13357 /* Check first to see if this is a constant offset from a @GOTOFF
13358 symbol reference. */
13359 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13360 && CONST_INT_P (op1))
13361 {
13362 if (!TARGET_64BIT)
13363 {
13364 set_pic_reg_ever_live ();
13365 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13366 UNSPEC_GOTOFF);
13367 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13368 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13369 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13370
13371 if (reg != 0)
13372 {
13373 emit_move_insn (reg, new_rtx);
13374 new_rtx = reg;
13375 }
13376 }
13377 else
13378 {
13379 if (INTVAL (op1) < -16*1024*1024
13380 || INTVAL (op1) >= 16*1024*1024)
13381 {
13382 if (!x86_64_immediate_operand (op1, Pmode))
13383 op1 = force_reg (Pmode, op1);
13384 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13385 }
13386 }
13387 }
13388 else
13389 {
13390 rtx base = legitimize_pic_address (op0, reg);
13391 enum machine_mode mode = GET_MODE (base);
13392 new_rtx
13393 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13394
13395 if (CONST_INT_P (new_rtx))
13396 {
13397 if (INTVAL (new_rtx) < -16*1024*1024
13398 || INTVAL (new_rtx) >= 16*1024*1024)
13399 {
13400 if (!x86_64_immediate_operand (new_rtx, mode))
13401 new_rtx = force_reg (mode, new_rtx);
13402 new_rtx
13403 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13404 }
13405 else
13406 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13407 }
13408 else
13409 {
13410 if (GET_CODE (new_rtx) == PLUS
13411 && CONSTANT_P (XEXP (new_rtx, 1)))
13412 {
13413 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13414 new_rtx = XEXP (new_rtx, 1);
13415 }
13416 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13417 }
13418 }
13419 }
13420 }
13421 return new_rtx;
13422 }
13423 \f
13424 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13425
13426 static rtx
13427 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13428 {
13429 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13430
13431 if (GET_MODE (tp) != tp_mode)
13432 {
13433 gcc_assert (GET_MODE (tp) == SImode);
13434 gcc_assert (tp_mode == DImode);
13435
13436 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13437 }
13438
13439 if (to_reg)
13440 tp = copy_to_mode_reg (tp_mode, tp);
13441
13442 return tp;
13443 }
13444
13445 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13446
13447 static GTY(()) rtx ix86_tls_symbol;
13448
13449 static rtx
13450 ix86_tls_get_addr (void)
13451 {
13452 if (!ix86_tls_symbol)
13453 {
13454 const char *sym
13455 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13456 ? "___tls_get_addr" : "__tls_get_addr");
13457
13458 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13459 }
13460
13461 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13462 {
13463 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13464 UNSPEC_PLTOFF);
13465 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13466 gen_rtx_CONST (Pmode, unspec));
13467 }
13468
13469 return ix86_tls_symbol;
13470 }
13471
13472 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13473
13474 static GTY(()) rtx ix86_tls_module_base_symbol;
13475
13476 rtx
13477 ix86_tls_module_base (void)
13478 {
13479 if (!ix86_tls_module_base_symbol)
13480 {
13481 ix86_tls_module_base_symbol
13482 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13483
13484 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13485 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13486 }
13487
13488 return ix86_tls_module_base_symbol;
13489 }
13490
13491 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13492 false if we expect this to be used for a memory address and true if
13493 we expect to load the address into a register. */
13494
13495 static rtx
13496 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13497 {
13498 rtx dest, base, off;
13499 rtx pic = NULL_RTX, tp = NULL_RTX;
13500 enum machine_mode tp_mode = Pmode;
13501 int type;
13502
13503 /* Fall back to the global dynamic model if the toolchain cannot support
13504 local dynamic. */
13505 if (TARGET_SUN_TLS && !TARGET_64BIT
13506 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13507 && model == TLS_MODEL_LOCAL_DYNAMIC)
13508 model = TLS_MODEL_GLOBAL_DYNAMIC;
13509
13510 switch (model)
13511 {
13512 case TLS_MODEL_GLOBAL_DYNAMIC:
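/* For the classic (non-GNU2) TLS dialect this ends up as a call to
__tls_get_addr (___tls_get_addr with the GNU dialect on ia32); on
x86-64 the emitted sequence is roughly
leaq x@tlsgd(%rip), %rdi; call __tls_get_addr
with the resulting address returned in %rax (see the
emit_libcall_block below). */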
13513 dest = gen_reg_rtx (Pmode);
13514
13515 if (!TARGET_64BIT)
13516 {
13517 if (flag_pic && !TARGET_PECOFF)
13518 pic = pic_offset_table_rtx;
13519 else
13520 {
13521 pic = gen_reg_rtx (Pmode);
13522 emit_insn (gen_set_got (pic));
13523 }
13524 }
13525
13526 if (TARGET_GNU2_TLS)
13527 {
13528 if (TARGET_64BIT)
13529 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13530 else
13531 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13532
13533 tp = get_thread_pointer (Pmode, true);
13534 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13535
13536 if (GET_MODE (x) != Pmode)
13537 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13538
13539 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13540 }
13541 else
13542 {
13543 rtx caddr = ix86_tls_get_addr ();
13544
13545 if (TARGET_64BIT)
13546 {
13547 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13548 rtx_insn *insns;
13549
13550 start_sequence ();
13551 emit_call_insn
13552 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13553 insns = get_insns ();
13554 end_sequence ();
13555
13556 if (GET_MODE (x) != Pmode)
13557 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13558
13559 RTL_CONST_CALL_P (insns) = 1;
13560 emit_libcall_block (insns, dest, rax, x);
13561 }
13562 else
13563 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13564 }
13565 break;
13566
13567 case TLS_MODEL_LOCAL_DYNAMIC:
13568 base = gen_reg_rtx (Pmode);
13569
13570 if (!TARGET_64BIT)
13571 {
13572 if (flag_pic)
13573 pic = pic_offset_table_rtx;
13574 else
13575 {
13576 pic = gen_reg_rtx (Pmode);
13577 emit_insn (gen_set_got (pic));
13578 }
13579 }
13580
13581 if (TARGET_GNU2_TLS)
13582 {
13583 rtx tmp = ix86_tls_module_base ();
13584
13585 if (TARGET_64BIT)
13586 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13587 else
13588 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13589
13590 tp = get_thread_pointer (Pmode, true);
13591 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13592 gen_rtx_MINUS (Pmode, tmp, tp));
13593 }
13594 else
13595 {
13596 rtx caddr = ix86_tls_get_addr ();
13597
13598 if (TARGET_64BIT)
13599 {
13600 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13601 rtx_insn *insns;
13602 rtx eqv;
13603
13604 start_sequence ();
13605 emit_call_insn
13606 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13607 insns = get_insns ();
13608 end_sequence ();
13609
13610 /* Attach a unique REG_EQUAL to allow the RTL optimizers to
13611 share the LD_BASE result with other LD model accesses. */
13612 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13613 UNSPEC_TLS_LD_BASE);
13614
13615 RTL_CONST_CALL_P (insns) = 1;
13616 emit_libcall_block (insns, base, rax, eqv);
13617 }
13618 else
13619 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13620 }
13621
13622 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13623 off = gen_rtx_CONST (Pmode, off);
13624
13625 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13626
13627 if (TARGET_GNU2_TLS)
13628 {
13629 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13630
13631 if (GET_MODE (x) != Pmode)
13632 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13633
13634 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13635 }
13636 break;
13637
13638 case TLS_MODEL_INITIAL_EXEC:
13639 if (TARGET_64BIT)
13640 {
13641 if (TARGET_SUN_TLS && !TARGET_X32)
13642 {
13643 /* The Sun linker took the AMD64 TLS spec literally
13644 and can only handle %rax as destination of the
13645 initial executable code sequence. */
13646
13647 dest = gen_reg_rtx (DImode);
13648 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13649 return dest;
13650 }
13651
13652 /* Generate DImode references to avoid %fs:(%reg32)
13653 problems and linker IE->LE relaxation bug. */
13654 tp_mode = DImode;
13655 pic = NULL;
13656 type = UNSPEC_GOTNTPOFF;
13657 }
13658 else if (flag_pic)
13659 {
13660 set_pic_reg_ever_live ();
13661 pic = pic_offset_table_rtx;
13662 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13663 }
13664 else if (!TARGET_ANY_GNU_TLS)
13665 {
13666 pic = gen_reg_rtx (Pmode);
13667 emit_insn (gen_set_got (pic));
13668 type = UNSPEC_GOTTPOFF;
13669 }
13670 else
13671 {
13672 pic = NULL;
13673 type = UNSPEC_INDNTPOFF;
13674 }
13675
13676 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13677 off = gen_rtx_CONST (tp_mode, off);
13678 if (pic)
13679 off = gen_rtx_PLUS (tp_mode, pic, off);
13680 off = gen_const_mem (tp_mode, off);
13681 set_mem_alias_set (off, ix86_GOT_alias_set ());
13682
13683 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13684 {
13685 base = get_thread_pointer (tp_mode,
13686 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13687 off = force_reg (tp_mode, off);
13688 return gen_rtx_PLUS (tp_mode, base, off);
13689 }
13690 else
13691 {
13692 base = get_thread_pointer (Pmode, true);
13693 dest = gen_reg_rtx (Pmode);
13694 emit_insn (ix86_gen_sub3 (dest, base, off));
13695 }
13696 break;
13697
13698 case TLS_MODEL_LOCAL_EXEC:
13699 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13700 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13701 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13702 off = gen_rtx_CONST (Pmode, off);
13703
13704 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13705 {
13706 base = get_thread_pointer (Pmode,
13707 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13708 return gen_rtx_PLUS (Pmode, base, off);
13709 }
13710 else
13711 {
13712 base = get_thread_pointer (Pmode, true);
13713 dest = gen_reg_rtx (Pmode);
13714 emit_insn (ix86_gen_sub3 (dest, base, off));
13715 }
13716 break;
13717
13718 default:
13719 gcc_unreachable ();
13720 }
13721
13722 return dest;
13723 }
13724
13725 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13726 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13727 unique refptr-DECL symbol corresponding to symbol DECL. */
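/* For illustration only: on a MinGW-style target the net effect is that
   a dllimport'ed `foo' is accessed indirectly through a pointer named
   along the lines of `__imp_foo' (or `__imp__foo' when user labels carry
   an underscore prefix), while the !BEIMPORT case builds a local
   `refptr.'-style stub instead.  The leading '*' in the names built
   below tells the assembler output machinery not to prepend
   user_label_prefix again.  */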
13728
13729 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13730 htab_t dllimport_map;
13731
13732 static tree
13733 get_dllimport_decl (tree decl, bool beimport)
13734 {
13735 struct tree_map *h, in;
13736 void **loc;
13737 const char *name;
13738 const char *prefix;
13739 size_t namelen, prefixlen;
13740 char *imp_name;
13741 tree to;
13742 rtx rtl;
13743
13744 if (!dllimport_map)
13745 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13746
13747 in.hash = htab_hash_pointer (decl);
13748 in.base.from = decl;
13749 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13750 h = (struct tree_map *) *loc;
13751 if (h)
13752 return h->to;
13753
13754 *loc = h = ggc_alloc<tree_map> ();
13755 h->hash = in.hash;
13756 h->base.from = decl;
13757 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13758 VAR_DECL, NULL, ptr_type_node);
13759 DECL_ARTIFICIAL (to) = 1;
13760 DECL_IGNORED_P (to) = 1;
13761 DECL_EXTERNAL (to) = 1;
13762 TREE_READONLY (to) = 1;
13763
13764 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13765 name = targetm.strip_name_encoding (name);
13766 if (beimport)
13767 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13768 ? "*__imp_" : "*__imp__";
13769 else
13770 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13771 namelen = strlen (name);
13772 prefixlen = strlen (prefix);
13773 imp_name = (char *) alloca (namelen + prefixlen + 1);
13774 memcpy (imp_name, prefix, prefixlen);
13775 memcpy (imp_name + prefixlen, name, namelen + 1);
13776
13777 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13778 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13779 SET_SYMBOL_REF_DECL (rtl, to);
13780 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13781 if (!beimport)
13782 {
13783 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13784 #ifdef SUB_TARGET_RECORD_STUB
13785 SUB_TARGET_RECORD_STUB (name);
13786 #endif
13787 }
13788
13789 rtl = gen_const_mem (Pmode, rtl);
13790 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13791
13792 SET_DECL_RTL (to, rtl);
13793 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13794
13795 return to;
13796 }
13797
13798 /* Expand SYMBOL into its corresponding far-address symbol.
13799 WANT_REG is true if we require the result be a register. */
13800
13801 static rtx
13802 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13803 {
13804 tree imp_decl;
13805 rtx x;
13806
13807 gcc_assert (SYMBOL_REF_DECL (symbol));
13808 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13809
13810 x = DECL_RTL (imp_decl);
13811 if (want_reg)
13812 x = force_reg (Pmode, x);
13813 return x;
13814 }
13815
13816 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13817 true if we require the result be a register. */
13818
13819 static rtx
13820 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13821 {
13822 tree imp_decl;
13823 rtx x;
13824
13825 gcc_assert (SYMBOL_REF_DECL (symbol));
13826 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13827
13828 x = DECL_RTL (imp_decl);
13829 if (want_reg)
13830 x = force_reg (Pmode, x);
13831 return x;
13832 }
13833
13834 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13835 is true if we require the result be a register. */
13836
13837 static rtx
13838 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13839 {
13840 if (!TARGET_PECOFF)
13841 return NULL_RTX;
13842
13843 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13844 {
13845 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13846 return legitimize_dllimport_symbol (addr, inreg);
13847 if (GET_CODE (addr) == CONST
13848 && GET_CODE (XEXP (addr, 0)) == PLUS
13849 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13850 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13851 {
13852 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13853 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13854 }
13855 }
13856
13857 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13858 return NULL_RTX;
13859 if (GET_CODE (addr) == SYMBOL_REF
13860 && !is_imported_p (addr)
13861 && SYMBOL_REF_EXTERNAL_P (addr)
13862 && SYMBOL_REF_DECL (addr))
13863 return legitimize_pe_coff_extern_decl (addr, inreg);
13864
13865 if (GET_CODE (addr) == CONST
13866 && GET_CODE (XEXP (addr, 0)) == PLUS
13867 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13868 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13869 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13870 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13871 {
13872 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13873 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13874 }
13875 return NULL_RTX;
13876 }
13877
13878 /* Try machine-dependent ways of modifying an illegitimate address
13879 to be legitimate. If we find one, return the new, valid address.
13880 This macro is used in only one place: `memory_address' in explow.c.
13881
13882 OLDX is the address as it was before break_out_memory_refs was called.
13883 In some cases it is useful to look at this to decide what needs to be done.
13884
13885 It is always safe for this macro to do nothing. It exists to recognize
13886 opportunities to optimize the output.
13887
13888 For the 80386, we handle X+REG by loading X into a register R and
13889 using R+REG. R will go in a general reg and indexing will be used.
13890 However, if REG is a broken-out memory address or multiplication,
13891 nothing needs to be done because REG can certainly go in a general reg.
13892
13893 When -fpic is used, special handling is needed for symbolic references.
13894 See comments by legitimize_pic_address in i386.c for details. */
13895
13896 static rtx
13897 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13898 {
13899 int changed = 0;
13900 unsigned log;
13901
13902 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13903 if (log)
13904 return legitimize_tls_address (x, (enum tls_model) log, false);
13905 if (GET_CODE (x) == CONST
13906 && GET_CODE (XEXP (x, 0)) == PLUS
13907 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13908 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13909 {
13910 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13911 (enum tls_model) log, false);
13912 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13913 }
13914
13915 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13916 {
13917 rtx tmp = legitimize_pe_coff_symbol (x, true);
13918 if (tmp)
13919 return tmp;
13920 }
13921
13922 if (flag_pic && SYMBOLIC_CONST (x))
13923 return legitimize_pic_address (x, 0);
13924
13925 #if TARGET_MACHO
13926 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13927 return machopic_indirect_data_reference (x, 0);
13928 #endif
13929
13930 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13931 if (GET_CODE (x) == ASHIFT
13932 && CONST_INT_P (XEXP (x, 1))
13933 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13934 {
13935 changed = 1;
13936 log = INTVAL (XEXP (x, 1));
13937 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13938 GEN_INT (1 << log));
13939 }
13940
13941 if (GET_CODE (x) == PLUS)
13942 {
13943 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13944
13945 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13946 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13947 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13948 {
13949 changed = 1;
13950 log = INTVAL (XEXP (XEXP (x, 0), 1));
13951 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13952 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13953 GEN_INT (1 << log));
13954 }
13955
13956 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13957 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13958 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13959 {
13960 changed = 1;
13961 log = INTVAL (XEXP (XEXP (x, 1), 1));
13962 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13963 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13964 GEN_INT (1 << log));
13965 }
13966
13967 /* Put multiply first if it isn't already. */
13968 if (GET_CODE (XEXP (x, 1)) == MULT)
13969 {
13970 rtx tmp = XEXP (x, 0);
13971 XEXP (x, 0) = XEXP (x, 1);
13972 XEXP (x, 1) = tmp;
13973 changed = 1;
13974 }
13975
13976 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13977 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13978 created by virtual register instantiation, register elimination, and
13979 similar optimizations. */
13980 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13981 {
13982 changed = 1;
13983 x = gen_rtx_PLUS (Pmode,
13984 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13985 XEXP (XEXP (x, 1), 0)),
13986 XEXP (XEXP (x, 1), 1));
13987 }
13988
13989 /* Canonicalize
13990 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13991 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13992 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13993 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13994 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13995 && CONSTANT_P (XEXP (x, 1)))
13996 {
13997 rtx constant;
13998 rtx other = NULL_RTX;
13999
14000 if (CONST_INT_P (XEXP (x, 1)))
14001 {
14002 constant = XEXP (x, 1);
14003 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14004 }
14005 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14006 {
14007 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14008 other = XEXP (x, 1);
14009 }
14010 else
14011 constant = 0;
14012
14013 if (constant)
14014 {
14015 changed = 1;
14016 x = gen_rtx_PLUS (Pmode,
14017 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14018 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14019 plus_constant (Pmode, other,
14020 INTVAL (constant)));
14021 }
14022 }
14023
14024 if (changed && ix86_legitimate_address_p (mode, x, false))
14025 return x;
14026
14027 if (GET_CODE (XEXP (x, 0)) == MULT)
14028 {
14029 changed = 1;
14030 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14031 }
14032
14033 if (GET_CODE (XEXP (x, 1)) == MULT)
14034 {
14035 changed = 1;
14036 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14037 }
14038
14039 if (changed
14040 && REG_P (XEXP (x, 1))
14041 && REG_P (XEXP (x, 0)))
14042 return x;
14043
14044 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14045 {
14046 changed = 1;
14047 x = legitimize_pic_address (x, 0);
14048 }
14049
14050 if (changed && ix86_legitimate_address_p (mode, x, false))
14051 return x;
14052
14053 if (REG_P (XEXP (x, 0)))
14054 {
14055 rtx temp = gen_reg_rtx (Pmode);
14056 rtx val = force_operand (XEXP (x, 1), temp);
14057 if (val != temp)
14058 {
14059 val = convert_to_mode (Pmode, val, 1);
14060 emit_move_insn (temp, val);
14061 }
14062
14063 XEXP (x, 1) = temp;
14064 return x;
14065 }
14066
14067 else if (REG_P (XEXP (x, 1)))
14068 {
14069 rtx temp = gen_reg_rtx (Pmode);
14070 rtx val = force_operand (XEXP (x, 0), temp);
14071 if (val != temp)
14072 {
14073 val = convert_to_mode (Pmode, val, 1);
14074 emit_move_insn (temp, val);
14075 }
14076
14077 XEXP (x, 0) = temp;
14078 return x;
14079 }
14080 }
14081
14082 return x;
14083 }
14084 \f
14085 /* Print an integer constant expression in assembler syntax. Addition
14086 and subtraction are the only arithmetic that may appear in these
14087 expressions. FILE is the stdio stream to write to, X is the rtx, and
14088 CODE is the operand print code from the output string. */
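/* For example, under -fpic a reference to `sym' may come out as
   "sym@GOTOFF" or "sym@GOT", and on x86-64 as "sym@GOTPCREL(%rip)",
   depending on the UNSPEC wrapped around it; `sym' is only a
   placeholder name here.  */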
14089
14090 static void
14091 output_pic_addr_const (FILE *file, rtx x, int code)
14092 {
14093 char buf[256];
14094
14095 switch (GET_CODE (x))
14096 {
14097 case PC:
14098 gcc_assert (flag_pic);
14099 putc ('.', file);
14100 break;
14101
14102 case SYMBOL_REF:
14103 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14104 output_addr_const (file, x);
14105 else
14106 {
14107 const char *name = XSTR (x, 0);
14108
14109 /* Mark the decl as referenced so that cgraph will
14110 output the function. */
14111 if (SYMBOL_REF_DECL (x))
14112 mark_decl_referenced (SYMBOL_REF_DECL (x));
14113
14114 #if TARGET_MACHO
14115 if (MACHOPIC_INDIRECT
14116 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14117 name = machopic_indirection_name (x, /*stub_p=*/true);
14118 #endif
14119 assemble_name (file, name);
14120 }
14121 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14122 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14123 fputs ("@PLT", file);
14124 break;
14125
14126 case LABEL_REF:
14127 x = XEXP (x, 0);
14128 /* FALLTHRU */
14129 case CODE_LABEL:
14130 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14131 assemble_name (asm_out_file, buf);
14132 break;
14133
14134 case CONST_INT:
14135 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14136 break;
14137
14138 case CONST:
14139 /* This used to output parentheses around the expression,
14140 but that does not work on the 386 (either ATT or BSD assembler). */
14141 output_pic_addr_const (file, XEXP (x, 0), code);
14142 break;
14143
14144 case CONST_DOUBLE:
14145 if (GET_MODE (x) == VOIDmode)
14146 {
14147 /* We can use %d if the number is <32 bits and positive. */
14148 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14149 fprintf (file, "0x%lx%08lx",
14150 (unsigned long) CONST_DOUBLE_HIGH (x),
14151 (unsigned long) CONST_DOUBLE_LOW (x));
14152 else
14153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14154 }
14155 else
14156 /* We can't handle floating point constants;
14157 TARGET_PRINT_OPERAND must handle them. */
14158 output_operand_lossage ("floating constant misused");
14159 break;
14160
14161 case PLUS:
14162 /* Some assemblers need integer constants to appear first. */
14163 if (CONST_INT_P (XEXP (x, 0)))
14164 {
14165 output_pic_addr_const (file, XEXP (x, 0), code);
14166 putc ('+', file);
14167 output_pic_addr_const (file, XEXP (x, 1), code);
14168 }
14169 else
14170 {
14171 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14172 output_pic_addr_const (file, XEXP (x, 1), code);
14173 putc ('+', file);
14174 output_pic_addr_const (file, XEXP (x, 0), code);
14175 }
14176 break;
14177
14178 case MINUS:
14179 if (!TARGET_MACHO)
14180 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14181 output_pic_addr_const (file, XEXP (x, 0), code);
14182 putc ('-', file);
14183 output_pic_addr_const (file, XEXP (x, 1), code);
14184 if (!TARGET_MACHO)
14185 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14186 break;
14187
14188 case UNSPEC:
14189 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14190 {
14191 bool f = i386_asm_output_addr_const_extra (file, x);
14192 gcc_assert (f);
14193 break;
14194 }
14195
14196 gcc_assert (XVECLEN (x, 0) == 1);
14197 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14198 switch (XINT (x, 1))
14199 {
14200 case UNSPEC_GOT:
14201 fputs ("@GOT", file);
14202 break;
14203 case UNSPEC_GOTOFF:
14204 fputs ("@GOTOFF", file);
14205 break;
14206 case UNSPEC_PLTOFF:
14207 fputs ("@PLTOFF", file);
14208 break;
14209 case UNSPEC_PCREL:
14210 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14211 "(%rip)" : "[rip]", file);
14212 break;
14213 case UNSPEC_GOTPCREL:
14214 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14215 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14216 break;
14217 case UNSPEC_GOTTPOFF:
14218 /* FIXME: This might be @TPOFF in Sun ld too. */
14219 fputs ("@gottpoff", file);
14220 break;
14221 case UNSPEC_TPOFF:
14222 fputs ("@tpoff", file);
14223 break;
14224 case UNSPEC_NTPOFF:
14225 if (TARGET_64BIT)
14226 fputs ("@tpoff", file);
14227 else
14228 fputs ("@ntpoff", file);
14229 break;
14230 case UNSPEC_DTPOFF:
14231 fputs ("@dtpoff", file);
14232 break;
14233 case UNSPEC_GOTNTPOFF:
14234 if (TARGET_64BIT)
14235 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14236 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14237 else
14238 fputs ("@gotntpoff", file);
14239 break;
14240 case UNSPEC_INDNTPOFF:
14241 fputs ("@indntpoff", file);
14242 break;
14243 #if TARGET_MACHO
14244 case UNSPEC_MACHOPIC_OFFSET:
14245 putc ('-', file);
14246 machopic_output_function_base_name (file);
14247 break;
14248 #endif
14249 default:
14250 output_operand_lossage ("invalid UNSPEC as operand");
14251 break;
14252 }
14253 break;
14254
14255 default:
14256 output_operand_lossage ("invalid expression as operand");
14257 }
14258 }
14259
14260 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14261 We need to emit DTP-relative relocations. */
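/* For a 4-byte entry this emits something along the lines of
   ".long\tfoo@dtpoff"; for an 8-byte entry the same directive is
   followed by ", 0" to pad the value out to 64 bits.  ASM_LONG is
   whatever the target uses for 32-bit data, typically ".long", and
   `foo' is just a placeholder name in this sketch.  */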
14262
14263 static void ATTRIBUTE_UNUSED
14264 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14265 {
14266 fputs (ASM_LONG, file);
14267 output_addr_const (file, x);
14268 fputs ("@dtpoff", file);
14269 switch (size)
14270 {
14271 case 4:
14272 break;
14273 case 8:
14274 fputs (", 0", file);
14275 break;
14276 default:
14277 gcc_unreachable ();
14278 }
14279 }
14280
14281 /* Return true if X is a representation of the PIC register. This copes
14282 with calls from ix86_find_base_term, where the register might have
14283 been replaced by a cselib value. */
14284
14285 static bool
14286 ix86_pic_register_p (rtx x)
14287 {
14288 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14289 return (pic_offset_table_rtx
14290 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14291 else if (!REG_P (x))
14292 return false;
14293 else if (pic_offset_table_rtx)
14294 {
14295 if (REGNO (x) == REGNO (pic_offset_table_rtx))
14296 return true;
14297 if (HARD_REGISTER_P (x)
14298 && !HARD_REGISTER_P (pic_offset_table_rtx)
14299 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14300 return true;
14301 return false;
14302 }
14303 else
14304 return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14305 }
14306
14307 /* Helper function for ix86_delegitimize_address.
14308 Attempt to delegitimize TLS local-exec accesses. */
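/* Sketch of the intent: an address of the general shape
   seg:x@tpoff(base,index,scale), where seg is the default TLS segment
   (%fs on 64-bit, %gs on 32-bit) and which was built by the local-exec
   expansion earlier in this file, is rewritten back into a plain PLUS
   tree rooted at the SYMBOL_REF for x, keeping debug and dump output
   readable.  The code below works on the decomposed ix86_address
   parts rather than on this textual form.  */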
14309
14310 static rtx
14311 ix86_delegitimize_tls_address (rtx orig_x)
14312 {
14313 rtx x = orig_x, unspec;
14314 struct ix86_address addr;
14315
14316 if (!TARGET_TLS_DIRECT_SEG_REFS)
14317 return orig_x;
14318 if (MEM_P (x))
14319 x = XEXP (x, 0);
14320 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14321 return orig_x;
14322 if (ix86_decompose_address (x, &addr) == 0
14323 || addr.seg != DEFAULT_TLS_SEG_REG
14324 || addr.disp == NULL_RTX
14325 || GET_CODE (addr.disp) != CONST)
14326 return orig_x;
14327 unspec = XEXP (addr.disp, 0);
14328 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14329 unspec = XEXP (unspec, 0);
14330 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14331 return orig_x;
14332 x = XVECEXP (unspec, 0, 0);
14333 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14334 if (unspec != XEXP (addr.disp, 0))
14335 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14336 if (addr.index)
14337 {
14338 rtx idx = addr.index;
14339 if (addr.scale != 1)
14340 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14341 x = gen_rtx_PLUS (Pmode, idx, x);
14342 }
14343 if (addr.base)
14344 x = gen_rtx_PLUS (Pmode, addr.base, x);
14345 if (MEM_P (orig_x))
14346 x = replace_equiv_address_nv (orig_x, x);
14347 return x;
14348 }
14349
14350 /* In the name of slightly smaller debug output, and to cater to
14351 general assembler lossage, recognize PIC+GOTOFF and turn it back
14352 into a direct symbol reference.
14353
14354 On Darwin, this is necessary to avoid a crash, because Darwin
14355 has a different PIC label for each routine but the DWARF debugging
14356 information is not associated with any particular routine, so it's
14357 necessary to remove references to the PIC label from RTL stored by
14358 the DWARF output code. */
14359
14360 static rtx
14361 ix86_delegitimize_address (rtx x)
14362 {
14363 rtx orig_x = delegitimize_mem_from_attrs (x);
14364 /* addend is NULL or some rtx if x is something+GOTOFF where
14365 something doesn't include the PIC register. */
14366 rtx addend = NULL_RTX;
14367 /* reg_addend is NULL or a multiple of some register. */
14368 rtx reg_addend = NULL_RTX;
14369 /* const_addend is NULL or a const_int. */
14370 rtx const_addend = NULL_RTX;
14371 /* This is the result, or NULL. */
14372 rtx result = NULL_RTX;
14373
14374 x = orig_x;
14375
14376 if (MEM_P (x))
14377 x = XEXP (x, 0);
14378
14379 if (TARGET_64BIT)
14380 {
14381 if (GET_CODE (x) == CONST
14382 && GET_CODE (XEXP (x, 0)) == PLUS
14383 && GET_MODE (XEXP (x, 0)) == Pmode
14384 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14385 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14386 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14387 {
14388 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14389 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14390 if (MEM_P (orig_x))
14391 x = replace_equiv_address_nv (orig_x, x);
14392 return x;
14393 }
14394
14395 if (GET_CODE (x) == CONST
14396 && GET_CODE (XEXP (x, 0)) == UNSPEC
14397 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14398 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14399 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14400 {
14401 x = XVECEXP (XEXP (x, 0), 0, 0);
14402 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14403 {
14404 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14405 GET_MODE (x), 0);
14406 if (x == NULL_RTX)
14407 return orig_x;
14408 }
14409 return x;
14410 }
14411
14412 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14413 return ix86_delegitimize_tls_address (orig_x);
14414
14415 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14416 and -mcmodel=medium -fpic. */
14417 }
14418
14419 if (GET_CODE (x) != PLUS
14420 || GET_CODE (XEXP (x, 1)) != CONST)
14421 return ix86_delegitimize_tls_address (orig_x);
14422
14423 if (ix86_pic_register_p (XEXP (x, 0)))
14424 /* %ebx + GOT/GOTOFF */
14425 ;
14426 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14427 {
14428 /* %ebx + %reg * scale + GOT/GOTOFF */
14429 reg_addend = XEXP (x, 0);
14430 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14431 reg_addend = XEXP (reg_addend, 1);
14432 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14433 reg_addend = XEXP (reg_addend, 0);
14434 else
14435 {
14436 reg_addend = NULL_RTX;
14437 addend = XEXP (x, 0);
14438 }
14439 }
14440 else
14441 addend = XEXP (x, 0);
14442
14443 x = XEXP (XEXP (x, 1), 0);
14444 if (GET_CODE (x) == PLUS
14445 && CONST_INT_P (XEXP (x, 1)))
14446 {
14447 const_addend = XEXP (x, 1);
14448 x = XEXP (x, 0);
14449 }
14450
14451 if (GET_CODE (x) == UNSPEC
14452 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14453 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14454 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14455 && !MEM_P (orig_x) && !addend)))
14456 result = XVECEXP (x, 0, 0);
14457
14458 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14459 && !MEM_P (orig_x))
14460 result = XVECEXP (x, 0, 0);
14461
14462 if (! result)
14463 return ix86_delegitimize_tls_address (orig_x);
14464
14465 if (const_addend)
14466 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14467 if (reg_addend)
14468 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14469 if (addend)
14470 {
14471 /* If the rest of original X doesn't involve the PIC register, add
14472 addend and subtract pic_offset_table_rtx. This can happen e.g.
14473 for code like:
14474 leal (%ebx, %ecx, 4), %ecx
14475 ...
14476 movl foo@GOTOFF(%ecx), %edx
14477 in which case we return (%ecx - %ebx) + foo
14478 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
14479 and reload has completed. */
14480 if (pic_offset_table_rtx
14481 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14482 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14483 pic_offset_table_rtx),
14484 result);
14485 else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
14486 {
14487 rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
14488 tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
14489 result = gen_rtx_PLUS (Pmode, tmp, result);
14490 }
14491 else
14492 return orig_x;
14493 }
14494 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14495 {
14496 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14497 if (result == NULL_RTX)
14498 return orig_x;
14499 }
14500 return result;
14501 }
14502
14503 /* If X is a machine specific address (i.e. a symbol or label being
14504 referenced as a displacement from the GOT implemented using an
14505 UNSPEC), then return the base term. Otherwise return X. */
14506
14507 rtx
14508 ix86_find_base_term (rtx x)
14509 {
14510 rtx term;
14511
14512 if (TARGET_64BIT)
14513 {
14514 if (GET_CODE (x) != CONST)
14515 return x;
14516 term = XEXP (x, 0);
14517 if (GET_CODE (term) == PLUS
14518 && (CONST_INT_P (XEXP (term, 1))
14519 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14520 term = XEXP (term, 0);
14521 if (GET_CODE (term) != UNSPEC
14522 || (XINT (term, 1) != UNSPEC_GOTPCREL
14523 && XINT (term, 1) != UNSPEC_PCREL))
14524 return x;
14525
14526 return XVECEXP (term, 0, 0);
14527 }
14528
14529 return ix86_delegitimize_address (x);
14530 }
14531 \f
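/* Print to FILE the comparison suffix (e.g. "e", "ne", "a", "ge") for a
   conditional instruction testing CODE in mode MODE.  If REVERSE, print
   the suffix for the reversed condition.  FP selects the alternate
   spellings ("nbe", "nb", "u", "nu") expected by fcmov-style consumers
   in place of the integer ones ("a", "ae", "p", "np").  */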
14532 static void
14533 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14534 bool fp, FILE *file)
14535 {
14536 const char *suffix;
14537
14538 if (mode == CCFPmode || mode == CCFPUmode)
14539 {
14540 code = ix86_fp_compare_code_to_integer (code);
14541 mode = CCmode;
14542 }
14543 if (reverse)
14544 code = reverse_condition (code);
14545
14546 switch (code)
14547 {
14548 case EQ:
14549 switch (mode)
14550 {
14551 case CCAmode:
14552 suffix = "a";
14553 break;
14554
14555 case CCCmode:
14556 suffix = "c";
14557 break;
14558
14559 case CCOmode:
14560 suffix = "o";
14561 break;
14562
14563 case CCSmode:
14564 suffix = "s";
14565 break;
14566
14567 default:
14568 suffix = "e";
14569 }
14570 break;
14571 case NE:
14572 switch (mode)
14573 {
14574 case CCAmode:
14575 suffix = "na";
14576 break;
14577
14578 case CCCmode:
14579 suffix = "nc";
14580 break;
14581
14582 case CCOmode:
14583 suffix = "no";
14584 break;
14585
14586 case CCSmode:
14587 suffix = "ns";
14588 break;
14589
14590 default:
14591 suffix = "ne";
14592 }
14593 break;
14594 case GT:
14595 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14596 suffix = "g";
14597 break;
14598 case GTU:
14599 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14600 Those same assemblers have the same but opposite lossage on cmov. */
14601 if (mode == CCmode)
14602 suffix = fp ? "nbe" : "a";
14603 else
14604 gcc_unreachable ();
14605 break;
14606 case LT:
14607 switch (mode)
14608 {
14609 case CCNOmode:
14610 case CCGOCmode:
14611 suffix = "s";
14612 break;
14613
14614 case CCmode:
14615 case CCGCmode:
14616 suffix = "l";
14617 break;
14618
14619 default:
14620 gcc_unreachable ();
14621 }
14622 break;
14623 case LTU:
14624 if (mode == CCmode)
14625 suffix = "b";
14626 else if (mode == CCCmode)
14627 suffix = "c";
14628 else
14629 gcc_unreachable ();
14630 break;
14631 case GE:
14632 switch (mode)
14633 {
14634 case CCNOmode:
14635 case CCGOCmode:
14636 suffix = "ns";
14637 break;
14638
14639 case CCmode:
14640 case CCGCmode:
14641 suffix = "ge";
14642 break;
14643
14644 default:
14645 gcc_unreachable ();
14646 }
14647 break;
14648 case GEU:
14649 if (mode == CCmode)
14650 suffix = fp ? "nb" : "ae";
14651 else if (mode == CCCmode)
14652 suffix = "nc";
14653 else
14654 gcc_unreachable ();
14655 break;
14656 case LE:
14657 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14658 suffix = "le";
14659 break;
14660 case LEU:
14661 if (mode == CCmode)
14662 suffix = "be";
14663 else
14664 gcc_unreachable ();
14665 break;
14666 case UNORDERED:
14667 suffix = fp ? "u" : "p";
14668 break;
14669 case ORDERED:
14670 suffix = fp ? "nu" : "np";
14671 break;
14672 default:
14673 gcc_unreachable ();
14674 }
14675 fputs (suffix, file);
14676 }
14677
14678 /* Print the name of register X to FILE based on its machine mode and number.
14679 If CODE is 'w', pretend the mode is HImode.
14680 If CODE is 'b', pretend the mode is QImode.
14681 If CODE is 'k', pretend the mode is SImode.
14682 If CODE is 'q', pretend the mode is DImode.
14683 If CODE is 'x', pretend the mode is V4SFmode.
14684 If CODE is 't', pretend the mode is V8SFmode.
14685 If CODE is 'g', pretend the mode is V16SFmode.
14686 If CODE is 'h', pretend the reg is the 'high' byte register.
14687 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14688 If CODE is 'd', duplicate the operand for an AVX instruction.
14689 */
14690
14691 void
14692 print_reg (rtx x, int code, FILE *file)
14693 {
14694 const char *reg;
14695 unsigned int regno;
14696 bool duplicated = code == 'd' && TARGET_AVX;
14697
14698 if (ASSEMBLER_DIALECT == ASM_ATT)
14699 putc ('%', file);
14700
14701 if (x == pc_rtx)
14702 {
14703 gcc_assert (TARGET_64BIT);
14704 fputs ("rip", file);
14705 return;
14706 }
14707
14708 regno = true_regnum (x);
14709 gcc_assert (regno != ARG_POINTER_REGNUM
14710 && regno != FRAME_POINTER_REGNUM
14711 && regno != FLAGS_REG
14712 && regno != FPSR_REG
14713 && regno != FPCR_REG);
14714
14715 if (code == 'w' || MMX_REG_P (x))
14716 code = 2;
14717 else if (code == 'b')
14718 code = 1;
14719 else if (code == 'k')
14720 code = 4;
14721 else if (code == 'q')
14722 code = 8;
14723 else if (code == 'y')
14724 code = 3;
14725 else if (code == 'h')
14726 code = 0;
14727 else if (code == 'x')
14728 code = 16;
14729 else if (code == 't')
14730 code = 32;
14731 else if (code == 'g')
14732 code = 64;
14733 else
14734 code = GET_MODE_SIZE (GET_MODE (x));
14735
14736 /* Irritatingly, the AMD extended registers use a different naming convention
14737 from the normal registers: "r%d[bwd]" */
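  /* For instance (using the usual AT&T spellings), register number 8 is
     printed as %r8b, %r8w, %r8d or %r8 for the 1-, 2-, 4- and 8-byte
     accesses handled below, rather than following the %al/%ax/%eax/%rax
     pattern of the legacy registers.  */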
14738 if (REX_INT_REGNO_P (regno))
14739 {
14740 gcc_assert (TARGET_64BIT);
14741 putc ('r', file);
14742 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14743 switch (code)
14744 {
14745 case 0:
14746 error ("extended registers have no high halves");
14747 break;
14748 case 1:
14749 putc ('b', file);
14750 break;
14751 case 2:
14752 putc ('w', file);
14753 break;
14754 case 4:
14755 putc ('d', file);
14756 break;
14757 case 8:
14758 /* no suffix */
14759 break;
14760 default:
14761 error ("unsupported operand size for extended register");
14762 break;
14763 }
14764 return;
14765 }
14766
14767 reg = NULL;
14768 switch (code)
14769 {
14770 case 3:
14771 if (STACK_TOP_P (x))
14772 {
14773 reg = "st(0)";
14774 break;
14775 }
14776 /* FALLTHRU */
14777 case 8:
14778 case 4:
14779 case 12:
14780 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14781 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14782 /* FALLTHRU */
14783 case 16:
14784 case 2:
14785 normal:
14786 reg = hi_reg_name[regno];
14787 break;
14788 case 1:
14789 if (regno >= ARRAY_SIZE (qi_reg_name))
14790 goto normal;
14791 reg = qi_reg_name[regno];
14792 break;
14793 case 0:
14794 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14795 goto normal;
14796 reg = qi_high_reg_name[regno];
14797 break;
14798 case 32:
14799 if (SSE_REG_P (x))
14800 {
14801 gcc_assert (!duplicated);
14802 putc ('y', file);
14803 fputs (hi_reg_name[regno] + 1, file);
14804 return;
14805 }
14806 case 64:
14807 if (SSE_REG_P (x))
14808 {
14809 gcc_assert (!duplicated);
14810 putc ('z', file);
14811 fputs (hi_reg_name[REGNO (x)] + 1, file);
14812 return;
14813 }
14814 break;
14815 default:
14816 gcc_unreachable ();
14817 }
14818
14819 fputs (reg, file);
14820 if (duplicated)
14821 {
14822 if (ASSEMBLER_DIALECT == ASM_ATT)
14823 fprintf (file, ", %%%s", reg);
14824 else
14825 fprintf (file, ", %s", reg);
14826 }
14827 }
14828
14829 /* Meaning of CODE:
14830 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14831 C -- print opcode suffix for set/cmov insn.
14832 c -- like C, but print reversed condition
14833 F,f -- likewise, but for floating-point.
14834 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14835 otherwise nothing
14836 R -- print embedded rounding and sae.
14837 r -- print only sae.
14838 z -- print the opcode suffix for the size of the current operand.
14839 Z -- likewise, with special suffixes for x87 instructions.
14840 * -- print a star (in certain assembler syntax)
14841 A -- print an absolute memory reference.
14842 E -- print address with DImode register names if TARGET_64BIT.
14843 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14844 s -- print a shift double count, followed by the assembler's argument
14845 delimiter.
14846 b -- print the QImode name of the register for the indicated operand.
14847 %b0 would print %al if operands[0] is reg 0.
14848 w -- likewise, print the HImode name of the register.
14849 k -- likewise, print the SImode name of the register.
14850 q -- likewise, print the DImode name of the register.
14851 x -- likewise, print the V4SFmode name of the register.
14852 t -- likewise, print the V8SFmode name of the register.
14853 g -- likewise, print the V16SFmode name of the register.
14854 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14855 y -- print "st(0)" instead of "st" as a register.
14856 d -- print duplicated register operand for AVX instruction.
14857 D -- print condition for SSE cmp instruction.
14858 P -- if PIC, print an @PLT suffix.
14859 p -- print raw symbol name.
14860 X -- don't print any sort of PIC '@' suffix for a symbol.
14861 & -- print some in-use local-dynamic symbol name.
14862 H -- print a memory address offset by 8; used for sse high-parts
14863 Y -- print condition for XOP pcom* instruction.
14864 + -- print a branch hint as 'cs' or 'ds' prefix
14865 ; -- print a semicolon (after prefixes due to bug in older gas).
14866 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14867 @ -- print a segment register of thread base pointer load
14868 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14869 */
14870
14871 void
14872 ix86_print_operand (FILE *file, rtx x, int code)
14873 {
14874 if (code)
14875 {
14876 switch (code)
14877 {
14878 case 'A':
14879 switch (ASSEMBLER_DIALECT)
14880 {
14881 case ASM_ATT:
14882 putc ('*', file);
14883 break;
14884
14885 case ASM_INTEL:
14886 /* Intel syntax. For absolute addresses, registers should not
14887 be surrounded by brackets. */
14888 if (!REG_P (x))
14889 {
14890 putc ('[', file);
14891 ix86_print_operand (file, x, 0);
14892 putc (']', file);
14893 return;
14894 }
14895 break;
14896
14897 default:
14898 gcc_unreachable ();
14899 }
14900
14901 ix86_print_operand (file, x, 0);
14902 return;
14903
14904 case 'E':
14905 /* Wrap address in an UNSPEC to declare special handling. */
14906 if (TARGET_64BIT)
14907 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14908
14909 output_address (x);
14910 return;
14911
14912 case 'L':
14913 if (ASSEMBLER_DIALECT == ASM_ATT)
14914 putc ('l', file);
14915 return;
14916
14917 case 'W':
14918 if (ASSEMBLER_DIALECT == ASM_ATT)
14919 putc ('w', file);
14920 return;
14921
14922 case 'B':
14923 if (ASSEMBLER_DIALECT == ASM_ATT)
14924 putc ('b', file);
14925 return;
14926
14927 case 'Q':
14928 if (ASSEMBLER_DIALECT == ASM_ATT)
14929 putc ('l', file);
14930 return;
14931
14932 case 'S':
14933 if (ASSEMBLER_DIALECT == ASM_ATT)
14934 putc ('s', file);
14935 return;
14936
14937 case 'T':
14938 if (ASSEMBLER_DIALECT == ASM_ATT)
14939 putc ('t', file);
14940 return;
14941
14942 case 'O':
14943 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14944 if (ASSEMBLER_DIALECT != ASM_ATT)
14945 return;
14946
14947 switch (GET_MODE_SIZE (GET_MODE (x)))
14948 {
14949 case 2:
14950 putc ('w', file);
14951 break;
14952
14953 case 4:
14954 putc ('l', file);
14955 break;
14956
14957 case 8:
14958 putc ('q', file);
14959 break;
14960
14961 default:
14962 output_operand_lossage
14963 ("invalid operand size for operand code 'O'");
14964 return;
14965 }
14966
14967 putc ('.', file);
14968 #endif
14969 return;
14970
14971 case 'z':
14972 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14973 {
14974 /* Opcodes don't get size suffixes when using Intel syntax. */
14975 if (ASSEMBLER_DIALECT == ASM_INTEL)
14976 return;
14977
14978 switch (GET_MODE_SIZE (GET_MODE (x)))
14979 {
14980 case 1:
14981 putc ('b', file);
14982 return;
14983
14984 case 2:
14985 putc ('w', file);
14986 return;
14987
14988 case 4:
14989 putc ('l', file);
14990 return;
14991
14992 case 8:
14993 putc ('q', file);
14994 return;
14995
14996 default:
14997 output_operand_lossage
14998 ("invalid operand size for operand code 'z'");
14999 return;
15000 }
15001 }
15002
15003 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15004 warning
15005 (0, "non-integer operand used with operand code 'z'");
15006 /* FALLTHRU */
15007
15008 case 'Z':
15009 /* 387 opcodes don't get size suffixes when using Intel syntax. */
15010 if (ASSEMBLER_DIALECT == ASM_INTEL)
15011 return;
15012
15013 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15014 {
15015 switch (GET_MODE_SIZE (GET_MODE (x)))
15016 {
15017 case 2:
15018 #ifdef HAVE_AS_IX86_FILDS
15019 putc ('s', file);
15020 #endif
15021 return;
15022
15023 case 4:
15024 putc ('l', file);
15025 return;
15026
15027 case 8:
15028 #ifdef HAVE_AS_IX86_FILDQ
15029 putc ('q', file);
15030 #else
15031 fputs ("ll", file);
15032 #endif
15033 return;
15034
15035 default:
15036 break;
15037 }
15038 }
15039 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15040 {
15041 /* 387 opcodes don't get size suffixes
15042 if the operands are registers. */
15043 if (STACK_REG_P (x))
15044 return;
15045
15046 switch (GET_MODE_SIZE (GET_MODE (x)))
15047 {
15048 case 4:
15049 putc ('s', file);
15050 return;
15051
15052 case 8:
15053 putc ('l', file);
15054 return;
15055
15056 case 12:
15057 case 16:
15058 putc ('t', file);
15059 return;
15060
15061 default:
15062 break;
15063 }
15064 }
15065 else
15066 {
15067 output_operand_lossage
15068 ("invalid operand type used with operand code 'Z'");
15069 return;
15070 }
15071
15072 output_operand_lossage
15073 ("invalid operand size for operand code 'Z'");
15074 return;
15075
15076 case 'd':
15077 case 'b':
15078 case 'w':
15079 case 'k':
15080 case 'q':
15081 case 'h':
15082 case 't':
15083 case 'g':
15084 case 'y':
15085 case 'x':
15086 case 'X':
15087 case 'P':
15088 case 'p':
15089 break;
15090
15091 case 's':
15092 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15093 {
15094 ix86_print_operand (file, x, 0);
15095 fputs (", ", file);
15096 }
15097 return;
15098
15099 case 'Y':
15100 switch (GET_CODE (x))
15101 {
15102 case NE:
15103 fputs ("neq", file);
15104 break;
15105 case EQ:
15106 fputs ("eq", file);
15107 break;
15108 case GE:
15109 case GEU:
15110 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15111 break;
15112 case GT:
15113 case GTU:
15114 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15115 break;
15116 case LE:
15117 case LEU:
15118 fputs ("le", file);
15119 break;
15120 case LT:
15121 case LTU:
15122 fputs ("lt", file);
15123 break;
15124 case UNORDERED:
15125 fputs ("unord", file);
15126 break;
15127 case ORDERED:
15128 fputs ("ord", file);
15129 break;
15130 case UNEQ:
15131 fputs ("ueq", file);
15132 break;
15133 case UNGE:
15134 fputs ("nlt", file);
15135 break;
15136 case UNGT:
15137 fputs ("nle", file);
15138 break;
15139 case UNLE:
15140 fputs ("ule", file);
15141 break;
15142 case UNLT:
15143 fputs ("ult", file);
15144 break;
15145 case LTGT:
15146 fputs ("une", file);
15147 break;
15148 default:
15149 output_operand_lossage ("operand is not a condition code, "
15150 "invalid operand code 'Y'");
15151 return;
15152 }
15153 return;
15154
15155 case 'D':
15156 /* A little bit of braindamage here. The SSE compare instructions
15157 use completely different names for the comparisons than the
15158 fp conditional moves do. */
15159 switch (GET_CODE (x))
15160 {
15161 case UNEQ:
15162 if (TARGET_AVX)
15163 {
15164 fputs ("eq_us", file);
15165 break;
15166 }
15167 case EQ:
15168 fputs ("eq", file);
15169 break;
15170 case UNLT:
15171 if (TARGET_AVX)
15172 {
15173 fputs ("nge", file);
15174 break;
15175 }
15176 case LT:
15177 fputs ("lt", file);
15178 break;
15179 case UNLE:
15180 if (TARGET_AVX)
15181 {
15182 fputs ("ngt", file);
15183 break;
15184 }
15185 case LE:
15186 fputs ("le", file);
15187 break;
15188 case UNORDERED:
15189 fputs ("unord", file);
15190 break;
15191 case LTGT:
15192 if (TARGET_AVX)
15193 {
15194 fputs ("neq_oq", file);
15195 break;
15196 }
15197 case NE:
15198 fputs ("neq", file);
15199 break;
15200 case GE:
15201 if (TARGET_AVX)
15202 {
15203 fputs ("ge", file);
15204 break;
15205 }
15206 case UNGE:
15207 fputs ("nlt", file);
15208 break;
15209 case GT:
15210 if (TARGET_AVX)
15211 {
15212 fputs ("gt", file);
15213 break;
15214 }
15215 case UNGT:
15216 fputs ("nle", file);
15217 break;
15218 case ORDERED:
15219 fputs ("ord", file);
15220 break;
15221 default:
15222 output_operand_lossage ("operand is not a condition code, "
15223 "invalid operand code 'D'");
15224 return;
15225 }
15226 return;
15227
15228 case 'F':
15229 case 'f':
15230 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15231 if (ASSEMBLER_DIALECT == ASM_ATT)
15232 putc ('.', file);
15233 #endif
15234
15235 case 'C':
15236 case 'c':
15237 if (!COMPARISON_P (x))
15238 {
15239 output_operand_lossage ("operand is not a condition code, "
15240 "invalid operand code '%c'", code);
15241 return;
15242 }
15243 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15244 code == 'c' || code == 'f',
15245 code == 'F' || code == 'f',
15246 file);
15247 return;
15248
15249 case 'H':
15250 if (!offsettable_memref_p (x))
15251 {
15252 output_operand_lossage ("operand is not an offsettable memory "
15253 "reference, invalid operand code 'H'");
15254 return;
15255 }
15256 /* It doesn't actually matter what mode we use here, as we're
15257 only going to use this for printing. */
15258 x = adjust_address_nv (x, DImode, 8);
15259 /* Output 'qword ptr' for intel assembler dialect. */
15260 if (ASSEMBLER_DIALECT == ASM_INTEL)
15261 code = 'q';
15262 break;
15263
15264 case 'K':
15265 gcc_assert (CONST_INT_P (x));
15266
15267 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15268 #ifdef HAVE_AS_IX86_HLE
15269 fputs ("xacquire ", file);
15270 #else
15271 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15272 #endif
15273 else if (INTVAL (x) & IX86_HLE_RELEASE)
15274 #ifdef HAVE_AS_IX86_HLE
15275 fputs ("xrelease ", file);
15276 #else
15277 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15278 #endif
15279 /* We do not want to print the value of the operand. */
15280 return;
15281
15282 case 'N':
15283 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15284 fputs ("{z}", file);
15285 return;
15286
15287 case 'r':
15288 gcc_assert (CONST_INT_P (x));
15289 gcc_assert (INTVAL (x) == ROUND_SAE);
15290
15291 if (ASSEMBLER_DIALECT == ASM_INTEL)
15292 fputs (", ", file);
15293
15294 fputs ("{sae}", file);
15295
15296 if (ASSEMBLER_DIALECT == ASM_ATT)
15297 fputs (", ", file);
15298
15299 return;
15300
15301 case 'R':
15302 gcc_assert (CONST_INT_P (x));
15303
15304 if (ASSEMBLER_DIALECT == ASM_INTEL)
15305 fputs (", ", file);
15306
15307 switch (INTVAL (x))
15308 {
15309 case ROUND_NEAREST_INT | ROUND_SAE:
15310 fputs ("{rn-sae}", file);
15311 break;
15312 case ROUND_NEG_INF | ROUND_SAE:
15313 fputs ("{rd-sae}", file);
15314 break;
15315 case ROUND_POS_INF | ROUND_SAE:
15316 fputs ("{ru-sae}", file);
15317 break;
15318 case ROUND_ZERO | ROUND_SAE:
15319 fputs ("{rz-sae}", file);
15320 break;
15321 default:
15322 gcc_unreachable ();
15323 }
15324
15325 if (ASSEMBLER_DIALECT == ASM_ATT)
15326 fputs (", ", file);
15327
15328 return;
15329
15330 case '*':
15331 if (ASSEMBLER_DIALECT == ASM_ATT)
15332 putc ('*', file);
15333 return;
15334
15335 case '&':
15336 {
15337 const char *name = get_some_local_dynamic_name ();
15338 if (name == NULL)
15339 output_operand_lossage ("'%%&' used without any "
15340 "local dynamic TLS references");
15341 else
15342 assemble_name (file, name);
15343 return;
15344 }
15345
15346 case '+':
15347 {
15348 rtx x;
15349
15350 if (!optimize
15351 || optimize_function_for_size_p (cfun)
15352 || !TARGET_BRANCH_PREDICTION_HINTS)
15353 return;
15354
15355 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15356 if (x)
15357 {
15358 int pred_val = XINT (x, 0);
15359
15360 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15361 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15362 {
15363 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15364 bool cputaken
15365 = final_forward_branch_p (current_output_insn) == 0;
15366
15367 /* Emit hints only in the case where the default branch prediction
15368 heuristics would fail. */
15369 if (taken != cputaken)
15370 {
15371 /* We use 3e (DS) prefix for taken branches and
15372 2e (CS) prefix for not taken branches. */
15373 if (taken)
15374 fputs ("ds ; ", file);
15375 else
15376 fputs ("cs ; ", file);
15377 }
15378 }
15379 }
15380 return;
15381 }
15382
15383 case ';':
15384 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15385 putc (';', file);
15386 #endif
15387 return;
15388
15389 case '@':
15390 if (ASSEMBLER_DIALECT == ASM_ATT)
15391 putc ('%', file);
15392
15393 /* The kernel uses a different segment register for performance
15394 reasons; a system call would not have to trash the userspace
15395 segment register, which would be expensive. */
15396 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15397 fputs ("fs", file);
15398 else
15399 fputs ("gs", file);
15400 return;
15401
15402 case '~':
15403 putc (TARGET_AVX2 ? 'i' : 'f', file);
15404 return;
15405
15406 case '^':
15407 if (TARGET_64BIT && Pmode != word_mode)
15408 fputs ("addr32 ", file);
15409 return;
15410
15411 default:
15412 output_operand_lossage ("invalid operand code '%c'", code);
15413 }
15414 }
15415
15416 if (REG_P (x))
15417 print_reg (x, code, file);
15418
15419 else if (MEM_P (x))
15420 {
15421 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15422 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15423 && GET_MODE (x) != BLKmode)
15424 {
15425 const char * size;
15426 switch (GET_MODE_SIZE (GET_MODE (x)))
15427 {
15428 case 1: size = "BYTE"; break;
15429 case 2: size = "WORD"; break;
15430 case 4: size = "DWORD"; break;
15431 case 8: size = "QWORD"; break;
15432 case 12: size = "TBYTE"; break;
15433 case 16:
15434 if (GET_MODE (x) == XFmode)
15435 size = "TBYTE";
15436 else
15437 size = "XMMWORD";
15438 break;
15439 case 32: size = "YMMWORD"; break;
15440 case 64: size = "ZMMWORD"; break;
15441 default:
15442 gcc_unreachable ();
15443 }
15444
15445 /* Check for explicit size override (codes 'b', 'w', 'k',
15446 'q' and 'x') */
15447 if (code == 'b')
15448 size = "BYTE";
15449 else if (code == 'w')
15450 size = "WORD";
15451 else if (code == 'k')
15452 size = "DWORD";
15453 else if (code == 'q')
15454 size = "QWORD";
15455 else if (code == 'x')
15456 size = "XMMWORD";
15457
15458 fputs (size, file);
15459 fputs (" PTR ", file);
15460 }
15461
15462 x = XEXP (x, 0);
15463 /* Avoid (%rip) for call operands. */
15464 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15465 && !CONST_INT_P (x))
15466 output_addr_const (file, x);
15467 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15468 output_operand_lossage ("invalid constraints for operand");
15469 else
15470 output_address (x);
15471 }
15472
15473 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15474 {
15475 REAL_VALUE_TYPE r;
15476 long l;
15477
15478 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15479 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15480
15481 if (ASSEMBLER_DIALECT == ASM_ATT)
15482 putc ('$', file);
15483 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15484 if (code == 'q')
15485 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15486 (unsigned long long) (int) l);
15487 else
15488 fprintf (file, "0x%08x", (unsigned int) l);
15489 }
15490
15491 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15492 {
15493 REAL_VALUE_TYPE r;
15494 long l[2];
15495
15496 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15497 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15498
15499 if (ASSEMBLER_DIALECT == ASM_ATT)
15500 putc ('$', file);
15501 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15502 }
15503
15504 /* These float cases don't actually occur as immediate operands. */
15505 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15506 {
15507 char dstr[30];
15508
15509 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15510 fputs (dstr, file);
15511 }
15512
15513 else
15514 {
15515 /* We have patterns that allow zero sets of memory, for instance.
15516 In 64-bit mode, we should probably support all 8-byte vectors,
15517 since we can in fact encode that into an immediate. */
15518 if (GET_CODE (x) == CONST_VECTOR)
15519 {
15520 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15521 x = const0_rtx;
15522 }
15523
15524 if (code != 'P' && code != 'p')
15525 {
15526 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15527 {
15528 if (ASSEMBLER_DIALECT == ASM_ATT)
15529 putc ('$', file);
15530 }
15531 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15532 || GET_CODE (x) == LABEL_REF)
15533 {
15534 if (ASSEMBLER_DIALECT == ASM_ATT)
15535 putc ('$', file);
15536 else
15537 fputs ("OFFSET FLAT:", file);
15538 }
15539 }
15540 if (CONST_INT_P (x))
15541 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15542 else if (flag_pic || MACHOPIC_INDIRECT)
15543 output_pic_addr_const (file, x, code);
15544 else
15545 output_addr_const (file, x);
15546 }
15547 }
15548
15549 static bool
15550 ix86_print_operand_punct_valid_p (unsigned char code)
15551 {
15552 return (code == '@' || code == '*' || code == '+' || code == '&'
15553 || code == ';' || code == '~' || code == '^');
15554 }
15555 \f
15556 /* Print a memory operand whose address is ADDR. */
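/* In the AT&T dialect the result looks like "disp(base,index,scale)",
   with an optional segment override in front; in the Intel dialect it
   is printed as "seg:[base+index*scale+disp]".  This is a summary of
   the code below, not an exhaustive grammar of the output.  */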
15557
15558 static void
15559 ix86_print_operand_address (FILE *file, rtx addr)
15560 {
15561 struct ix86_address parts;
15562 rtx base, index, disp;
15563 int scale;
15564 int ok;
15565 bool vsib = false;
15566 int code = 0;
15567
15568 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15569 {
15570 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15571 gcc_assert (parts.index == NULL_RTX);
15572 parts.index = XVECEXP (addr, 0, 1);
15573 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15574 addr = XVECEXP (addr, 0, 0);
15575 vsib = true;
15576 }
15577 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15578 {
15579 gcc_assert (TARGET_64BIT);
15580 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15581 code = 'q';
15582 }
15583 else
15584 ok = ix86_decompose_address (addr, &parts);
15585
15586 gcc_assert (ok);
15587
15588 base = parts.base;
15589 index = parts.index;
15590 disp = parts.disp;
15591 scale = parts.scale;
15592
15593 switch (parts.seg)
15594 {
15595 case SEG_DEFAULT:
15596 break;
15597 case SEG_FS:
15598 case SEG_GS:
15599 if (ASSEMBLER_DIALECT == ASM_ATT)
15600 putc ('%', file);
15601 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15602 break;
15603 default:
15604 gcc_unreachable ();
15605 }
15606
15607 /* Use one byte shorter RIP relative addressing for 64bit mode. */
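  /* That is, when there is no base and no index register and the
     displacement is a label or a non-TLS symbol, pretend the base is
     the program counter so the operand is printed as "sym(%rip)" in
     AT&T syntax (or "sym[rip]" in Intel syntax) instead of as a 32-bit
     absolute address.  */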
15608 if (TARGET_64BIT && !base && !index)
15609 {
15610 rtx symbol = disp;
15611
15612 if (GET_CODE (disp) == CONST
15613 && GET_CODE (XEXP (disp, 0)) == PLUS
15614 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15615 symbol = XEXP (XEXP (disp, 0), 0);
15616
15617 if (GET_CODE (symbol) == LABEL_REF
15618 || (GET_CODE (symbol) == SYMBOL_REF
15619 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15620 base = pc_rtx;
15621 }
15622 if (!base && !index)
15623 {
15624 /* A displacement-only address requires special attention. */
15625
15626 if (CONST_INT_P (disp))
15627 {
15628 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15629 fputs ("ds:", file);
15630 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15631 }
15632 else if (flag_pic)
15633 output_pic_addr_const (file, disp, 0);
15634 else
15635 output_addr_const (file, disp);
15636 }
15637 else
15638 {
15639 /* Print SImode register names to force addr32 prefix. */
15640 if (SImode_address_operand (addr, VOIDmode))
15641 {
15642 #ifdef ENABLE_CHECKING
15643 gcc_assert (TARGET_64BIT);
15644 switch (GET_CODE (addr))
15645 {
15646 case SUBREG:
15647 gcc_assert (GET_MODE (addr) == SImode);
15648 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15649 break;
15650 case ZERO_EXTEND:
15651 case AND:
15652 gcc_assert (GET_MODE (addr) == DImode);
15653 break;
15654 default:
15655 gcc_unreachable ();
15656 }
15657 #endif
15658 gcc_assert (!code);
15659 code = 'k';
15660 }
15661 else if (code == 0
15662 && TARGET_X32
15663 && disp
15664 && CONST_INT_P (disp)
15665 && INTVAL (disp) < -16*1024*1024)
15666 {
15667 /* X32 runs in 64-bit mode, where displacement, DISP, in
15668 address DISP(%r64), is encoded as 32-bit immediate sign-
15669 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15670 address is %r64 + 0xffffffffbffffd00. When %r64 <
15671 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15672 which is invalid for x32. The correct address is %r64
15673 - 0x40000300 == 0xf7ffdd64. To properly encode
15674 -0x40000300(%r64) for x32, we zero-extend negative
15675 displacement by forcing addr32 prefix which truncates
15676 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15677 zero-extend all negative displacements, including -1(%rsp).
15678 However, for small negative displacements, sign-extension
15679 won't cause overflow. We only zero-extend negative
15680 displacements if they are < -16*1024*1024, which is also used
15681 to check legitimate address displacements for PIC. */
15682 code = 'k';
15683 }
15684
15685 if (ASSEMBLER_DIALECT == ASM_ATT)
15686 {
15687 if (disp)
15688 {
15689 if (flag_pic)
15690 output_pic_addr_const (file, disp, 0);
15691 else if (GET_CODE (disp) == LABEL_REF)
15692 output_asm_label (disp);
15693 else
15694 output_addr_const (file, disp);
15695 }
15696
15697 putc ('(', file);
15698 if (base)
15699 print_reg (base, code, file);
15700 if (index)
15701 {
15702 putc (',', file);
15703 print_reg (index, vsib ? 0 : code, file);
15704 if (scale != 1 || vsib)
15705 fprintf (file, ",%d", scale);
15706 }
15707 putc (')', file);
15708 }
15709 else
15710 {
15711 rtx offset = NULL_RTX;
15712
15713 if (disp)
15714 {
15715 /* Pull out the offset of a symbol; print any symbol itself. */
15716 if (GET_CODE (disp) == CONST
15717 && GET_CODE (XEXP (disp, 0)) == PLUS
15718 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15719 {
15720 offset = XEXP (XEXP (disp, 0), 1);
15721 disp = gen_rtx_CONST (VOIDmode,
15722 XEXP (XEXP (disp, 0), 0));
15723 }
15724
15725 if (flag_pic)
15726 output_pic_addr_const (file, disp, 0);
15727 else if (GET_CODE (disp) == LABEL_REF)
15728 output_asm_label (disp);
15729 else if (CONST_INT_P (disp))
15730 offset = disp;
15731 else
15732 output_addr_const (file, disp);
15733 }
15734
15735 putc ('[', file);
15736 if (base)
15737 {
15738 print_reg (base, code, file);
15739 if (offset)
15740 {
15741 if (INTVAL (offset) >= 0)
15742 putc ('+', file);
15743 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15744 }
15745 }
15746 else if (offset)
15747 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15748 else
15749 putc ('0', file);
15750
15751 if (index)
15752 {
15753 putc ('+', file);
15754 print_reg (index, vsib ? 0 : code, file);
15755 if (scale != 1 || vsib)
15756 fprintf (file, "*%d", scale);
15757 }
15758 putc (']', file);
15759 }
15760 }
15761 }
15762
15763 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15764
15765 static bool
15766 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15767 {
15768 rtx op;
15769
15770 if (GET_CODE (x) != UNSPEC)
15771 return false;
15772
15773 op = XVECEXP (x, 0, 0);
15774 switch (XINT (x, 1))
15775 {
15776 case UNSPEC_GOTTPOFF:
15777 output_addr_const (file, op);
15778 /* FIXME: This might be @TPOFF in Sun ld. */
15779 fputs ("@gottpoff", file);
15780 break;
15781 case UNSPEC_TPOFF:
15782 output_addr_const (file, op);
15783 fputs ("@tpoff", file);
15784 break;
15785 case UNSPEC_NTPOFF:
15786 output_addr_const (file, op);
15787 if (TARGET_64BIT)
15788 fputs ("@tpoff", file);
15789 else
15790 fputs ("@ntpoff", file);
15791 break;
15792 case UNSPEC_DTPOFF:
15793 output_addr_const (file, op);
15794 fputs ("@dtpoff", file);
15795 break;
15796 case UNSPEC_GOTNTPOFF:
15797 output_addr_const (file, op);
15798 if (TARGET_64BIT)
15799 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15800 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15801 else
15802 fputs ("@gotntpoff", file);
15803 break;
15804 case UNSPEC_INDNTPOFF:
15805 output_addr_const (file, op);
15806 fputs ("@indntpoff", file);
15807 break;
15808 #if TARGET_MACHO
15809 case UNSPEC_MACHOPIC_OFFSET:
15810 output_addr_const (file, op);
15811 putc ('-', file);
15812 machopic_output_function_base_name (file);
15813 break;
15814 #endif
15815
15816 case UNSPEC_STACK_CHECK:
15817 {
15818 int offset;
15819
15820 gcc_assert (flag_split_stack);
15821
15822 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15823 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15824 #else
15825 gcc_unreachable ();
15826 #endif
15827
15828 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15829 }
15830 break;
15831
15832 default:
15833 return false;
15834 }
15835
15836 return true;
15837 }
15838 \f
15839 /* Split one or more double-mode RTL references into pairs of half-mode
15840 references. The RTL can be REG, offsettable MEM, integer constant, or
15841 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15842 split and "num" is its length. lo_half and hi_half are output arrays
15843 that parallel "operands". */
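/* For illustration (hypothetical operand): splitting the DImode constant
   0x0000000100000002 yields lo_half = (const_int 2) and hi_half =
   (const_int 1); on this little-endian target the subreg at byte offset 0
   is the low half and the one at offset GET_MODE_SIZE (SImode) the high
   half.  */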
15844
15845 void
15846 split_double_mode (enum machine_mode mode, rtx operands[],
15847 int num, rtx lo_half[], rtx hi_half[])
15848 {
15849 enum machine_mode half_mode;
15850 unsigned int byte;
15851
15852 switch (mode)
15853 {
15854 case TImode:
15855 half_mode = DImode;
15856 break;
15857 case DImode:
15858 half_mode = SImode;
15859 break;
15860 default:
15861 gcc_unreachable ();
15862 }
15863
15864 byte = GET_MODE_SIZE (half_mode);
15865
15866 while (num--)
15867 {
15868 rtx op = operands[num];
15869
15870 /* simplify_subreg refuses to split volatile memory addresses,
15871 but we still have to handle them. */
15872 if (MEM_P (op))
15873 {
15874 lo_half[num] = adjust_address (op, half_mode, 0);
15875 hi_half[num] = adjust_address (op, half_mode, byte);
15876 }
15877 else
15878 {
15879 lo_half[num] = simplify_gen_subreg (half_mode, op,
15880 GET_MODE (op) == VOIDmode
15881 ? mode : GET_MODE (op), 0);
15882 hi_half[num] = simplify_gen_subreg (half_mode, op,
15883 GET_MODE (op) == VOIDmode
15884 ? mode : GET_MODE (op), byte);
15885 }
15886 }
15887 }
15888 \f
15889 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15890 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15891 is the expression of the binary operation. The output may either be
15892 emitted here, or returned to the caller, like all output_* functions.
15893
15894 There is no guarantee that the operands are the same mode, as they
15895 might be within FLOAT or FLOAT_EXTEND expressions. */
15896
15897 #ifndef SYSV386_COMPAT
15898 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15899 wants to fix the assemblers because that causes incompatibility
15900 with gcc. No-one wants to fix gcc because that causes
15901 incompatibility with assemblers... You can use the option of
15902 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15903 #define SYSV386_COMPAT 1
15904 #endif
15905
15906 const char *
15907 output_387_binary_op (rtx insn, rtx *operands)
15908 {
15909 static char buf[40];
15910 const char *p;
15911 const char *ssep;
15912 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15913
15914 #ifdef ENABLE_CHECKING
15915 /* Even if we do not want to check the inputs, this documents the input
15916 constraints, which helps in understanding the following code. */
15917 if (STACK_REG_P (operands[0])
15918 && ((REG_P (operands[1])
15919 && REGNO (operands[0]) == REGNO (operands[1])
15920 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15921 || (REG_P (operands[2])
15922 && REGNO (operands[0]) == REGNO (operands[2])
15923 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15924 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15925 ; /* ok */
15926 else
15927 gcc_assert (is_sse);
15928 #endif
15929
15930 switch (GET_CODE (operands[3]))
15931 {
15932 case PLUS:
15933 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15934 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15935 p = "fiadd";
15936 else
15937 p = "fadd";
15938 ssep = "vadd";
15939 break;
15940
15941 case MINUS:
15942 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15943 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15944 p = "fisub";
15945 else
15946 p = "fsub";
15947 ssep = "vsub";
15948 break;
15949
15950 case MULT:
15951 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15952 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15953 p = "fimul";
15954 else
15955 p = "fmul";
15956 ssep = "vmul";
15957 break;
15958
15959 case DIV:
15960 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15961 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15962 p = "fidiv";
15963 else
15964 p = "fdiv";
15965 ssep = "vdiv";
15966 break;
15967
15968 default:
15969 gcc_unreachable ();
15970 }
15971
15972 if (is_sse)
15973 {
15974 if (TARGET_AVX)
15975 {
15976 strcpy (buf, ssep);
15977 if (GET_MODE (operands[0]) == SFmode)
15978 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15979 else
15980 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15981 }
15982 else
15983 {
15984 strcpy (buf, ssep + 1);
15985 if (GET_MODE (operands[0]) == SFmode)
15986 strcat (buf, "ss\t{%2, %0|%0, %2}");
15987 else
15988 strcat (buf, "sd\t{%2, %0|%0, %2}");
15989 }
15990 return buf;
15991 }
15992 strcpy (buf, p);
15993
15994 switch (GET_CODE (operands[3]))
15995 {
15996 case MULT:
15997 case PLUS:
15998 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15999 {
16000 rtx temp = operands[2];
16001 operands[2] = operands[1];
16002 operands[1] = temp;
16003 }
16004
16005 /* Now we know operands[0] == operands[1]. */
16006
16007 if (MEM_P (operands[2]))
16008 {
16009 p = "%Z2\t%2";
16010 break;
16011 }
16012
16013 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16014 {
16015 if (STACK_TOP_P (operands[0]))
16016 /* How is it that we are storing to a dead operand[2]?
16017 Well, presumably operands[1] is dead too. We can't
16018 store the result to st(0) as st(0) gets popped on this
16019 instruction. Instead store to operands[2] (which I
16020 think has to be st(1)). st(1) will be popped later.
16021 gcc <= 2.8.1 didn't have this check and generated
16022 assembly code that the Unixware assembler rejected. */
16023 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16024 else
16025 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16026 break;
16027 }
16028
16029 if (STACK_TOP_P (operands[0]))
16030 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16031 else
16032 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16033 break;
16034
16035 case MINUS:
16036 case DIV:
16037 if (MEM_P (operands[1]))
16038 {
16039 p = "r%Z1\t%1";
16040 break;
16041 }
16042
16043 if (MEM_P (operands[2]))
16044 {
16045 p = "%Z2\t%2";
16046 break;
16047 }
16048
16049 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16050 {
16051 #if SYSV386_COMPAT
16052 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16053 derived assemblers, confusingly reverse the direction of
16054 the operation for fsub{r} and fdiv{r} when the
16055 destination register is not st(0). The Intel assembler
16056 doesn't have this brain damage. Read !SYSV386_COMPAT to
16057 figure out what the hardware really does. */
16058 if (STACK_TOP_P (operands[0]))
16059 p = "{p\t%0, %2|rp\t%2, %0}";
16060 else
16061 p = "{rp\t%2, %0|p\t%0, %2}";
16062 #else
16063 if (STACK_TOP_P (operands[0]))
16064 /* As above for fmul/fadd, we can't store to st(0). */
16065 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16066 else
16067 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16068 #endif
16069 break;
16070 }
16071
16072 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16073 {
16074 #if SYSV386_COMPAT
16075 if (STACK_TOP_P (operands[0]))
16076 p = "{rp\t%0, %1|p\t%1, %0}";
16077 else
16078 p = "{p\t%1, %0|rp\t%0, %1}";
16079 #else
16080 if (STACK_TOP_P (operands[0]))
16081 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16082 else
16083 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16084 #endif
16085 break;
16086 }
16087
16088 if (STACK_TOP_P (operands[0]))
16089 {
16090 if (STACK_TOP_P (operands[1]))
16091 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16092 else
16093 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16094 break;
16095 }
16096 else if (STACK_TOP_P (operands[1]))
16097 {
16098 #if SYSV386_COMPAT
16099 p = "{\t%1, %0|r\t%0, %1}";
16100 #else
16101 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16102 #endif
16103 }
16104 else
16105 {
16106 #if SYSV386_COMPAT
16107 p = "{r\t%2, %0|\t%0, %2}";
16108 #else
16109 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16110 #endif
16111 }
16112 break;
16113
16114 default:
16115 gcc_unreachable ();
16116 }
16117
16118 strcat (buf, p);
16119 return buf;
16120 }
16121
16122 /* Check if a 256bit AVX register is referenced inside EXP. */
16123
16124 static int
16125 ix86_check_avx256_register (rtx *pexp, void *)
16126 {
16127 rtx exp = *pexp;
16128
16129 if (GET_CODE (exp) == SUBREG)
16130 exp = SUBREG_REG (exp);
16131
16132 if (REG_P (exp)
16133 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16134 return 1;
16135
16136 return 0;
16137 }
16138
16139 /* Return needed mode for entity in optimize_mode_switching pass. */
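/* Overview (informal summary, not from the original comments): the AVX_U128
   entity tracks whether the upper 128 bits of the ymm registers may hold
   live data (AVX_U128_DIRTY), are known to be zeroed (AVX_U128_CLEAN), or
   are irrelevant (AVX_U128_ANY).  The mode-switching pass then inserts
   vzeroupper wherever the CLEAN state has to be established; see
   ix86_emit_mode_set below.  */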
16140
16141 static int
16142 ix86_avx_u128_mode_needed (rtx_insn *insn)
16143 {
16144 if (CALL_P (insn))
16145 {
16146 rtx link;
16147
16148 /* Needed mode is set to AVX_U128_CLEAN if there are
16149 no 256bit modes used in function arguments. */
16150 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16151 link;
16152 link = XEXP (link, 1))
16153 {
16154 if (GET_CODE (XEXP (link, 0)) == USE)
16155 {
16156 rtx arg = XEXP (XEXP (link, 0), 0);
16157
16158 if (ix86_check_avx256_register (&arg, NULL))
16159 return AVX_U128_DIRTY;
16160 }
16161 }
16162
16163 return AVX_U128_CLEAN;
16164 }
16165
16166 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16167 changes state only when a 256bit register is written to, but we need
16168 to prevent the compiler from moving the optimal insertion point above
16169 an eventual read from a 256bit register. */
16170 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16171 return AVX_U128_DIRTY;
16172
16173 return AVX_U128_ANY;
16174 }
16175
16176 /* Return mode that i387 must be switched into
16177 prior to the execution of insn. */
16178
16179 static int
16180 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16181 {
16182 enum attr_i387_cw mode;
16183
16184 /* The mode UNINITIALIZED is used to store the control word after a
16185 function call or ASM pattern. The mode ANY specifies that the function
16186 has no requirements on the control word and makes no changes in the
16187 bits we are interested in. */
16188
16189 if (CALL_P (insn)
16190 || (NONJUMP_INSN_P (insn)
16191 && (asm_noperands (PATTERN (insn)) >= 0
16192 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16193 return I387_CW_UNINITIALIZED;
16194
16195 if (recog_memoized (insn) < 0)
16196 return I387_CW_ANY;
16197
16198 mode = get_attr_i387_cw (insn);
16199
16200 switch (entity)
16201 {
16202 case I387_TRUNC:
16203 if (mode == I387_CW_TRUNC)
16204 return mode;
16205 break;
16206
16207 case I387_FLOOR:
16208 if (mode == I387_CW_FLOOR)
16209 return mode;
16210 break;
16211
16212 case I387_CEIL:
16213 if (mode == I387_CW_CEIL)
16214 return mode;
16215 break;
16216
16217 case I387_MASK_PM:
16218 if (mode == I387_CW_MASK_PM)
16219 return mode;
16220 break;
16221
16222 default:
16223 gcc_unreachable ();
16224 }
16225
16226 return I387_CW_ANY;
16227 }
16228
16229 /* Return mode that entity must be switched into
16230 prior to the execution of insn. */
16231
16232 static int
16233 ix86_mode_needed (int entity, rtx_insn *insn)
16234 {
16235 switch (entity)
16236 {
16237 case AVX_U128:
16238 return ix86_avx_u128_mode_needed (insn);
16239 case I387_TRUNC:
16240 case I387_FLOOR:
16241 case I387_CEIL:
16242 case I387_MASK_PM:
16243 return ix86_i387_mode_needed (entity, insn);
16244 default:
16245 gcc_unreachable ();
16246 }
16247 return 0;
16248 }
16249
16250 /* Check if a 256bit AVX register is referenced in stores. */
16251
16252 static void
16253 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16254 {
16255 if (ix86_check_avx256_register (&dest, NULL))
16256 {
16257 bool *used = (bool *) data;
16258 *used = true;
16259 }
16260 }
16261
16262 /* Calculate mode of upper 128bit AVX registers after the insn. */
16263
16264 static int
16265 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16266 {
16267 rtx pat = PATTERN (insn);
16268
16269 if (vzeroupper_operation (pat, VOIDmode)
16270 || vzeroall_operation (pat, VOIDmode))
16271 return AVX_U128_CLEAN;
16272
16273 /* We know that the state is clean after a CALL insn if no
16274 256bit registers are used for the function return value. */
16275 if (CALL_P (insn))
16276 {
16277 bool avx_reg256_found = false;
16278 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16279
16280 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16281 }
16282
16283 /* Otherwise, return current mode. Remember that if insn
16284 references AVX 256bit registers, the mode was already changed
16285 to DIRTY from MODE_NEEDED. */
16286 return mode;
16287 }
16288
16289 /* Return the mode that an insn results in. */
16290
16291 int
16292 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16293 {
16294 switch (entity)
16295 {
16296 case AVX_U128:
16297 return ix86_avx_u128_mode_after (mode, insn);
16298 case I387_TRUNC:
16299 case I387_FLOOR:
16300 case I387_CEIL:
16301 case I387_MASK_PM:
16302 return mode;
16303 default:
16304 gcc_unreachable ();
16305 }
16306 }
16307
16308 static int
16309 ix86_avx_u128_mode_entry (void)
16310 {
16311 tree arg;
16312
16313 /* Entry mode is set to AVX_U128_DIRTY if there are
16314 256bit modes used in function arguments. */
16315 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16316 arg = TREE_CHAIN (arg))
16317 {
16318 rtx incoming = DECL_INCOMING_RTL (arg);
16319
16320 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16321 return AVX_U128_DIRTY;
16322 }
16323
16324 return AVX_U128_CLEAN;
16325 }
16326
16327 /* Return a mode that ENTITY is assumed to be
16328 switched to at function entry. */
16329
16330 static int
16331 ix86_mode_entry (int entity)
16332 {
16333 switch (entity)
16334 {
16335 case AVX_U128:
16336 return ix86_avx_u128_mode_entry ();
16337 case I387_TRUNC:
16338 case I387_FLOOR:
16339 case I387_CEIL:
16340 case I387_MASK_PM:
16341 return I387_CW_ANY;
16342 default:
16343 gcc_unreachable ();
16344 }
16345 }
16346
16347 static int
16348 ix86_avx_u128_mode_exit (void)
16349 {
16350 rtx reg = crtl->return_rtx;
16351
16352 /* Exit mode is set to AVX_U128_DIRTY if there are
16353 256bit modes used in the function return register. */
16354 if (reg && ix86_check_avx256_register (&reg, NULL))
16355 return AVX_U128_DIRTY;
16356
16357 return AVX_U128_CLEAN;
16358 }
16359
16360 /* Return a mode that ENTITY is assumed to be
16361 switched to at function exit. */
16362
16363 static int
16364 ix86_mode_exit (int entity)
16365 {
16366 switch (entity)
16367 {
16368 case AVX_U128:
16369 return ix86_avx_u128_mode_exit ();
16370 case I387_TRUNC:
16371 case I387_FLOOR:
16372 case I387_CEIL:
16373 case I387_MASK_PM:
16374 return I387_CW_ANY;
16375 default:
16376 gcc_unreachable ();
16377 }
16378 }
16379
16380 static int
16381 ix86_mode_priority (int, int n)
16382 {
16383 return n;
16384 }
16385
16386 /* Output code to initialize the control word copies used by the trunc?f?i
16387 and rounding patterns. The current control word is saved in STORED_MODE,
16388 while the control word modified according to MODE is stored in NEW_MODE. */
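/* Background note on the x87 control word (architectural facts, added for
   clarity): bits 10-11 select the rounding mode -- 00 nearest, 01 down,
   10 up, 11 toward zero -- and bit 5 masks the precision exception.
   Hence the masks below: OR 0x0c00 for truncation, clear 0x0c00 and then
   OR 0x0400 (floor) or 0x0800 (ceil), and OR 0x0020 for nearbyint.  */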
16389
16390 static void
16391 emit_i387_cw_initialization (int mode)
16392 {
16393 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16394 rtx new_mode;
16395
16396 enum ix86_stack_slot slot;
16397
16398 rtx reg = gen_reg_rtx (HImode);
16399
16400 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16401 emit_move_insn (reg, copy_rtx (stored_mode));
16402
16403 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16404 || optimize_insn_for_size_p ())
16405 {
16406 switch (mode)
16407 {
16408 case I387_CW_TRUNC:
16409 /* round toward zero (truncate) */
16410 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16411 slot = SLOT_CW_TRUNC;
16412 break;
16413
16414 case I387_CW_FLOOR:
16415 /* round down toward -oo */
16416 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16417 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16418 slot = SLOT_CW_FLOOR;
16419 break;
16420
16421 case I387_CW_CEIL:
16422 /* round up toward +oo */
16423 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16424 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16425 slot = SLOT_CW_CEIL;
16426 break;
16427
16428 case I387_CW_MASK_PM:
16429 /* mask precision exception for nearbyint() */
16430 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16431 slot = SLOT_CW_MASK_PM;
16432 break;
16433
16434 default:
16435 gcc_unreachable ();
16436 }
16437 }
16438 else
16439 {
16440 switch (mode)
16441 {
16442 case I387_CW_TRUNC:
16443 /* round toward zero (truncate) */
16444 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16445 slot = SLOT_CW_TRUNC;
16446 break;
16447
16448 case I387_CW_FLOOR:
16449 /* round down toward -oo */
16450 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16451 slot = SLOT_CW_FLOOR;
16452 break;
16453
16454 case I387_CW_CEIL:
16455 /* round up toward +oo */
16456 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16457 slot = SLOT_CW_CEIL;
16458 break;
16459
16460 case I387_CW_MASK_PM:
16461 /* mask precision exception for nearbyint() */
16462 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16463 slot = SLOT_CW_MASK_PM;
16464 break;
16465
16466 default:
16467 gcc_unreachable ();
16468 }
16469 }
16470
16471 gcc_assert (slot < MAX_386_STACK_LOCALS);
16472
16473 new_mode = assign_386_stack_local (HImode, slot);
16474 emit_move_insn (new_mode, reg);
16475 }
16476
16477 /* Emit vzeroupper. */
16478
16479 void
16480 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16481 {
16482 int i;
16483
16484 /* Cancel automatic vzeroupper insertion if there are
16485 live call-saved SSE registers at the insertion point. */
16486
16487 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16488 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16489 return;
16490
16491 if (TARGET_64BIT)
16492 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16493 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16494 return;
16495
16496 emit_insn (gen_avx_vzeroupper ());
16497 }
16498
16501 /* Generate one or more insns to set ENTITY to MODE. REGS_LIVE
16502 is the set of hard registers live at the point where the insn(s)
16503 are to be inserted. */
16504
16505 static void
16506 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16507 HARD_REG_SET regs_live)
16508 {
16509 switch (entity)
16510 {
16511 case AVX_U128:
16512 if (mode == AVX_U128_CLEAN)
16513 ix86_avx_emit_vzeroupper (regs_live);
16514 break;
16515 case I387_TRUNC:
16516 case I387_FLOOR:
16517 case I387_CEIL:
16518 case I387_MASK_PM:
16519 if (mode != I387_CW_ANY
16520 && mode != I387_CW_UNINITIALIZED)
16521 emit_i387_cw_initialization (mode);
16522 break;
16523 default:
16524 gcc_unreachable ();
16525 }
16526 }
16527
16528 /* Output code for INSN to convert a float to a signed int. OPERANDS
16529 are the insn operands. The output may be [HSD]Imode and the input
16530 operand may be [SDX]Fmode. */
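/* Side note (SSE3 architectural fact, added for clarity): FISTTP always
   truncates regardless of the control-word rounding field, which is why the
   fisttp path below can skip the fldcw save/restore dance.  */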
16531
16532 const char *
16533 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16534 {
16535 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16536 int dimode_p = GET_MODE (operands[0]) == DImode;
16537 int round_mode = get_attr_i387_cw (insn);
16538
16539 /* Jump through a hoop or two for DImode, since the hardware has no
16540 non-popping instruction. We used to do this a different way, but
16541 that was somewhat fragile and broke with post-reload splitters. */
16542 if ((dimode_p || fisttp) && !stack_top_dies)
16543 output_asm_insn ("fld\t%y1", operands);
16544
16545 gcc_assert (STACK_TOP_P (operands[1]));
16546 gcc_assert (MEM_P (operands[0]));
16547 gcc_assert (GET_MODE (operands[1]) != TFmode);
16548
16549 if (fisttp)
16550 output_asm_insn ("fisttp%Z0\t%0", operands);
16551 else
16552 {
16553 if (round_mode != I387_CW_ANY)
16554 output_asm_insn ("fldcw\t%3", operands);
16555 if (stack_top_dies || dimode_p)
16556 output_asm_insn ("fistp%Z0\t%0", operands);
16557 else
16558 output_asm_insn ("fist%Z0\t%0", operands);
16559 if (round_mode != I387_CW_ANY)
16560 output_asm_insn ("fldcw\t%2", operands);
16561 }
16562
16563 return "";
16564 }
16565
16566 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16567 have the values zero or one, indicates the ffreep insn's operand
16568 from the OPERANDS array. */
16569
16570 static const char *
16571 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16572 {
16573 if (TARGET_USE_FFREEP)
16574 #ifdef HAVE_AS_IX86_FFREEP
16575 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16576 #else
16577 {
16578 static char retval[32];
16579 int regno = REGNO (operands[opno]);
16580
16581 gcc_assert (STACK_REGNO_P (regno));
16582
16583 regno -= FIRST_STACK_REG;
16584
16585 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16586 return retval;
16587 }
16588 #endif
16589
16590 return opno ? "fstp\t%y1" : "fstp\t%y0";
16591 }
16592
16593
16594 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16595 should be used. UNORDERED_P is true when fucom should be used. */
16596
16597 const char *
16598 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16599 {
16600 int stack_top_dies;
16601 rtx cmp_op0, cmp_op1;
16602 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16603
16604 if (eflags_p)
16605 {
16606 cmp_op0 = operands[0];
16607 cmp_op1 = operands[1];
16608 }
16609 else
16610 {
16611 cmp_op0 = operands[1];
16612 cmp_op1 = operands[2];
16613 }
16614
16615 if (is_sse)
16616 {
16617 if (GET_MODE (operands[0]) == SFmode)
16618 if (unordered_p)
16619 return "%vucomiss\t{%1, %0|%0, %1}";
16620 else
16621 return "%vcomiss\t{%1, %0|%0, %1}";
16622 else
16623 if (unordered_p)
16624 return "%vucomisd\t{%1, %0|%0, %1}";
16625 else
16626 return "%vcomisd\t{%1, %0|%0, %1}";
16627 }
16628
16629 gcc_assert (STACK_TOP_P (cmp_op0));
16630
16631 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16632
16633 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16634 {
16635 if (stack_top_dies)
16636 {
16637 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16638 return output_387_ffreep (operands, 1);
16639 }
16640 else
16641 return "ftst\n\tfnstsw\t%0";
16642 }
16643
16644 if (STACK_REG_P (cmp_op1)
16645 && stack_top_dies
16646 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16647 && REGNO (cmp_op1) != FIRST_STACK_REG)
16648 {
16649 /* If the top of the 387 stack dies, and the other operand is
16650 also a stack register that dies, then this must be a
16651 `fcompp' float compare. */
16652
16653 if (eflags_p)
16654 {
16655 /* There is no double popping fcomi variant. Fortunately,
16656 eflags is immune from the fstp's cc clobbering. */
16657 if (unordered_p)
16658 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16659 else
16660 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16661 return output_387_ffreep (operands, 0);
16662 }
16663 else
16664 {
16665 if (unordered_p)
16666 return "fucompp\n\tfnstsw\t%0";
16667 else
16668 return "fcompp\n\tfnstsw\t%0";
16669 }
16670 }
16671 else
16672 {
16673 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16674
16675 static const char * const alt[16] =
16676 {
16677 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16678 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16679 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16680 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16681
16682 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16683 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16684 NULL,
16685 NULL,
16686
16687 "fcomi\t{%y1, %0|%0, %y1}",
16688 "fcomip\t{%y1, %0|%0, %y1}",
16689 "fucomi\t{%y1, %0|%0, %y1}",
16690 "fucomip\t{%y1, %0|%0, %y1}",
16691
16692 NULL,
16693 NULL,
16694 NULL,
16695 NULL
16696 };
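	  /* Worked example: eflags_p = 1, a non-integer operand,
	     unordered_p = 1 and a dying stack top give
	     mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, i.e. "fucomip".  */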
16697
16698 int mask;
16699 const char *ret;
16700
16701 mask = eflags_p << 3;
16702 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16703 mask |= unordered_p << 1;
16704 mask |= stack_top_dies;
16705
16706 gcc_assert (mask < 16);
16707 ret = alt[mask];
16708 gcc_assert (ret);
16709
16710 return ret;
16711 }
16712 }
16713
16714 void
16715 ix86_output_addr_vec_elt (FILE *file, int value)
16716 {
16717 const char *directive = ASM_LONG;
16718
16719 #ifdef ASM_QUAD
16720 if (TARGET_LP64)
16721 directive = ASM_QUAD;
16722 #else
16723 gcc_assert (!TARGET_64BIT);
16724 #endif
16725
16726 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16727 }
16728
16729 void
16730 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16731 {
16732 const char *directive = ASM_LONG;
16733
16734 #ifdef ASM_QUAD
16735 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16736 directive = ASM_QUAD;
16737 #else
16738 gcc_assert (!TARGET_64BIT);
16739 #endif
16740 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16741 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16742 fprintf (file, "%s%s%d-%s%d\n",
16743 directive, LPREFIX, value, LPREFIX, rel);
16744 else if (HAVE_AS_GOTOFF_IN_DATA)
16745 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16746 #if TARGET_MACHO
16747 else if (TARGET_MACHO)
16748 {
16749 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16750 machopic_output_function_base_name (file);
16751 putc ('\n', file);
16752 }
16753 #endif
16754 else
16755 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16756 GOT_SYMBOL_NAME, LPREFIX, value);
16757 }
16758 \f
16759 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16760 for the target. */
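/* Illustrative note: "xorl %eax, %eax" is a 2-byte encoding while
   "movl $0, %eax" takes 5 bytes, but the xor form clobbers the flags,
   which is why the expansion below adds a CLOBBER of FLAGS_REG.  */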
16761
16762 void
16763 ix86_expand_clear (rtx dest)
16764 {
16765 rtx tmp;
16766
16767 /* We play register width games, which are only valid after reload. */
16768 gcc_assert (reload_completed);
16769
16770 /* Avoid HImode and its attendant prefix byte. */
16771 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16772 dest = gen_rtx_REG (SImode, REGNO (dest));
16773 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16774
16775 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16776 {
16777 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16778 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16779 }
16780
16781 emit_insn (tmp);
16782 }
16783
16784 /* X is an unchanging MEM. If it is a constant pool reference, return
16785 the constant pool rtx, else NULL. */
16786
16787 rtx
16788 maybe_get_pool_constant (rtx x)
16789 {
16790 x = ix86_delegitimize_address (XEXP (x, 0));
16791
16792 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16793 return get_pool_constant (x);
16794
16795 return NULL_RTX;
16796 }
16797
16798 void
16799 ix86_expand_move (enum machine_mode mode, rtx operands[])
16800 {
16801 rtx op0, op1;
16802 enum tls_model model;
16803
16804 op0 = operands[0];
16805 op1 = operands[1];
16806
16807 if (GET_CODE (op1) == SYMBOL_REF)
16808 {
16809 rtx tmp;
16810
16811 model = SYMBOL_REF_TLS_MODEL (op1);
16812 if (model)
16813 {
16814 op1 = legitimize_tls_address (op1, model, true);
16815 op1 = force_operand (op1, op0);
16816 if (op1 == op0)
16817 return;
16818 op1 = convert_to_mode (mode, op1, 1);
16819 }
16820 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16821 op1 = tmp;
16822 }
16823 else if (GET_CODE (op1) == CONST
16824 && GET_CODE (XEXP (op1, 0)) == PLUS
16825 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16826 {
16827 rtx addend = XEXP (XEXP (op1, 0), 1);
16828 rtx symbol = XEXP (XEXP (op1, 0), 0);
16829 rtx tmp;
16830
16831 model = SYMBOL_REF_TLS_MODEL (symbol);
16832 if (model)
16833 tmp = legitimize_tls_address (symbol, model, true);
16834 else
16835 tmp = legitimize_pe_coff_symbol (symbol, true);
16836
16837 if (tmp)
16838 {
16839 tmp = force_operand (tmp, NULL);
16840 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16841 op0, 1, OPTAB_DIRECT);
16842 if (tmp == op0)
16843 return;
16844 op1 = convert_to_mode (mode, tmp, 1);
16845 }
16846 }
16847
16848 if ((flag_pic || MACHOPIC_INDIRECT)
16849 && symbolic_operand (op1, mode))
16850 {
16851 if (TARGET_MACHO && !TARGET_64BIT)
16852 {
16853 #if TARGET_MACHO
16854 /* dynamic-no-pic */
16855 if (MACHOPIC_INDIRECT)
16856 {
16857 rtx temp = ((reload_in_progress
16858 || ((op0 && REG_P (op0))
16859 && mode == Pmode))
16860 ? op0 : gen_reg_rtx (Pmode));
16861 op1 = machopic_indirect_data_reference (op1, temp);
16862 if (MACHOPIC_PURE)
16863 op1 = machopic_legitimize_pic_address (op1, mode,
16864 temp == op1 ? 0 : temp);
16865 }
16866 if (op0 != op1 && GET_CODE (op0) != MEM)
16867 {
16868 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16869 emit_insn (insn);
16870 return;
16871 }
16872 if (GET_CODE (op0) == MEM)
16873 op1 = force_reg (Pmode, op1);
16874 else
16875 {
16876 rtx temp = op0;
16877 if (GET_CODE (temp) != REG)
16878 temp = gen_reg_rtx (Pmode);
16879 temp = legitimize_pic_address (op1, temp);
16880 if (temp == op0)
16881 return;
16882 op1 = temp;
16883 }
16884 /* dynamic-no-pic */
16885 #endif
16886 }
16887 else
16888 {
16889 if (MEM_P (op0))
16890 op1 = force_reg (mode, op1);
16891 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16892 {
16893 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16894 op1 = legitimize_pic_address (op1, reg);
16895 if (op0 == op1)
16896 return;
16897 op1 = convert_to_mode (mode, op1, 1);
16898 }
16899 }
16900 }
16901 else
16902 {
16903 if (MEM_P (op0)
16904 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16905 || !push_operand (op0, mode))
16906 && MEM_P (op1))
16907 op1 = force_reg (mode, op1);
16908
16909 if (push_operand (op0, mode)
16910 && ! general_no_elim_operand (op1, mode))
16911 op1 = copy_to_mode_reg (mode, op1);
16912
16913 /* Force large constants in 64bit compilation into a register
16914 to get them CSEed. */
16915 if (can_create_pseudo_p ()
16916 && (mode == DImode) && TARGET_64BIT
16917 && immediate_operand (op1, mode)
16918 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16919 && !register_operand (op0, mode)
16920 && optimize)
16921 op1 = copy_to_mode_reg (mode, op1);
16922
16923 if (can_create_pseudo_p ()
16924 && FLOAT_MODE_P (mode)
16925 && GET_CODE (op1) == CONST_DOUBLE)
16926 {
16927 /* If we are loading a floating point constant to a register,
16928 force the value to memory now, since we'll get better code
16929 out the back end. */
16930
16931 op1 = validize_mem (force_const_mem (mode, op1));
16932 if (!register_operand (op0, mode))
16933 {
16934 rtx temp = gen_reg_rtx (mode);
16935 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16936 emit_move_insn (op0, temp);
16937 return;
16938 }
16939 }
16940 }
16941
16942 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16943 }
16944
16945 void
16946 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16947 {
16948 rtx op0 = operands[0], op1 = operands[1];
16949 unsigned int align = GET_MODE_ALIGNMENT (mode);
16950
16951 if (push_operand (op0, VOIDmode))
16952 op0 = emit_move_resolve_push (mode, op0);
16953
16954 /* Force constants other than zero into memory. We do not know how
16955 the instructions used to build constants modify the upper 64 bits
16956 of the register; once we have that information we may be able
16957 to handle some of them more efficiently. */
16958 if (can_create_pseudo_p ()
16959 && register_operand (op0, mode)
16960 && (CONSTANT_P (op1)
16961 || (GET_CODE (op1) == SUBREG
16962 && CONSTANT_P (SUBREG_REG (op1))))
16963 && !standard_sse_constant_p (op1))
16964 op1 = validize_mem (force_const_mem (mode, op1));
16965
16966 /* We need to check memory alignment for SSE mode since an attribute
16967 can make operands unaligned. */
16968 if (can_create_pseudo_p ()
16969 && SSE_REG_MODE_P (mode)
16970 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16971 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16972 {
16973 rtx tmp[2];
16974
16975 /* ix86_expand_vector_move_misalign() does not like constants ... */
16976 if (CONSTANT_P (op1)
16977 || (GET_CODE (op1) == SUBREG
16978 && CONSTANT_P (SUBREG_REG (op1))))
16979 op1 = validize_mem (force_const_mem (mode, op1));
16980
16981 /* ... nor both arguments in memory. */
16982 if (!register_operand (op0, mode)
16983 && !register_operand (op1, mode))
16984 op1 = force_reg (mode, op1);
16985
16986 tmp[0] = op0; tmp[1] = op1;
16987 ix86_expand_vector_move_misalign (mode, tmp);
16988 return;
16989 }
16990
16991 /* Make operand1 a register if it isn't already. */
16992 if (can_create_pseudo_p ()
16993 && !register_operand (op0, mode)
16994 && !register_operand (op1, mode))
16995 {
16996 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16997 return;
16998 }
16999
17000 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17001 }
17002
17003 /* Split 32-byte AVX unaligned load and store if needed. */
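/* A rough sketch of the generated code (AT&T syntax; register numbers are
   only illustrative, the actual insns come from the extract/load/store
   generators selected below):

     split unaligned load:   vmovups      mem, %xmm0
                             vinsertf128  $1, mem+16, %ymm0, %ymm0
     split unaligned store:  vmovups      %xmm0, mem
                             vextractf128 $1, %ymm0, mem+16  */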
17004
17005 static void
17006 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17007 {
17008 rtx m;
17009 rtx (*extract) (rtx, rtx, rtx);
17010 rtx (*load_unaligned) (rtx, rtx);
17011 rtx (*store_unaligned) (rtx, rtx);
17012 enum machine_mode mode;
17013
17014 switch (GET_MODE (op0))
17015 {
17016 default:
17017 gcc_unreachable ();
17018 case V32QImode:
17019 extract = gen_avx_vextractf128v32qi;
17020 load_unaligned = gen_avx_loaddquv32qi;
17021 store_unaligned = gen_avx_storedquv32qi;
17022 mode = V16QImode;
17023 break;
17024 case V8SFmode:
17025 extract = gen_avx_vextractf128v8sf;
17026 load_unaligned = gen_avx_loadups256;
17027 store_unaligned = gen_avx_storeups256;
17028 mode = V4SFmode;
17029 break;
17030 case V4DFmode:
17031 extract = gen_avx_vextractf128v4df;
17032 load_unaligned = gen_avx_loadupd256;
17033 store_unaligned = gen_avx_storeupd256;
17034 mode = V2DFmode;
17035 break;
17036 }
17037
17038 if (MEM_P (op1))
17039 {
17040 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17041 {
17042 rtx r = gen_reg_rtx (mode);
17043 m = adjust_address (op1, mode, 0);
17044 emit_move_insn (r, m);
17045 m = adjust_address (op1, mode, 16);
17046 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17047 emit_move_insn (op0, r);
17048 }
17049 /* Normal *mov<mode>_internal pattern will handle
17050 unaligned loads just fine if misaligned_operand
17051 is true, and without the UNSPEC it can be combined
17052 with arithmetic instructions. */
17053 else if (misaligned_operand (op1, GET_MODE (op1)))
17054 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17055 else
17056 emit_insn (load_unaligned (op0, op1));
17057 }
17058 else if (MEM_P (op0))
17059 {
17060 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17061 {
17062 m = adjust_address (op0, mode, 0);
17063 emit_insn (extract (m, op1, const0_rtx));
17064 m = adjust_address (op0, mode, 16);
17065 emit_insn (extract (m, op1, const1_rtx));
17066 }
17067 else
17068 emit_insn (store_unaligned (op0, op1));
17069 }
17070 else
17071 gcc_unreachable ();
17072 }
17073
17074 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17075 straight to ix86_expand_vector_move. */
17076 /* Code generation for scalar reg-reg moves of single and double precision data:
17077 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17078 movaps reg, reg
17079 else
17080 movss reg, reg
17081 if (x86_sse_partial_reg_dependency == true)
17082 movapd reg, reg
17083 else
17084 movsd reg, reg
17085
17086 Code generation for scalar loads of double precision data:
17087 if (x86_sse_split_regs == true)
17088 movlpd mem, reg (gas syntax)
17089 else
17090 movsd mem, reg
17091
17092 Code generation for unaligned packed loads of single precision data
17093 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17094 if (x86_sse_unaligned_move_optimal)
17095 movups mem, reg
17096
17097 if (x86_sse_partial_reg_dependency == true)
17098 {
17099 xorps reg, reg
17100 movlps mem, reg
17101 movhps mem+8, reg
17102 }
17103 else
17104 {
17105 movlps mem, reg
17106 movhps mem+8, reg
17107 }
17108
17109 Code generation for unaligned packed loads of double precision data
17110 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17111 if (x86_sse_unaligned_move_optimal)
17112 movupd mem, reg
17113
17114 if (x86_sse_split_regs == true)
17115 {
17116 movlpd mem, reg
17117 movhpd mem+8, reg
17118 }
17119 else
17120 {
17121 movsd mem, reg
17122 movhpd mem+8, reg
17123 }
17124 */
17125
17126 void
17127 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17128 {
17129 rtx op0, op1, orig_op0 = NULL_RTX, m;
17130 rtx (*load_unaligned) (rtx, rtx);
17131 rtx (*store_unaligned) (rtx, rtx);
17132
17133 op0 = operands[0];
17134 op1 = operands[1];
17135
17136 if (GET_MODE_SIZE (mode) == 64)
17137 {
17138 switch (GET_MODE_CLASS (mode))
17139 {
17140 case MODE_VECTOR_INT:
17141 case MODE_INT:
17142 if (GET_MODE (op0) != V16SImode)
17143 {
17144 if (!MEM_P (op0))
17145 {
17146 orig_op0 = op0;
17147 op0 = gen_reg_rtx (V16SImode);
17148 }
17149 else
17150 op0 = gen_lowpart (V16SImode, op0);
17151 }
17152 op1 = gen_lowpart (V16SImode, op1);
17153 /* FALLTHRU */
17154
17155 case MODE_VECTOR_FLOAT:
17156 switch (GET_MODE (op0))
17157 {
17158 default:
17159 gcc_unreachable ();
17160 case V16SImode:
17161 load_unaligned = gen_avx512f_loaddquv16si;
17162 store_unaligned = gen_avx512f_storedquv16si;
17163 break;
17164 case V16SFmode:
17165 load_unaligned = gen_avx512f_loadups512;
17166 store_unaligned = gen_avx512f_storeups512;
17167 break;
17168 case V8DFmode:
17169 load_unaligned = gen_avx512f_loadupd512;
17170 store_unaligned = gen_avx512f_storeupd512;
17171 break;
17172 }
17173
17174 if (MEM_P (op1))
17175 emit_insn (load_unaligned (op0, op1));
17176 else if (MEM_P (op0))
17177 emit_insn (store_unaligned (op0, op1));
17178 else
17179 gcc_unreachable ();
17180 if (orig_op0)
17181 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17182 break;
17183
17184 default:
17185 gcc_unreachable ();
17186 }
17187
17188 return;
17189 }
17190
17191 if (TARGET_AVX
17192 && GET_MODE_SIZE (mode) == 32)
17193 {
17194 switch (GET_MODE_CLASS (mode))
17195 {
17196 case MODE_VECTOR_INT:
17197 case MODE_INT:
17198 if (GET_MODE (op0) != V32QImode)
17199 {
17200 if (!MEM_P (op0))
17201 {
17202 orig_op0 = op0;
17203 op0 = gen_reg_rtx (V32QImode);
17204 }
17205 else
17206 op0 = gen_lowpart (V32QImode, op0);
17207 }
17208 op1 = gen_lowpart (V32QImode, op1);
17209 /* FALLTHRU */
17210
17211 case MODE_VECTOR_FLOAT:
17212 ix86_avx256_split_vector_move_misalign (op0, op1);
17213 if (orig_op0)
17214 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17215 break;
17216
17217 default:
17218 gcc_unreachable ();
17219 }
17220
17221 return;
17222 }
17223
17224 if (MEM_P (op1))
17225 {
17226 /* Normal *mov<mode>_internal pattern will handle
17227 unaligned loads just fine if misaligned_operand
17228 is true, and without the UNSPEC it can be combined
17229 with arithmetic instructions. */
17230 if (TARGET_AVX
17231 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17232 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17233 && misaligned_operand (op1, GET_MODE (op1)))
17234 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17235 /* ??? If we have typed data, then it would appear that using
17236 movdqu is the only way to get unaligned data loaded with
17237 integer type. */
17238 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17239 {
17240 if (GET_MODE (op0) != V16QImode)
17241 {
17242 orig_op0 = op0;
17243 op0 = gen_reg_rtx (V16QImode);
17244 }
17245 op1 = gen_lowpart (V16QImode, op1);
17246 /* We will eventually emit movups based on insn attributes. */
17247 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17248 if (orig_op0)
17249 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17250 }
17251 else if (TARGET_SSE2 && mode == V2DFmode)
17252 {
17253 rtx zero;
17254
17255 if (TARGET_AVX
17256 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17257 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17258 || optimize_insn_for_size_p ())
17259 {
17260 /* We will eventually emit movups based on insn attributes. */
17261 emit_insn (gen_sse2_loadupd (op0, op1));
17262 return;
17263 }
17264
17265 /* When SSE registers are split into halves, we can avoid
17266 writing to the top half twice. */
17267 if (TARGET_SSE_SPLIT_REGS)
17268 {
17269 emit_clobber (op0);
17270 zero = op0;
17271 }
17272 else
17273 {
17274 /* ??? Not sure about the best option for the Intel chips.
17275 The following would seem to satisfy; the register is
17276 entirely cleared, breaking the dependency chain. We
17277 then store to the upper half, with a dependency depth
17278 of one. A rumor has it that Intel recommends two movsd
17279 followed by an unpacklpd, but this is unconfirmed. And
17280 given that the dependency depth of the unpacklpd would
17281 still be one, I'm not sure why this would be better. */
17282 zero = CONST0_RTX (V2DFmode);
17283 }
17284
17285 m = adjust_address (op1, DFmode, 0);
17286 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17287 m = adjust_address (op1, DFmode, 8);
17288 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17289 }
17290 else
17291 {
17292 rtx t;
17293
17294 if (TARGET_AVX
17295 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17296 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17297 || optimize_insn_for_size_p ())
17298 {
17299 if (GET_MODE (op0) != V4SFmode)
17300 {
17301 orig_op0 = op0;
17302 op0 = gen_reg_rtx (V4SFmode);
17303 }
17304 op1 = gen_lowpart (V4SFmode, op1);
17305 emit_insn (gen_sse_loadups (op0, op1));
17306 if (orig_op0)
17307 emit_move_insn (orig_op0,
17308 gen_lowpart (GET_MODE (orig_op0), op0));
17309 return;
17310 }
17311
17312 if (mode != V4SFmode)
17313 t = gen_reg_rtx (V4SFmode);
17314 else
17315 t = op0;
17316
17317 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17318 emit_move_insn (t, CONST0_RTX (V4SFmode));
17319 else
17320 emit_clobber (t);
17321
17322 m = adjust_address (op1, V2SFmode, 0);
17323 emit_insn (gen_sse_loadlps (t, t, m));
17324 m = adjust_address (op1, V2SFmode, 8);
17325 emit_insn (gen_sse_loadhps (t, t, m));
17326 if (mode != V4SFmode)
17327 emit_move_insn (op0, gen_lowpart (mode, t));
17328 }
17329 }
17330 else if (MEM_P (op0))
17331 {
17332 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17333 {
17334 op0 = gen_lowpart (V16QImode, op0);
17335 op1 = gen_lowpart (V16QImode, op1);
17336 /* We will eventually emit movups based on insn attributes. */
17337 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17338 }
17339 else if (TARGET_SSE2 && mode == V2DFmode)
17340 {
17341 if (TARGET_AVX
17342 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17343 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17344 || optimize_insn_for_size_p ())
17345 /* We will eventually emit movups based on insn attributes. */
17346 emit_insn (gen_sse2_storeupd (op0, op1));
17347 else
17348 {
17349 m = adjust_address (op0, DFmode, 0);
17350 emit_insn (gen_sse2_storelpd (m, op1));
17351 m = adjust_address (op0, DFmode, 8);
17352 emit_insn (gen_sse2_storehpd (m, op1));
17353 }
17354 }
17355 else
17356 {
17357 if (mode != V4SFmode)
17358 op1 = gen_lowpart (V4SFmode, op1);
17359
17360 if (TARGET_AVX
17361 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17362 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17363 || optimize_insn_for_size_p ())
17364 {
17365 op0 = gen_lowpart (V4SFmode, op0);
17366 emit_insn (gen_sse_storeups (op0, op1));
17367 }
17368 else
17369 {
17370 m = adjust_address (op0, V2SFmode, 0);
17371 emit_insn (gen_sse_storelps (m, op1));
17372 m = adjust_address (op0, V2SFmode, 8);
17373 emit_insn (gen_sse_storehps (m, op1));
17374 }
17375 }
17376 }
17377 else
17378 gcc_unreachable ();
17379 }
17380
17381 /* Helper function of ix86_fixup_binary_operands to canonicalize
17382 operand order. Returns true if the operands should be swapped. */
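/* For example (hypothetical operands): for a commutative PLUS where
   operands[0] and operands[2] name the same register, swapping src1 and
   src2 lets the two-address "add src, dst" form be used directly instead
   of first copying src1 into the destination.  */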
17383
17384 static bool
17385 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17386 rtx operands[])
17387 {
17388 rtx dst = operands[0];
17389 rtx src1 = operands[1];
17390 rtx src2 = operands[2];
17391
17392 /* If the operation is not commutative, we can't do anything. */
17393 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17394 return false;
17395
17396 /* Highest priority is that src1 should match dst. */
17397 if (rtx_equal_p (dst, src1))
17398 return false;
17399 if (rtx_equal_p (dst, src2))
17400 return true;
17401
17402 /* Next highest priority is that immediate constants come second. */
17403 if (immediate_operand (src2, mode))
17404 return false;
17405 if (immediate_operand (src1, mode))
17406 return true;
17407
17408 /* Lowest priority is that memory references should come second. */
17409 if (MEM_P (src2))
17410 return false;
17411 if (MEM_P (src1))
17412 return true;
17413
17414 return false;
17415 }
17416
17417
17418 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17419 destination to use for the operation. If different from the true
17420 destination in operands[0], a copy operation will be required. */
17421
17422 rtx
17423 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17424 rtx operands[])
17425 {
17426 rtx dst = operands[0];
17427 rtx src1 = operands[1];
17428 rtx src2 = operands[2];
17429
17430 /* Canonicalize operand order. */
17431 if (ix86_swap_binary_operands_p (code, mode, operands))
17432 {
17433 rtx temp;
17434
17435 /* It is invalid to swap operands of different modes. */
17436 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17437
17438 temp = src1;
17439 src1 = src2;
17440 src2 = temp;
17441 }
17442
17443 /* The two source operands cannot both be in memory. */
17444 if (MEM_P (src1) && MEM_P (src2))
17445 {
17446 /* Optimization: Only read from memory once. */
17447 if (rtx_equal_p (src1, src2))
17448 {
17449 src2 = force_reg (mode, src2);
17450 src1 = src2;
17451 }
17452 else if (rtx_equal_p (dst, src1))
17453 src2 = force_reg (mode, src2);
17454 else
17455 src1 = force_reg (mode, src1);
17456 }
17457
17458 /* If the destination is memory, and we do not have matching source
17459 operands, do things in registers. */
17460 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17461 dst = gen_reg_rtx (mode);
17462
17463 /* Source 1 cannot be a constant. */
17464 if (CONSTANT_P (src1))
17465 src1 = force_reg (mode, src1);
17466
17467 /* Source 1 cannot be a non-matching memory. */
17468 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17469 src1 = force_reg (mode, src1);
17470
17471 /* Improve address combine. */
17472 if (code == PLUS
17473 && GET_MODE_CLASS (mode) == MODE_INT
17474 && MEM_P (src2))
17475 src2 = force_reg (mode, src2);
17476
17477 operands[1] = src1;
17478 operands[2] = src2;
17479 return dst;
17480 }
17481
17482 /* Similarly, but assume that the destination has already been
17483 set up properly. */
17484
17485 void
17486 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17487 enum machine_mode mode, rtx operands[])
17488 {
17489 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17490 gcc_assert (dst == operands[0]);
17491 }
17492
17493 /* Attempt to expand a binary operator. Make the expansion closer to the
17494 actual machine, than just general_operand, which will allow 3 separate
17495 memory references (one output, two input) in a single insn. */
17496
17497 void
17498 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17499 rtx operands[])
17500 {
17501 rtx src1, src2, dst, op, clob;
17502
17503 dst = ix86_fixup_binary_operands (code, mode, operands);
17504 src1 = operands[1];
17505 src2 = operands[2];
17506
17507 /* Emit the instruction. */
17508
17509 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17510 if (reload_in_progress)
17511 {
17512 /* Reload doesn't know about the flags register, and doesn't know that
17513 it doesn't want to clobber it. We can only do this with PLUS. */
17514 gcc_assert (code == PLUS);
17515 emit_insn (op);
17516 }
17517 else if (reload_completed
17518 && code == PLUS
17519 && !rtx_equal_p (dst, src1))
17520 {
17521 /* This is going to be an LEA; avoid splitting it later. */
17522 emit_insn (op);
17523 }
17524 else
17525 {
17526 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17527 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17528 }
17529
17530 /* Fix up the destination if needed. */
17531 if (dst != operands[0])
17532 emit_move_insn (operands[0], dst);
17533 }
17534
17535 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17536 the given OPERANDS. */
17537
17538 void
17539 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17540 rtx operands[])
17541 {
17542 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17543 if (GET_CODE (operands[1]) == SUBREG)
17544 {
17545 op1 = operands[1];
17546 op2 = operands[2];
17547 }
17548 else if (GET_CODE (operands[2]) == SUBREG)
17549 {
17550 op1 = operands[2];
17551 op2 = operands[1];
17552 }
17553 /* Optimize (__m128i) d | (__m128i) e and similar code
17554 when d and e are float vectors into float vector logical
17555 insn. In C/C++ without using intrinsics there is no other way
17556 to express vector logical operation on float vectors than
17557 to cast them temporarily to integer vectors. */
17558 if (op1
17559 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17560 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17561 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17562 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17563 && SUBREG_BYTE (op1) == 0
17564 && (GET_CODE (op2) == CONST_VECTOR
17565 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17566 && SUBREG_BYTE (op2) == 0))
17567 && can_create_pseudo_p ())
17568 {
17569 rtx dst;
17570 switch (GET_MODE (SUBREG_REG (op1)))
17571 {
17572 case V4SFmode:
17573 case V8SFmode:
17574 case V16SFmode:
17575 case V2DFmode:
17576 case V4DFmode:
17577 case V8DFmode:
17578 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17579 if (GET_CODE (op2) == CONST_VECTOR)
17580 {
17581 op2 = gen_lowpart (GET_MODE (dst), op2);
17582 op2 = force_reg (GET_MODE (dst), op2);
17583 }
17584 else
17585 {
17586 op1 = operands[1];
17587 op2 = SUBREG_REG (operands[2]);
17588 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17589 op2 = force_reg (GET_MODE (dst), op2);
17590 }
17591 op1 = SUBREG_REG (op1);
17592 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17593 op1 = force_reg (GET_MODE (dst), op1);
17594 emit_insn (gen_rtx_SET (VOIDmode, dst,
17595 gen_rtx_fmt_ee (code, GET_MODE (dst),
17596 op1, op2)));
17597 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17598 return;
17599 default:
17600 break;
17601 }
17602 }
17603 if (!nonimmediate_operand (operands[1], mode))
17604 operands[1] = force_reg (mode, operands[1]);
17605 if (!nonimmediate_operand (operands[2], mode))
17606 operands[2] = force_reg (mode, operands[2]);
17607 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17608 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17609 gen_rtx_fmt_ee (code, mode, operands[1],
17610 operands[2])));
17611 }
17612
17613 /* Return TRUE or FALSE depending on whether the binary operator meets the
17614 appropriate constraints. */
17615
17616 bool
17617 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17618 rtx operands[3])
17619 {
17620 rtx dst = operands[0];
17621 rtx src1 = operands[1];
17622 rtx src2 = operands[2];
17623
17624 /* The two source operands cannot both be in memory. */
17625 if (MEM_P (src1) && MEM_P (src2))
17626 return false;
17627
17628 /* Canonicalize operand order for commutative operators. */
17629 if (ix86_swap_binary_operands_p (code, mode, operands))
17630 {
17631 rtx temp = src1;
17632 src1 = src2;
17633 src2 = temp;
17634 }
17635
17636 /* If the destination is memory, we must have a matching source operand. */
17637 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17638 return false;
17639
17640 /* Source 1 cannot be a constant. */
17641 if (CONSTANT_P (src1))
17642 return false;
17643
17644 /* Source 1 cannot be a non-matching memory. */
17645 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17646 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17647 return (code == AND
17648 && (mode == HImode
17649 || mode == SImode
17650 || (TARGET_64BIT && mode == DImode))
17651 && satisfies_constraint_L (src2));
17652
17653 return true;
17654 }
17655
17656 /* Attempt to expand a unary operator. Make the expansion closer to the
17657 actual machine than just general_operand, which will allow two separate
17658 memory references (one output, one input) in a single insn. */
17659
17660 void
17661 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17662 rtx operands[])
17663 {
17664 int matching_memory;
17665 rtx src, dst, op, clob;
17666
17667 dst = operands[0];
17668 src = operands[1];
17669
17670 /* If the destination is memory, and we do not have matching source
17671 operands, do things in registers. */
17672 matching_memory = 0;
17673 if (MEM_P (dst))
17674 {
17675 if (rtx_equal_p (dst, src))
17676 matching_memory = 1;
17677 else
17678 dst = gen_reg_rtx (mode);
17679 }
17680
17681 /* When source operand is memory, destination must match. */
17682 if (MEM_P (src) && !matching_memory)
17683 src = force_reg (mode, src);
17684
17685 /* Emit the instruction. */
17686
17687 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17688 if (reload_in_progress || code == NOT)
17689 {
17690 /* Reload doesn't know about the flags register, and doesn't know that
17691 it doesn't want to clobber it. */
17692 gcc_assert (code == NOT);
17693 emit_insn (op);
17694 }
17695 else
17696 {
17697 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17698 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17699 }
17700
17701 /* Fix up the destination if needed. */
17702 if (dst != operands[0])
17703 emit_move_insn (operands[0], dst);
17704 }
17705
17706 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17707 divisor are within the range [0-255]. */
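/* Roughly, the emitted sequence looks like this (illustrative sketch only;
   the actual register choice is left to the register allocator):

	mov	op2, scratch
	or	op3, scratch
	test	$-0x100, scratch
	je	.Lqimode		; both values fit in 8 bits
	div/idiv ...			; full-width divide
	jmp	.Ldone
   .Lqimode:
	divb	...			; 8-bit divide: quotient in AL,
					; remainder in AH
   .Ldone:								*/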
17708
17709 void
17710 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17711 bool signed_p)
17712 {
17713 rtx_code_label *end_label, *qimode_label;
17714 rtx insn, div, mod;
17715 rtx scratch, tmp0, tmp1, tmp2;
17716 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17717 rtx (*gen_zero_extend) (rtx, rtx);
17718 rtx (*gen_test_ccno_1) (rtx, rtx);
17719
17720 switch (mode)
17721 {
17722 case SImode:
17723 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17724 gen_test_ccno_1 = gen_testsi_ccno_1;
17725 gen_zero_extend = gen_zero_extendqisi2;
17726 break;
17727 case DImode:
17728 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17729 gen_test_ccno_1 = gen_testdi_ccno_1;
17730 gen_zero_extend = gen_zero_extendqidi2;
17731 break;
17732 default:
17733 gcc_unreachable ();
17734 }
17735
17736 end_label = gen_label_rtx ();
17737 qimode_label = gen_label_rtx ();
17738
17739 scratch = gen_reg_rtx (mode);
17740
17741 /* Use 8bit unsigned divmod if dividend and divisor are within
17742 the range [0-255]. */
17743 emit_move_insn (scratch, operands[2]);
17744 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17745 scratch, 1, OPTAB_DIRECT);
17746 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17747 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17748 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17749 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17750 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17751 pc_rtx);
17752 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17753 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17754 JUMP_LABEL (insn) = qimode_label;
17755
17756 /* Generate original signed/unsigned divmod. */
17757 div = gen_divmod4_1 (operands[0], operands[1],
17758 operands[2], operands[3]);
17759 emit_insn (div);
17760
17761 /* Branch to the end. */
17762 emit_jump_insn (gen_jump (end_label));
17763 emit_barrier ();
17764
17765 /* Generate 8bit unsigned divide. */
17766 emit_label (qimode_label);
17767 /* Don't use operands[0] for result of 8bit divide since not all
17768 registers support QImode ZERO_EXTRACT. */
17769 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17770 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17771 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17772 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17773
17774 if (signed_p)
17775 {
17776 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17777 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17778 }
17779 else
17780 {
17781 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17782 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17783 }
17784
17785 /* Extract remainder from AH. */
17786 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17787 if (REG_P (operands[1]))
17788 insn = emit_move_insn (operands[1], tmp1);
17789 else
17790 {
17791 /* Need a new scratch register since the old one has result
17792 of 8bit divide. */
17793 scratch = gen_reg_rtx (mode);
17794 emit_move_insn (scratch, tmp1);
17795 insn = emit_move_insn (operands[1], scratch);
17796 }
17797 set_unique_reg_note (insn, REG_EQUAL, mod);
17798
17799 /* Zero extend quotient from AL. */
17800 tmp1 = gen_lowpart (QImode, tmp0);
17801 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17802 set_unique_reg_note (insn, REG_EQUAL, div);
17803
17804 emit_label (end_label);
17805 }
17806
17807 #define LEA_MAX_STALL (3)
17808 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17809
17810 /* Increase given DISTANCE in half-cycles according to
17811 dependencies between PREV and NEXT instructions.
17812 Add 1 half-cycle if there is no dependency and
17813 go to the next cycle if there is some dependency. */
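/* For example (sketch of the formula below): a dependent PREV/NEXT pair
   at DISTANCE 3 (1.5 cycles) yields 3 + (3 & 1) + 2 = 6 half-cycles,
   i.e. round up to the cycle boundary and charge one more full cycle;
   an independent pair simply yields DISTANCE + 1.  */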
17814
17815 static unsigned int
17816 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17817 {
17818 df_ref def, use;
17819
17820 if (!prev || !next)
17821 return distance + (distance & 1) + 2;
17822
17823 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17824 return distance + 1;
17825
17826 FOR_EACH_INSN_USE (use, next)
17827 FOR_EACH_INSN_DEF (def, prev)
17828 if (!DF_REF_IS_ARTIFICIAL (def)
17829 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17830 return distance + (distance & 1) + 2;
17831
17832 return distance + 1;
17833 }
17834
17835 /* Return true if instruction INSN defines register number
17836 REGNO1 or REGNO2. */
17837
17838 static bool
17839 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17840 rtx insn)
17841 {
17842 df_ref def;
17843
17844 FOR_EACH_INSN_DEF (def, insn)
17845 if (DF_REF_REG_DEF_P (def)
17846 && !DF_REF_IS_ARTIFICIAL (def)
17847 && (regno1 == DF_REF_REGNO (def)
17848 || regno2 == DF_REF_REGNO (def)))
17849 return true;
17850
17851 return false;
17852 }
17853
17854 /* Return true if instruction INSN uses register number
17855 REGNO as a part of an address expression. */
17856
17857 static bool
17858 insn_uses_reg_mem (unsigned int regno, rtx insn)
17859 {
17860 df_ref use;
17861
17862 FOR_EACH_INSN_USE (use, insn)
17863 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17864 return true;
17865
17866 return false;
17867 }
17868
17869 /* Search backward for a non-agu definition of register number REGNO1
17870 or register number REGNO2 in the basic block, starting from instruction
17871 START and going back to the head of the basic block or to instruction INSN.
17872
17873 Put true into *FOUND if a definition was found
17874 and false otherwise.
17875
17876 The distance in half-cycles between START and the found instruction or
17877 the head of the BB is added to DISTANCE and returned. */
17878
17879 static int
17880 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17881 rtx_insn *insn, int distance,
17882 rtx_insn *start, bool *found)
17883 {
17884 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17885 rtx_insn *prev = start;
17886 rtx_insn *next = NULL;
17887
17888 *found = false;
17889
17890 while (prev
17891 && prev != insn
17892 && distance < LEA_SEARCH_THRESHOLD)
17893 {
17894 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17895 {
17896 distance = increase_distance (prev, next, distance);
17897 if (insn_defines_reg (regno1, regno2, prev))
17898 {
17899 if (recog_memoized (prev) < 0
17900 || get_attr_type (prev) != TYPE_LEA)
17901 {
17902 *found = true;
17903 return distance;
17904 }
17905 }
17906
17907 next = prev;
17908 }
17909 if (prev == BB_HEAD (bb))
17910 break;
17911
17912 prev = PREV_INSN (prev);
17913 }
17914
17915 return distance;
17916 }
17917
17918 /* Search backward for a non-agu definition of register number REGNO1
17919 or register number REGNO2 in INSN's basic block until we
17920 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17921 2. Reach a neighbouring BB boundary, or
17922 3. Reach an agu definition.
17923 Return the distance between the non-agu definition point and INSN.
17924 If there is no definition point, return -1. */
17925
17926 static int
17927 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17928 rtx_insn *insn)
17929 {
17930 basic_block bb = BLOCK_FOR_INSN (insn);
17931 int distance = 0;
17932 bool found = false;
17933
17934 if (insn != BB_HEAD (bb))
17935 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17936 distance, PREV_INSN (insn),
17937 &found);
17938
17939 if (!found && distance < LEA_SEARCH_THRESHOLD)
17940 {
17941 edge e;
17942 edge_iterator ei;
17943 bool simple_loop = false;
17944
17945 FOR_EACH_EDGE (e, ei, bb->preds)
17946 if (e->src == bb)
17947 {
17948 simple_loop = true;
17949 break;
17950 }
17951
17952 if (simple_loop)
17953 distance = distance_non_agu_define_in_bb (regno1, regno2,
17954 insn, distance,
17955 BB_END (bb), &found);
17956 else
17957 {
17958 int shortest_dist = -1;
17959 bool found_in_bb = false;
17960
17961 FOR_EACH_EDGE (e, ei, bb->preds)
17962 {
17963 int bb_dist
17964 = distance_non_agu_define_in_bb (regno1, regno2,
17965 insn, distance,
17966 BB_END (e->src),
17967 &found_in_bb);
17968 if (found_in_bb)
17969 {
17970 if (shortest_dist < 0)
17971 shortest_dist = bb_dist;
17972 else if (bb_dist > 0)
17973 shortest_dist = MIN (bb_dist, shortest_dist);
17974
17975 found = true;
17976 }
17977 }
17978
17979 distance = shortest_dist;
17980 }
17981 }
17982
17983 /* get_attr_type may modify recog data. We want to make sure
17984 that recog data is valid for instruction INSN, on which
17985 distance_non_agu_define is called. INSN is unchanged here. */
17986 extract_insn_cached (insn);
17987
17988 if (!found)
17989 return -1;
17990
17991 return distance >> 1;
17992 }
17993
17994 /* Return the distance in half-cycles between INSN and the next
17995 insn that uses register number REGNO in a memory address, added
17996 to DISTANCE. Return -1 if REGNO is set.
17997
17998 Put true into *FOUND if a register usage was found and
17999 false otherwise.
18000 Put true into *REDEFINED if a register redefinition was
18001 found and false otherwise. */
18002
18003 static int
18004 distance_agu_use_in_bb (unsigned int regno,
18005 rtx_insn *insn, int distance, rtx_insn *start,
18006 bool *found, bool *redefined)
18007 {
18008 basic_block bb = NULL;
18009 rtx_insn *next = start;
18010 rtx_insn *prev = NULL;
18011
18012 *found = false;
18013 *redefined = false;
18014
18015 if (start != NULL_RTX)
18016 {
18017 bb = BLOCK_FOR_INSN (start);
18018 if (start != BB_HEAD (bb))
18019 /* If insn and start belong to the same bb, set prev to insn,
18020 so the call to increase_distance will increase the distance
18021 between insns by 1. */
18022 prev = insn;
18023 }
18024
18025 while (next
18026 && next != insn
18027 && distance < LEA_SEARCH_THRESHOLD)
18028 {
18029 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18030 {
18031 distance = increase_distance(prev, next, distance);
18032 if (insn_uses_reg_mem (regno, next))
18033 {
18034 /* Return DISTANCE if OP0 is used in memory
18035 address in NEXT. */
18036 *found = true;
18037 return distance;
18038 }
18039
18040 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18041 {
18042 /* Return -1 if OP0 is set in NEXT. */
18043 *redefined = true;
18044 return -1;
18045 }
18046
18047 prev = next;
18048 }
18049
18050 if (next == BB_END (bb))
18051 break;
18052
18053 next = NEXT_INSN (next);
18054 }
18055
18056 return distance;
18057 }
18058
18059 /* Return the distance between INSN and the next insn that uses
18060 register number REGNO0 in a memory address. Return -1 if no such
18061 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18062
18063 static int
18064 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18065 {
18066 basic_block bb = BLOCK_FOR_INSN (insn);
18067 int distance = 0;
18068 bool found = false;
18069 bool redefined = false;
18070
18071 if (insn != BB_END (bb))
18072 distance = distance_agu_use_in_bb (regno0, insn, distance,
18073 NEXT_INSN (insn),
18074 &found, &redefined);
18075
18076 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18077 {
18078 edge e;
18079 edge_iterator ei;
18080 bool simple_loop = false;
18081
18082 FOR_EACH_EDGE (e, ei, bb->succs)
18083 if (e->dest == bb)
18084 {
18085 simple_loop = true;
18086 break;
18087 }
18088
18089 if (simple_loop)
18090 distance = distance_agu_use_in_bb (regno0, insn,
18091 distance, BB_HEAD (bb),
18092 &found, &redefined);
18093 else
18094 {
18095 int shortest_dist = -1;
18096 bool found_in_bb = false;
18097 bool redefined_in_bb = false;
18098
18099 FOR_EACH_EDGE (e, ei, bb->succs)
18100 {
18101 int bb_dist
18102 = distance_agu_use_in_bb (regno0, insn,
18103 distance, BB_HEAD (e->dest),
18104 &found_in_bb, &redefined_in_bb);
18105 if (found_in_bb)
18106 {
18107 if (shortest_dist < 0)
18108 shortest_dist = bb_dist;
18109 else if (bb_dist > 0)
18110 shortest_dist = MIN (bb_dist, shortest_dist);
18111
18112 found = true;
18113 }
18114 }
18115
18116 distance = shortest_dist;
18117 }
18118 }
18119
18120 if (!found || redefined)
18121 return -1;
18122
18123 return distance >> 1;
18124 }
18125
18126 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18127 there is a dilemma of choosing LEA or ADD.
18128 Negative value: ADD is preferred over LEA
18129 Zero: Neutral
18130 Positive value: LEA is preferred over ADD. */
18131 #define IX86_LEA_PRIORITY 0
18132
18133 /* Return true if use of the lea INSN has a performance advantage
18134 over a sequence of instructions. The instruction sequence has
18135 SPLIT_COST cycles higher latency than the lea latency. */
18136
18137 static bool
18138 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18139 unsigned int regno2, int split_cost, bool has_scale)
18140 {
18141 int dist_define, dist_use;
18142
18143 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18144 non-destructive destination, or because we want the ability
18145 to use SCALE, the use of LEA is justified. */
18146 if (TARGET_SILVERMONT || TARGET_INTEL)
18147 {
18148 if (has_scale)
18149 return true;
18150 if (split_cost < 1)
18151 return false;
18152 if (regno0 == regno1 || regno0 == regno2)
18153 return false;
18154 return true;
18155 }
18156
18157 dist_define = distance_non_agu_define (regno1, regno2, insn);
18158 dist_use = distance_agu_use (regno0, insn);
18159
18160 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18161 {
18162 /* If there is no non-AGU operand definition, no AGU
18163 operand usage and the split cost is 0, then both the lea
18164 and non-lea variants have the same priority. Currently
18165 we prefer lea for 64-bit code and non-lea for 32-bit
18166 code. */
18167 if (dist_use < 0 && split_cost == 0)
18168 return TARGET_64BIT || IX86_LEA_PRIORITY;
18169 else
18170 return true;
18171 }
18172
18173 /* With a longer definition distance, lea is preferable.
18174 Here we adjust the distance to take into account the splitting
18175 cost and the lea priority. */
18176 dist_define += split_cost + IX86_LEA_PRIORITY;
18177
18178 /* If there is no use in a memory address then we just check
18179 that the split cost exceeds the AGU stall. */
18180 if (dist_use < 0)
18181 return dist_define > LEA_MAX_STALL;
18182
18183 /* If this insn has both backward non-agu dependence and forward
18184 agu dependence, the one with short distance takes effect. */
18185 return dist_define >= dist_use;
18186 }
18187
18188 /* Return true if it is legal to clobber flags by INSN and
18189 false otherwise. */
18190
18191 static bool
18192 ix86_ok_to_clobber_flags (rtx_insn *insn)
18193 {
18194 basic_block bb = BLOCK_FOR_INSN (insn);
18195 df_ref use;
18196 bitmap live;
18197
18198 while (insn)
18199 {
18200 if (NONDEBUG_INSN_P (insn))
18201 {
18202 FOR_EACH_INSN_USE (use, insn)
18203 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18204 return false;
18205
18206 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18207 return true;
18208 }
18209
18210 if (insn == BB_END (bb))
18211 break;
18212
18213 insn = NEXT_INSN (insn);
18214 }
18215
18216 live = df_get_live_out(bb);
18217 return !REGNO_REG_SET_P (live, FLAGS_REG);
18218 }
18219
18220 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18221 move and add to avoid AGU stalls. */
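/* In other words (illustrative sketch): instead of emitting
	lea	(%rbx,%rcx), %rax
   we may prefer
	mov	%rbx, %rax
	add	%rcx, %rax
   when the lea would otherwise stall in the AGU.  */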
18222
18223 bool
18224 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18225 {
18226 unsigned int regno0, regno1, regno2;
18227
18228 /* Check if we need to optimize. */
18229 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18230 return false;
18231
18232 /* Check it is correct to split here. */
18233 if (!ix86_ok_to_clobber_flags(insn))
18234 return false;
18235
18236 regno0 = true_regnum (operands[0]);
18237 regno1 = true_regnum (operands[1]);
18238 regno2 = true_regnum (operands[2]);
18239
18240 /* We need to split only adds with non destructive
18241 destination operand. */
18242 if (regno0 == regno1 || regno0 == regno2)
18243 return false;
18244 else
18245 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18246 }
18247
18248 /* Return true if we should emit lea instruction instead of mov
18249 instruction. */
18250
18251 bool
18252 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18253 {
18254 unsigned int regno0, regno1;
18255
18256 /* Check if we need to optimize. */
18257 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18258 return false;
18259
18260 /* Use lea for reg to reg moves only. */
18261 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18262 return false;
18263
18264 regno0 = true_regnum (operands[0]);
18265 regno1 = true_regnum (operands[1]);
18266
18267 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18268 }
18269
18270 /* Return true if we need to split lea into a sequence of
18271 instructions to avoid AGU stalls. */
18272
18273 bool
18274 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18275 {
18276 unsigned int regno0, regno1, regno2;
18277 int split_cost;
18278 struct ix86_address parts;
18279 int ok;
18280
18281 /* Check we need to optimize. */
18282 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18283 return false;
18284
18285 /* The "at least two components" test below might not catch simple
18286 move or zero extension insns if parts.base is non-NULL and parts.disp
18287 is const0_rtx as the only components in the address, e.g. if the
18288 register is %rbp or %r13. As this test is much cheaper and moves or
18289 zero extensions are the common case, do this check first. */
18290 if (REG_P (operands[1])
18291 || (SImode_address_operand (operands[1], VOIDmode)
18292 && REG_P (XEXP (operands[1], 0))))
18293 return false;
18294
18295 /* Check if it is OK to split here. */
18296 if (!ix86_ok_to_clobber_flags (insn))
18297 return false;
18298
18299 ok = ix86_decompose_address (operands[1], &parts);
18300 gcc_assert (ok);
18301
18302 /* There should be at least two components in the address. */
18303 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18304 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18305 return false;
18306
18307 /* We should not split into add if non legitimate pic
18308 operand is used as displacement. */
18309 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18310 return false;
18311
18312 regno0 = true_regnum (operands[0]);
18313 regno1 = INVALID_REGNUM;
18314 regno2 = INVALID_REGNUM;
18315
18316 if (parts.base)
18317 regno1 = true_regnum (parts.base);
18318 if (parts.index)
18319 regno2 = true_regnum (parts.index);
18320
18321 split_cost = 0;
18322
18323 /* Compute how many cycles we will add to the execution time
18324 if we split the lea into a sequence of instructions. */
18325 if (parts.base || parts.index)
18326 {
18327 /* Have to use a mov instruction if the non-destructive
18328 destination form is used. */
18329 if (regno1 != regno0 && regno2 != regno0)
18330 split_cost += 1;
18331
18332 /* Have to add index to base if both exist. */
18333 if (parts.base && parts.index)
18334 split_cost += 1;
18335
18336 /* Have to use shift and adds if scale is 2 or greater. */
18337 if (parts.scale > 1)
18338 {
18339 if (regno0 != regno1)
18340 split_cost += 1;
18341 else if (regno2 == regno0)
18342 split_cost += 4;
18343 else
18344 split_cost += parts.scale;
18345 }
18346
18347 /* Have to use add instruction with immediate if
18348 disp is non zero. */
18349 if (parts.disp && parts.disp != const0_rtx)
18350 split_cost += 1;
18351
18352 /* Subtract the price of lea. */
18353 split_cost -= 1;
18354 }
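  /* Worked example (sketch): for "lea 0x4(%rbx,%rcx,2), %rax", where the
     destination differs from both base and index, the split needs a mov
     (+1), an add of the base (+1), a shift for the scale (+1) and an add
     of the displacement (+1); subtracting the lea itself (-1) gives a
     split_cost of 3.  */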
18355
18356 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18357 parts.scale > 1);
18358 }
18359
18360 /* Emit x86 binary operand CODE in mode MODE, where the first operand
18361 matches destination. RTX includes clobber of FLAGS_REG. */
18362
18363 static void
18364 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18365 rtx dst, rtx src)
18366 {
18367 rtx op, clob;
18368
18369 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18370 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18371
18372 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18373 }
18374
18375 /* Return true if regno1 def is nearest to the insn. */
18376
18377 static bool
18378 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18379 {
18380 rtx_insn *prev = insn;
18381 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18382
18383 if (insn == start)
18384 return false;
18385 while (prev && prev != start)
18386 {
18387 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18388 {
18389 prev = PREV_INSN (prev);
18390 continue;
18391 }
18392 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18393 return true;
18394 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18395 return false;
18396 prev = PREV_INSN (prev);
18397 }
18398
18399 /* None of the regs is defined in the bb. */
18400 return false;
18401 }
18402
18403 /* Split lea instructions into a sequence of instructions
18404 which are executed on ALU to avoid AGU stalls.
18405 It is assumed that it is allowed to clobber flags register
18406 at lea position. */
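/* Illustrative sketch (the exact sequence depends on which operands
   alias the destination): splitting

	lea	0x4(%rbx,%rcx,2), %rax

   can produce

	mov	%rcx, %rax		; copy the index
	shl	$1, %rax		; apply the scale
	add	%rbx, %rax		; add the base
	add	$0x4, %rax		; add the displacement	*/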
18407
18408 void
18409 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18410 {
18411 unsigned int regno0, regno1, regno2;
18412 struct ix86_address parts;
18413 rtx target, tmp;
18414 int ok, adds;
18415
18416 ok = ix86_decompose_address (operands[1], &parts);
18417 gcc_assert (ok);
18418
18419 target = gen_lowpart (mode, operands[0]);
18420
18421 regno0 = true_regnum (target);
18422 regno1 = INVALID_REGNUM;
18423 regno2 = INVALID_REGNUM;
18424
18425 if (parts.base)
18426 {
18427 parts.base = gen_lowpart (mode, parts.base);
18428 regno1 = true_regnum (parts.base);
18429 }
18430
18431 if (parts.index)
18432 {
18433 parts.index = gen_lowpart (mode, parts.index);
18434 regno2 = true_regnum (parts.index);
18435 }
18436
18437 if (parts.disp)
18438 parts.disp = gen_lowpart (mode, parts.disp);
18439
18440 if (parts.scale > 1)
18441 {
18442 /* Case r1 = r1 + ... */
18443 if (regno1 == regno0)
18444 {
18445 /* If we have a case r1 = r1 + C * r2 then we
18446 would have to use multiplication, which is very
18447 expensive. Assume the cost model is wrong if we
18448 reach such a case here. */
18449 gcc_assert (regno2 != regno0);
18450
18451 for (adds = parts.scale; adds > 0; adds--)
18452 ix86_emit_binop (PLUS, mode, target, parts.index);
18453 }
18454 else
18455 {
18456 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18457 if (regno0 != regno2)
18458 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18459
18460 /* Use shift for scaling. */
18461 ix86_emit_binop (ASHIFT, mode, target,
18462 GEN_INT (exact_log2 (parts.scale)));
18463
18464 if (parts.base)
18465 ix86_emit_binop (PLUS, mode, target, parts.base);
18466
18467 if (parts.disp && parts.disp != const0_rtx)
18468 ix86_emit_binop (PLUS, mode, target, parts.disp);
18469 }
18470 }
18471 else if (!parts.base && !parts.index)
18472 {
18473 gcc_assert(parts.disp);
18474 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18475 }
18476 else
18477 {
18478 if (!parts.base)
18479 {
18480 if (regno0 != regno2)
18481 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18482 }
18483 else if (!parts.index)
18484 {
18485 if (regno0 != regno1)
18486 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18487 }
18488 else
18489 {
18490 if (regno0 == regno1)
18491 tmp = parts.index;
18492 else if (regno0 == regno2)
18493 tmp = parts.base;
18494 else
18495 {
18496 rtx tmp1;
18497
18498 /* Find better operand for SET instruction, depending
18499 on which definition is farther from the insn. */
18500 if (find_nearest_reg_def (insn, regno1, regno2))
18501 tmp = parts.index, tmp1 = parts.base;
18502 else
18503 tmp = parts.base, tmp1 = parts.index;
18504
18505 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18506
18507 if (parts.disp && parts.disp != const0_rtx)
18508 ix86_emit_binop (PLUS, mode, target, parts.disp);
18509
18510 ix86_emit_binop (PLUS, mode, target, tmp1);
18511 return;
18512 }
18513
18514 ix86_emit_binop (PLUS, mode, target, tmp);
18515 }
18516
18517 if (parts.disp && parts.disp != const0_rtx)
18518 ix86_emit_binop (PLUS, mode, target, parts.disp);
18519 }
18520 }
18521
18522 /* Return true if it is ok to optimize an ADD operation to an LEA
18523 operation to avoid flag register consumption. For most processors,
18524 ADD is faster than LEA. For processors like BONNELL, if the
18525 destination register of the LEA holds an actual address which will be
18526 used soon, LEA is better; otherwise ADD is better. */
18527
18528 bool
18529 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18530 {
18531 unsigned int regno0 = true_regnum (operands[0]);
18532 unsigned int regno1 = true_regnum (operands[1]);
18533 unsigned int regno2 = true_regnum (operands[2]);
18534
18535 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18536 if (regno0 != regno1 && regno0 != regno2)
18537 return true;
18538
18539 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18540 return false;
18541
18542 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18543 }
18544
18545 /* Return true if destination reg of SET_BODY is shift count of
18546 USE_BODY. */
18547
18548 static bool
18549 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18550 {
18551 rtx set_dest;
18552 rtx shift_rtx;
18553 int i;
18554
18555 /* Retrieve destination of SET_BODY. */
18556 switch (GET_CODE (set_body))
18557 {
18558 case SET:
18559 set_dest = SET_DEST (set_body);
18560 if (!set_dest || !REG_P (set_dest))
18561 return false;
18562 break;
18563 case PARALLEL:
18564 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18565 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18566 use_body))
18567 return true;
18568 default:
18569 return false;
18570 break;
18571 }
18572
18573 /* Retrieve shift count of USE_BODY. */
18574 switch (GET_CODE (use_body))
18575 {
18576 case SET:
18577 shift_rtx = XEXP (use_body, 1);
18578 break;
18579 case PARALLEL:
18580 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18581 if (ix86_dep_by_shift_count_body (set_body,
18582 XVECEXP (use_body, 0, i)))
18583 return true;
18584 default:
18585 return false;
18586 break;
18587 }
18588
18589 if (shift_rtx
18590 && (GET_CODE (shift_rtx) == ASHIFT
18591 || GET_CODE (shift_rtx) == LSHIFTRT
18592 || GET_CODE (shift_rtx) == ASHIFTRT
18593 || GET_CODE (shift_rtx) == ROTATE
18594 || GET_CODE (shift_rtx) == ROTATERT))
18595 {
18596 rtx shift_count = XEXP (shift_rtx, 1);
18597
18598 /* Return true if shift count is dest of SET_BODY. */
18599 if (REG_P (shift_count))
18600 {
18601 /* Add check since it can be invoked before register
18602 allocation in pre-reload schedule. */
18603 if (reload_completed
18604 && true_regnum (set_dest) == true_regnum (shift_count))
18605 return true;
18606 else if (REGNO(set_dest) == REGNO(shift_count))
18607 return true;
18608 }
18609 }
18610
18611 return false;
18612 }
18613
18614 /* Return true if destination reg of SET_INSN is shift count of
18615 USE_INSN. */
18616
18617 bool
18618 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18619 {
18620 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18621 PATTERN (use_insn));
18622 }
18623
18624 /* Return TRUE or FALSE depending on whether the unary operator meets the
18625 appropriate constraints. */
18626
18627 bool
18628 ix86_unary_operator_ok (enum rtx_code,
18629 enum machine_mode,
18630 rtx operands[2])
18631 {
18632 /* If one of operands is memory, source and destination must match. */
18633 if ((MEM_P (operands[0])
18634 || MEM_P (operands[1]))
18635 && ! rtx_equal_p (operands[0], operands[1]))
18636 return false;
18637 return true;
18638 }
18639
18640 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18641 are ok, keeping in mind the possible movddup alternative. */
18642
18643 bool
18644 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18645 {
18646 if (MEM_P (operands[0]))
18647 return rtx_equal_p (operands[0], operands[1 + high]);
18648 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18649 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18650 return true;
18651 }
18652
18653 /* Post-reload splitter for converting an SF or DFmode value in an
18654 SSE register into an unsigned SImode. */
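/* Conceptually (sketch): values below 0x1p31 are converted with the
   plain signed truncating conversion; larger values have 0x1p31
   subtracted first and the 0x80000000 bit xored back into the integer
   result.  The mask produced by the LE comparison below selects
   between the two cases per element.  */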
18655
18656 void
18657 ix86_split_convert_uns_si_sse (rtx operands[])
18658 {
18659 enum machine_mode vecmode;
18660 rtx value, large, zero_or_two31, input, two31, x;
18661
18662 large = operands[1];
18663 zero_or_two31 = operands[2];
18664 input = operands[3];
18665 two31 = operands[4];
18666 vecmode = GET_MODE (large);
18667 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18668
18669 /* Load up the value into the low element. We must ensure that the other
18670 elements are valid floats -- zero is the easiest such value. */
18671 if (MEM_P (input))
18672 {
18673 if (vecmode == V4SFmode)
18674 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18675 else
18676 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18677 }
18678 else
18679 {
18680 input = gen_rtx_REG (vecmode, REGNO (input));
18681 emit_move_insn (value, CONST0_RTX (vecmode));
18682 if (vecmode == V4SFmode)
18683 emit_insn (gen_sse_movss (value, value, input));
18684 else
18685 emit_insn (gen_sse2_movsd (value, value, input));
18686 }
18687
18688 emit_move_insn (large, two31);
18689 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18690
18691 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18692 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18693
18694 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18695 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18696
18697 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18698 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18699
18700 large = gen_rtx_REG (V4SImode, REGNO (large));
18701 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18702
18703 x = gen_rtx_REG (V4SImode, REGNO (value));
18704 if (vecmode == V4SFmode)
18705 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18706 else
18707 emit_insn (gen_sse2_cvttpd2dq (x, value));
18708 value = x;
18709
18710 emit_insn (gen_xorv4si3 (value, value, large));
18711 }
18712
18713 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18714 Expects the 64-bit DImode to be supplied in a pair of integral
18715 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18716 -mfpmath=sse, !optimize_size only. */
18717
18718 void
18719 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18720 {
18721 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18722 rtx int_xmm, fp_xmm;
18723 rtx biases, exponents;
18724 rtx x;
18725
18726 int_xmm = gen_reg_rtx (V4SImode);
18727 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18728 emit_insn (gen_movdi_to_sse (int_xmm, input));
18729 else if (TARGET_SSE_SPLIT_REGS)
18730 {
18731 emit_clobber (int_xmm);
18732 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18733 }
18734 else
18735 {
18736 x = gen_reg_rtx (V2DImode);
18737 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18738 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18739 }
18740
18741 x = gen_rtx_CONST_VECTOR (V4SImode,
18742 gen_rtvec (4, GEN_INT (0x43300000UL),
18743 GEN_INT (0x45300000UL),
18744 const0_rtx, const0_rtx));
18745 exponents = validize_mem (force_const_mem (V4SImode, x));
18746
18747 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18748 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18749
18750 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18751 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18752 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18753 (0x1.0p84 + double(fp_value_hi_xmm)).
18754 Note these exponents differ by 32. */
18755
18756 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18757
18758 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18759 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18760 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18761 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18762 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18763 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18764 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18765 biases = validize_mem (force_const_mem (V2DFmode, biases));
18766 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18767
18768 /* Add the upper and lower DFmode values together. */
18769 if (TARGET_SSE3)
18770 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18771 else
18772 {
18773 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18774 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18775 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18776 }
18777
18778 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18779 }
18780
18781 /* Not used, but eases macroization of patterns. */
18782 void
18783 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18784 {
18785 gcc_unreachable ();
18786 }
18787
18788 /* Convert an unsigned SImode value into a DFmode. Only currently used
18789 for SSE, but applicable anywhere. */
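/* The trick used below (sketch): adding -0x80000000 modulo 2^32 just
   flips the sign bit, so for an unsigned input u we compute
   (double) (int) (u + 0x80000000) + 0x1p31, which equals (double) u
   exactly, since DFmode can represent every 32-bit integer.  */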
18790
18791 void
18792 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18793 {
18794 REAL_VALUE_TYPE TWO31r;
18795 rtx x, fp;
18796
18797 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18798 NULL, 1, OPTAB_DIRECT);
18799
18800 fp = gen_reg_rtx (DFmode);
18801 emit_insn (gen_floatsidf2 (fp, x));
18802
18803 real_ldexp (&TWO31r, &dconst1, 31);
18804 x = const_double_from_real_value (TWO31r, DFmode);
18805
18806 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18807 if (x != target)
18808 emit_move_insn (target, x);
18809 }
18810
18811 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18812 32-bit mode; otherwise we have a direct convert instruction. */
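/* The value is computed as (sketch)
     (double) (signed) hi * 0x1p32 + (double) (unsigned) lo
   where hi and lo are the high and low 32-bit halves of the input;
   both partial conversions are exact in DFmode.  */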
18813
18814 void
18815 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18816 {
18817 REAL_VALUE_TYPE TWO32r;
18818 rtx fp_lo, fp_hi, x;
18819
18820 fp_lo = gen_reg_rtx (DFmode);
18821 fp_hi = gen_reg_rtx (DFmode);
18822
18823 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18824
18825 real_ldexp (&TWO32r, &dconst1, 32);
18826 x = const_double_from_real_value (TWO32r, DFmode);
18827 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18828
18829 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18830
18831 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18832 0, OPTAB_DIRECT);
18833 if (x != target)
18834 emit_move_insn (target, x);
18835 }
18836
18837 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18838 For x86_32, -mfpmath=sse, !optimize_size only. */
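/* Sketch of the approach: split the 32-bit input into 16-bit halves,
   convert each half exactly (each fits in SFmode's 24-bit significand),
   then compute hi * 0x1p16 + lo; only the final addition can round.  */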
18839 void
18840 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18841 {
18842 REAL_VALUE_TYPE ONE16r;
18843 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18844
18845 real_ldexp (&ONE16r, &dconst1, 16);
18846 x = const_double_from_real_value (ONE16r, SFmode);
18847 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18848 NULL, 0, OPTAB_DIRECT);
18849 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18850 NULL, 0, OPTAB_DIRECT);
18851 fp_hi = gen_reg_rtx (SFmode);
18852 fp_lo = gen_reg_rtx (SFmode);
18853 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18854 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18855 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18856 0, OPTAB_DIRECT);
18857 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18858 0, OPTAB_DIRECT);
18859 if (!rtx_equal_p (target, fp_hi))
18860 emit_move_insn (target, fp_hi);
18861 }
18862
18863 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18864 a vector of unsigned ints VAL to vector of floats TARGET. */
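/* This uses the same split-into-16-bit-halves technique as
   ix86_expand_convert_uns_sisf_sse above, applied lane-wise to the
   whole vector (sketch; see the tmp[] steps below).  */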
18865
18866 void
18867 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18868 {
18869 rtx tmp[8];
18870 REAL_VALUE_TYPE TWO16r;
18871 enum machine_mode intmode = GET_MODE (val);
18872 enum machine_mode fltmode = GET_MODE (target);
18873 rtx (*cvt) (rtx, rtx);
18874
18875 if (intmode == V4SImode)
18876 cvt = gen_floatv4siv4sf2;
18877 else
18878 cvt = gen_floatv8siv8sf2;
18879 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18880 tmp[0] = force_reg (intmode, tmp[0]);
18881 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18882 OPTAB_DIRECT);
18883 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18884 NULL_RTX, 1, OPTAB_DIRECT);
18885 tmp[3] = gen_reg_rtx (fltmode);
18886 emit_insn (cvt (tmp[3], tmp[1]));
18887 tmp[4] = gen_reg_rtx (fltmode);
18888 emit_insn (cvt (tmp[4], tmp[2]));
18889 real_ldexp (&TWO16r, &dconst1, 16);
18890 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18891 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18892 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18893 OPTAB_DIRECT);
18894 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18895 OPTAB_DIRECT);
18896 if (tmp[7] != target)
18897 emit_move_insn (target, tmp[7]);
18898 }
18899
18900 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18901 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18902 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18903 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18904
18905 rtx
18906 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18907 {
18908 REAL_VALUE_TYPE TWO31r;
18909 rtx two31r, tmp[4];
18910 enum machine_mode mode = GET_MODE (val);
18911 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18912 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18913 rtx (*cmp) (rtx, rtx, rtx, rtx);
18914 int i;
18915
18916 for (i = 0; i < 3; i++)
18917 tmp[i] = gen_reg_rtx (mode);
18918 real_ldexp (&TWO31r, &dconst1, 31);
18919 two31r = const_double_from_real_value (TWO31r, scalarmode);
18920 two31r = ix86_build_const_vector (mode, 1, two31r);
18921 two31r = force_reg (mode, two31r);
18922 switch (mode)
18923 {
18924 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18925 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18926 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18927 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18928 default: gcc_unreachable ();
18929 }
18930 tmp[3] = gen_rtx_LE (mode, two31r, val);
18931 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18932 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18933 0, OPTAB_DIRECT);
18934 if (intmode == V4SImode || TARGET_AVX2)
18935 *xorp = expand_simple_binop (intmode, ASHIFT,
18936 gen_lowpart (intmode, tmp[0]),
18937 GEN_INT (31), NULL_RTX, 0,
18938 OPTAB_DIRECT);
18939 else
18940 {
18941 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18942 two31 = ix86_build_const_vector (intmode, 1, two31);
18943 *xorp = expand_simple_binop (intmode, AND,
18944 gen_lowpart (intmode, tmp[0]),
18945 two31, NULL_RTX, 0,
18946 OPTAB_DIRECT);
18947 }
18948 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18949 0, OPTAB_DIRECT);
18950 }
18951
18952 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18953 then replicate the value for all elements of the vector
18954 register. */
18955
18956 rtx
18957 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18958 {
18959 int i, n_elt;
18960 rtvec v;
18961 enum machine_mode scalar_mode;
18962
18963 switch (mode)
18964 {
18965 case V64QImode:
18966 case V32QImode:
18967 case V16QImode:
18968 case V32HImode:
18969 case V16HImode:
18970 case V8HImode:
18971 case V16SImode:
18972 case V8SImode:
18973 case V4SImode:
18974 case V8DImode:
18975 case V4DImode:
18976 case V2DImode:
18977 gcc_assert (vect);
18978 case V16SFmode:
18979 case V8SFmode:
18980 case V4SFmode:
18981 case V8DFmode:
18982 case V4DFmode:
18983 case V2DFmode:
18984 n_elt = GET_MODE_NUNITS (mode);
18985 v = rtvec_alloc (n_elt);
18986 scalar_mode = GET_MODE_INNER (mode);
18987
18988 RTVEC_ELT (v, 0) = value;
18989
18990 for (i = 1; i < n_elt; ++i)
18991 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18992
18993 return gen_rtx_CONST_VECTOR (mode, v);
18994
18995 default:
18996 gcc_unreachable ();
18997 }
18998 }
18999
19000 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19001 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
19002 for an SSE register. If VECT is true, then replicate the mask for
19003 all elements of the vector register. If INVERT is true, then create
19004 a mask excluding the sign bit. */
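/* For example (sketch): for V4SFmode with VECT true and INVERT false this
   builds a vector of four elements whose bit pattern is 0x80000000
   (i.e. four copies of -0.0f); with INVERT true each element's bit
   pattern becomes 0x7fffffff instead.  */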
19005
19006 rtx
19007 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
19008 {
19009 enum machine_mode vec_mode, imode;
19010 HOST_WIDE_INT hi, lo;
19011 int shift = 63;
19012 rtx v;
19013 rtx mask;
19014
19015 /* Find the sign bit, sign extended to 2*HWI. */
19016 switch (mode)
19017 {
19018 case V16SImode:
19019 case V16SFmode:
19020 case V8SImode:
19021 case V4SImode:
19022 case V8SFmode:
19023 case V4SFmode:
19024 vec_mode = mode;
19025 mode = GET_MODE_INNER (mode);
19026 imode = SImode;
19027 lo = 0x80000000, hi = lo < 0;
19028 break;
19029
19030 case V8DImode:
19031 case V4DImode:
19032 case V2DImode:
19033 case V8DFmode:
19034 case V4DFmode:
19035 case V2DFmode:
19036 vec_mode = mode;
19037 mode = GET_MODE_INNER (mode);
19038 imode = DImode;
19039 if (HOST_BITS_PER_WIDE_INT >= 64)
19040 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19041 else
19042 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19043 break;
19044
19045 case TImode:
19046 case TFmode:
19047 vec_mode = VOIDmode;
19048 if (HOST_BITS_PER_WIDE_INT >= 64)
19049 {
19050 imode = TImode;
19051 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19052 }
19053 else
19054 {
19055 rtvec vec;
19056
19057 imode = DImode;
19058 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19059
19060 if (invert)
19061 {
19062 lo = ~lo, hi = ~hi;
19063 v = constm1_rtx;
19064 }
19065 else
19066 v = const0_rtx;
19067
19068 mask = immed_double_const (lo, hi, imode);
19069
19070 vec = gen_rtvec (2, v, mask);
19071 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19072 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19073
19074 return v;
19075 }
19076 break;
19077
19078 default:
19079 gcc_unreachable ();
19080 }
19081
19082 if (invert)
19083 lo = ~lo, hi = ~hi;
19084
19085 /* Force this value into the low part of a fp vector constant. */
19086 mask = immed_double_const (lo, hi, imode);
19087 mask = gen_lowpart (mode, mask);
19088
19089 if (vec_mode == VOIDmode)
19090 return force_reg (mode, mask);
19091
19092 v = ix86_build_const_vector (vec_mode, vect, mask);
19093 return force_reg (vec_mode, v);
19094 }
19095
19096 /* Generate code for floating point ABS or NEG. */
19097
19098 void
19099 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19100 rtx operands[])
19101 {
19102 rtx mask, set, dst, src;
19103 bool use_sse = false;
19104 bool vector_mode = VECTOR_MODE_P (mode);
19105 enum machine_mode vmode = mode;
19106
19107 if (vector_mode)
19108 use_sse = true;
19109 else if (mode == TFmode)
19110 use_sse = true;
19111 else if (TARGET_SSE_MATH)
19112 {
19113 use_sse = SSE_FLOAT_MODE_P (mode);
19114 if (mode == SFmode)
19115 vmode = V4SFmode;
19116 else if (mode == DFmode)
19117 vmode = V2DFmode;
19118 }
19119
19120 /* NEG and ABS performed with SSE use bitwise mask operations.
19121 Create the appropriate mask now. */
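  /* Conceptually (sketch): with SSE, NEG is an XOR with a mask that has
     only the sign bit set, and ABS is an AND with the complemented mask
     (hence INVERT == (code == ABS) below); the concrete insn choice is
     made when the pattern is later split.  */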
19122 if (use_sse)
19123 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19124 else
19125 mask = NULL_RTX;
19126
19127 dst = operands[0];
19128 src = operands[1];
19129
19130 set = gen_rtx_fmt_e (code, mode, src);
19131 set = gen_rtx_SET (VOIDmode, dst, set);
19132
19133 if (mask)
19134 {
19135 rtx use, clob;
19136 rtvec par;
19137
19138 use = gen_rtx_USE (VOIDmode, mask);
19139 if (vector_mode)
19140 par = gen_rtvec (2, set, use);
19141 else
19142 {
19143 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19144 par = gen_rtvec (3, set, use, clob);
19145 }
19146 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19147 }
19148 else
19149 emit_insn (set);
19150 }
19151
19152 /* Expand a copysign operation. Special case operand 0 being a constant. */
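/* Conceptually (sketch): copysign (x, y) is computed with bit masks as
   (x & ~signmask) | (y & signmask); the split routines below carry this
   out with AND, NOT/AND and IOR operations on the vector registers.  */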
19153
19154 void
19155 ix86_expand_copysign (rtx operands[])
19156 {
19157 enum machine_mode mode, vmode;
19158 rtx dest, op0, op1, mask, nmask;
19159
19160 dest = operands[0];
19161 op0 = operands[1];
19162 op1 = operands[2];
19163
19164 mode = GET_MODE (dest);
19165
19166 if (mode == SFmode)
19167 vmode = V4SFmode;
19168 else if (mode == DFmode)
19169 vmode = V2DFmode;
19170 else
19171 vmode = mode;
19172
19173 if (GET_CODE (op0) == CONST_DOUBLE)
19174 {
19175 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19176
19177 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19178 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19179
19180 if (mode == SFmode || mode == DFmode)
19181 {
19182 if (op0 == CONST0_RTX (mode))
19183 op0 = CONST0_RTX (vmode);
19184 else
19185 {
19186 rtx v = ix86_build_const_vector (vmode, false, op0);
19187
19188 op0 = force_reg (vmode, v);
19189 }
19190 }
19191 else if (op0 != CONST0_RTX (mode))
19192 op0 = force_reg (mode, op0);
19193
19194 mask = ix86_build_signbit_mask (vmode, 0, 0);
19195
19196 if (mode == SFmode)
19197 copysign_insn = gen_copysignsf3_const;
19198 else if (mode == DFmode)
19199 copysign_insn = gen_copysigndf3_const;
19200 else
19201 copysign_insn = gen_copysigntf3_const;
19202
19203 emit_insn (copysign_insn (dest, op0, op1, mask));
19204 }
19205 else
19206 {
19207 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19208
19209 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19210 mask = ix86_build_signbit_mask (vmode, 0, 0);
19211
19212 if (mode == SFmode)
19213 copysign_insn = gen_copysignsf3_var;
19214 else if (mode == DFmode)
19215 copysign_insn = gen_copysigndf3_var;
19216 else
19217 copysign_insn = gen_copysigntf3_var;
19218
19219 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19220 }
19221 }
19222
19223 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19224 be a constant, and so has already been expanded into a vector constant. */
19225
19226 void
19227 ix86_split_copysign_const (rtx operands[])
19228 {
19229 enum machine_mode mode, vmode;
19230 rtx dest, op0, mask, x;
19231
19232 dest = operands[0];
19233 op0 = operands[1];
19234 mask = operands[3];
19235
19236 mode = GET_MODE (dest);
19237 vmode = GET_MODE (mask);
19238
19239 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19240 x = gen_rtx_AND (vmode, dest, mask);
19241 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19242
19243 if (op0 != CONST0_RTX (vmode))
19244 {
19245 x = gen_rtx_IOR (vmode, dest, op0);
19246 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19247 }
19248 }
19249
19250 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19251 so we have to do two masks. */
19252
19253 void
19254 ix86_split_copysign_var (rtx operands[])
19255 {
19256 enum machine_mode mode, vmode;
19257 rtx dest, scratch, op0, op1, mask, nmask, x;
19258
19259 dest = operands[0];
19260 scratch = operands[1];
19261 op0 = operands[2];
19262 op1 = operands[3];
19263 nmask = operands[4];
19264 mask = operands[5];
19265
19266 mode = GET_MODE (dest);
19267 vmode = GET_MODE (mask);
19268
19269 if (rtx_equal_p (op0, op1))
19270 {
19271 /* Shouldn't happen often (it's useless, obviously), but when it does
19272 we'd generate incorrect code if we continue below. */
19273 emit_move_insn (dest, op0);
19274 return;
19275 }
19276
19277 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19278 {
19279 gcc_assert (REGNO (op1) == REGNO (scratch));
19280
19281 x = gen_rtx_AND (vmode, scratch, mask);
19282 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19283
19284 dest = mask;
19285 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19286 x = gen_rtx_NOT (vmode, dest);
19287 x = gen_rtx_AND (vmode, x, op0);
19288 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19289 }
19290 else
19291 {
19292 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19293 {
19294 x = gen_rtx_AND (vmode, scratch, mask);
19295 }
19296 else /* alternative 2,4 */
19297 {
19298 gcc_assert (REGNO (mask) == REGNO (scratch));
19299 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19300 x = gen_rtx_AND (vmode, scratch, op1);
19301 }
19302 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19303
19304 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19305 {
19306 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19307 x = gen_rtx_AND (vmode, dest, nmask);
19308 }
19309 else /* alternative 3,4 */
19310 {
19311 gcc_assert (REGNO (nmask) == REGNO (dest));
19312 dest = nmask;
19313 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19314 x = gen_rtx_AND (vmode, dest, op0);
19315 }
19316 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19317 }
19318
19319 x = gen_rtx_IOR (vmode, dest, scratch);
19320 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19321 }
19322
19323 /* Return TRUE or FALSE depending on whether the first SET in INSN
19324 has source and destination with matching CC modes, and that the
19325 CC mode is at least as constrained as REQ_MODE. */
19326
19327 bool
19328 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19329 {
19330 rtx set;
19331 enum machine_mode set_mode;
19332
19333 set = PATTERN (insn);
19334 if (GET_CODE (set) == PARALLEL)
19335 set = XVECEXP (set, 0, 0);
19336 gcc_assert (GET_CODE (set) == SET);
19337 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19338
19339 set_mode = GET_MODE (SET_DEST (set));
19340 switch (set_mode)
19341 {
19342 case CCNOmode:
19343 if (req_mode != CCNOmode
19344 && (req_mode != CCmode
19345 || XEXP (SET_SRC (set), 1) != const0_rtx))
19346 return false;
19347 break;
19348 case CCmode:
19349 if (req_mode == CCGCmode)
19350 return false;
19351 /* FALLTHRU */
19352 case CCGCmode:
19353 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19354 return false;
19355 /* FALLTHRU */
19356 case CCGOCmode:
19357 if (req_mode == CCZmode)
19358 return false;
19359 /* FALLTHRU */
19360 case CCZmode:
19361 break;
19362
19363 case CCAmode:
19364 case CCCmode:
19365 case CCOmode:
19366 case CCSmode:
19367 if (set_mode != req_mode)
19368 return false;
19369 break;
19370
19371 default:
19372 gcc_unreachable ();
19373 }
19374
19375 return GET_MODE (SET_SRC (set)) == set_mode;
19376 }
19377
19378 /* Generate insn patterns to do an integer compare of OPERANDS. */
19379
19380 static rtx
19381 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19382 {
19383 enum machine_mode cmpmode;
19384 rtx tmp, flags;
19385
19386 cmpmode = SELECT_CC_MODE (code, op0, op1);
19387 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19388
19389 /* This is very simple, but making the interface the same as in the
19390 FP case makes the rest of the code easier. */
19391 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19392 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19393
19394 /* Return the test that should be put into the flags user, i.e.
19395 the bcc, scc, or cmov instruction. */
19396 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19397 }
19398
19399 /* Figure out whether to use ordered or unordered fp comparisons.
19400 Return the appropriate mode to use. */
19401
19402 enum machine_mode
19403 ix86_fp_compare_mode (enum rtx_code)
19404 {
19405 /* ??? In order to make all comparisons reversible, we do all comparisons
19406 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19407 all forms of trapping and nontrapping comparisons, we can make inequality
19408 comparisons trapping again, since it results in better code when using
19409 FCOM based compares. */
19410 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19411 }
19412
19413 enum machine_mode
19414 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19415 {
19416 enum machine_mode mode = GET_MODE (op0);
19417
19418 if (SCALAR_FLOAT_MODE_P (mode))
19419 {
19420 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19421 return ix86_fp_compare_mode (code);
19422 }
19423
19424 switch (code)
19425 {
19426 /* Only zero flag is needed. */
19427 case EQ: /* ZF=0 */
19428 case NE: /* ZF!=0 */
19429 return CCZmode;
19430 /* Codes needing carry flag. */
19431 case GEU: /* CF=0 */
19432 case LTU: /* CF=1 */
19433 /* Detect overflow checks. They need just the carry flag. */
19434 if (GET_CODE (op0) == PLUS
19435 && rtx_equal_p (op1, XEXP (op0, 0)))
19436 return CCCmode;
19437 else
19438 return CCmode;
19439 case GTU: /* CF=0 & ZF=0 */
19440 case LEU: /* CF=1 | ZF=1 */
19441 return CCmode;
19442 /* Codes possibly doable only with sign flag when
19443 comparing against zero. */
19444 case GE: /* SF=OF or SF=0 */
19445 case LT: /* SF<>OF or SF=1 */
19446 if (op1 == const0_rtx)
19447 return CCGOCmode;
19448 else
19449 /* For other cases Carry flag is not required. */
19450 return CCGCmode;
19451 /* Codes doable only with the sign flag when comparing
19452 against zero, but we miss a jump instruction for it,
19453 so we need to use relational tests against overflow,
19454 which thus needs to be zero. */
19455 case GT: /* ZF=0 & SF=OF */
19456 case LE: /* ZF=1 | SF<>OF */
19457 if (op1 == const0_rtx)
19458 return CCNOmode;
19459 else
19460 return CCGCmode;
19461 /* The strcmp pattern does (use flags), and combine may ask us for
19462 the proper mode. */
19463 case USE:
19464 return CCmode;
19465 default:
19466 gcc_unreachable ();
19467 }
19468 }
19469
19470 /* Return the fixed registers used for condition codes. */
19471
19472 static bool
19473 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19474 {
19475 *p1 = FLAGS_REG;
19476 *p2 = FPSR_REG;
19477 return true;
19478 }
19479
19480 /* If two condition code modes are compatible, return a condition code
19481 mode which is compatible with both. Otherwise, return
19482 VOIDmode. */
19483
19484 static enum machine_mode
19485 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19486 {
19487 if (m1 == m2)
19488 return m1;
19489
19490 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19491 return VOIDmode;
19492
19493 if ((m1 == CCGCmode && m2 == CCGOCmode)
19494 || (m1 == CCGOCmode && m2 == CCGCmode))
19495 return CCGCmode;
19496
19497 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19498 return m2;
19499 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19500 return m1;
19501
19502 switch (m1)
19503 {
19504 default:
19505 gcc_unreachable ();
19506
19507 case CCmode:
19508 case CCGCmode:
19509 case CCGOCmode:
19510 case CCNOmode:
19511 case CCAmode:
19512 case CCCmode:
19513 case CCOmode:
19514 case CCSmode:
19515 case CCZmode:
19516 switch (m2)
19517 {
19518 default:
19519 return VOIDmode;
19520
19521 case CCmode:
19522 case CCGCmode:
19523 case CCGOCmode:
19524 case CCNOmode:
19525 case CCAmode:
19526 case CCCmode:
19527 case CCOmode:
19528 case CCSmode:
19529 case CCZmode:
19530 return CCmode;
19531 }
19532
19533 case CCFPmode:
19534 case CCFPUmode:
19535 /* These are only compatible with themselves, which we already
19536 checked above. */
19537 return VOIDmode;
19538 }
19539 }
19540
19541
19542 /* Return a comparison we can do that is equivalent to
19543 swap_condition (code), except possibly for orderedness.
19544 Never change orderedness if TARGET_IEEE_FP, returning
19545 UNKNOWN in that case if necessary. */
19546
19547 static enum rtx_code
19548 ix86_fp_swap_condition (enum rtx_code code)
19549 {
19550 switch (code)
19551 {
19552 case GT: /* GTU - CF=0 & ZF=0 */
19553 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19554 case GE: /* GEU - CF=0 */
19555 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19556 case UNLT: /* LTU - CF=1 */
19557 return TARGET_IEEE_FP ? UNKNOWN : GT;
19558 case UNLE: /* LEU - CF=1 | ZF=1 */
19559 return TARGET_IEEE_FP ? UNKNOWN : GE;
19560 default:
19561 return swap_condition (code);
19562 }
19563 }
19564
19565 /* Return the cost of comparison CODE using the best strategy for performance.
19566 All of the following functions use the number of instructions as the cost metric.
19567 In the future this should be tweaked to compute bytes for optimize_size and
19568 to take into account the performance of various instructions on various CPUs. */
19569
19570 static int
19571 ix86_fp_comparison_cost (enum rtx_code code)
19572 {
19573 int arith_cost;
19574
19575 /* The cost of code using bit-twiddling on %ah. */
19576 switch (code)
19577 {
19578 case UNLE:
19579 case UNLT:
19580 case LTGT:
19581 case GT:
19582 case GE:
19583 case UNORDERED:
19584 case ORDERED:
19585 case UNEQ:
19586 arith_cost = 4;
19587 break;
19588 case LT:
19589 case NE:
19590 case EQ:
19591 case UNGE:
19592 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19593 break;
19594 case LE:
19595 case UNGT:
19596 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19597 break;
19598 default:
19599 gcc_unreachable ();
19600 }
19601
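/* The fcomi and fnstsw+sahf strategies avoid most of the bit twiddling;
   e.g. this function rates an IEEE LE at 6 for the arithmetic strategy
   above but only 3 when fcomi is available. */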
19602 switch (ix86_fp_comparison_strategy (code))
19603 {
19604 case IX86_FPCMP_COMI:
19605 return arith_cost > 4 ? 3 : 2;
19606 case IX86_FPCMP_SAHF:
19607 return arith_cost > 4 ? 4 : 3;
19608 default:
19609 return arith_cost;
19610 }
19611 }
19612
19613 /* Return the strategy to use for a floating-point comparison. We assume that
19614 fcomi is always preferable where available, since that is also true when
19615 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19616
19617 enum ix86_fpcmp_strategy
19618 ix86_fp_comparison_strategy (enum rtx_code)
19619 {
19620 /* Do fcomi/sahf based test when profitable. */
19621
19622 if (TARGET_CMOVE)
19623 return IX86_FPCMP_COMI;
19624
19625 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19626 return IX86_FPCMP_SAHF;
19627
19628 return IX86_FPCMP_ARITH;
19629 }
19630
19631 /* Swap, force into registers, or otherwise massage the two operands
19632 to a fp comparison. The operands are updated in place; the new
19633 comparison code is returned. */
19634
19635 static enum rtx_code
19636 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19637 {
19638 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19639 rtx op0 = *pop0, op1 = *pop1;
19640 enum machine_mode op_mode = GET_MODE (op0);
19641 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19642
19643 /* All of the unordered compare instructions only work on registers.
19644 The same is true of the fcomi compare instructions. The XFmode
19645 compare instructions require registers except when comparing
19646 against zero or when converting operand 1 from fixed point to
19647 floating point. */
19648
19649 if (!is_sse
19650 && (fpcmp_mode == CCFPUmode
19651 || (op_mode == XFmode
19652 && ! (standard_80387_constant_p (op0) == 1
19653 || standard_80387_constant_p (op1) == 1)
19654 && GET_CODE (op1) != FLOAT)
19655 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19656 {
19657 op0 = force_reg (op_mode, op0);
19658 op1 = force_reg (op_mode, op1);
19659 }
19660 else
19661 {
19662 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19663 things around if they appear profitable, otherwise force op0
19664 into a register. */
19665
19666 if (standard_80387_constant_p (op0) == 0
19667 || (MEM_P (op0)
19668 && ! (standard_80387_constant_p (op1) == 0
19669 || MEM_P (op1))))
19670 {
19671 enum rtx_code new_code = ix86_fp_swap_condition (code);
19672 if (new_code != UNKNOWN)
19673 {
19674 rtx tmp;
19675 tmp = op0, op0 = op1, op1 = tmp;
19676 code = new_code;
19677 }
19678 }
19679
19680 if (!REG_P (op0))
19681 op0 = force_reg (op_mode, op0);
19682
19683 if (CONSTANT_P (op1))
19684 {
19685 int tmp = standard_80387_constant_p (op1);
19686 if (tmp == 0)
19687 op1 = validize_mem (force_const_mem (op_mode, op1));
19688 else if (tmp == 1)
19689 {
19690 if (TARGET_CMOVE)
19691 op1 = force_reg (op_mode, op1);
19692 }
19693 else
19694 op1 = force_reg (op_mode, op1);
19695 }
19696 }
19697
19698 /* Try to rearrange the comparison to make it cheaper. */
19699 if (ix86_fp_comparison_cost (code)
19700 > ix86_fp_comparison_cost (swap_condition (code))
19701 && (REG_P (op1) || can_create_pseudo_p ()))
19702 {
19703 rtx tmp;
19704 tmp = op0, op0 = op1, op1 = tmp;
19705 code = swap_condition (code);
19706 if (!REG_P (op0))
19707 op0 = force_reg (op_mode, op0);
19708 }
19709
19710 *pop0 = op0;
19711 *pop1 = op1;
19712 return code;
19713 }
19714
19715 /* Convert the comparison codes we use to represent FP comparisons to the
19716 integer codes that will result in a proper branch. Return UNKNOWN if no
19717 such code is available. */
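/* After fcomi, comiss/comisd or fnstsw+sahf, the FP result lands in the
   integer flags as for an unsigned comparison: greater gives ZF=0 CF=0,
   less gives CF=1, equal gives ZF=1, and unordered gives ZF=PF=CF=1.
   Hence GT maps to GTU, UNLT to LTU, and so on. */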
19718
19719 enum rtx_code
19720 ix86_fp_compare_code_to_integer (enum rtx_code code)
19721 {
19722 switch (code)
19723 {
19724 case GT:
19725 return GTU;
19726 case GE:
19727 return GEU;
19728 case ORDERED:
19729 case UNORDERED:
19730 return code;
19731 break;
19732 case UNEQ:
19733 return EQ;
19734 break;
19735 case UNLT:
19736 return LTU;
19737 break;
19738 case UNLE:
19739 return LEU;
19740 break;
19741 case LTGT:
19742 return NE;
19743 break;
19744 default:
19745 return UNKNOWN;
19746 }
19747 }
19748
19749 /* Generate insn patterns to do a floating point compare of OP0 and OP1 using comparison code CODE, with SCRATCH as an optional HImode scratch register. */
19750
19751 static rtx
19752 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19753 {
19754 enum machine_mode fpcmp_mode, intcmp_mode;
19755 rtx tmp, tmp2;
19756
19757 fpcmp_mode = ix86_fp_compare_mode (code);
19758 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19759
19760 /* Do fcomi/sahf based test when profitable. */
19761 switch (ix86_fp_comparison_strategy (code))
19762 {
19763 case IX86_FPCMP_COMI:
19764 intcmp_mode = fpcmp_mode;
19765 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19766 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19767 tmp);
19768 emit_insn (tmp);
19769 break;
19770
19771 case IX86_FPCMP_SAHF:
19772 intcmp_mode = fpcmp_mode;
19773 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19774 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19775 tmp);
19776
19777 if (!scratch)
19778 scratch = gen_reg_rtx (HImode);
19779 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19780 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19781 break;
19782
19783 case IX86_FPCMP_ARITH:
19784 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19785 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19786 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19787 if (!scratch)
19788 scratch = gen_reg_rtx (HImode);
19789 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19790
19791 /* In the unordered case, we have to check C2 for NaN's, which
19792 doesn't happen to work out to anything nice combination-wise.
19793 So do some bit twiddling on the value we've got in AH to come
19794 up with an appropriate set of condition codes. */
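/* After fnstsw the FPU status word is in %ax, so %ah holds C0 (0x01),
   C2 (0x04) and C3 (0x40). An fcom-style compare leaves all three clear
   for >, sets C0 for <, sets C3 for ==, and sets all three for unordered;
   the masks below (0x01, 0x04, 0x05, 0x40, 0x44, 0x45) test combinations
   of these bits. */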
19795
19796 intcmp_mode = CCNOmode;
19797 switch (code)
19798 {
19799 case GT:
19800 case UNGT:
19801 if (code == GT || !TARGET_IEEE_FP)
19802 {
19803 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19804 code = EQ;
19805 }
19806 else
19807 {
19808 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19809 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19810 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19811 intcmp_mode = CCmode;
19812 code = GEU;
19813 }
19814 break;
19815 case LT:
19816 case UNLT:
19817 if (code == LT && TARGET_IEEE_FP)
19818 {
19819 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19820 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19821 intcmp_mode = CCmode;
19822 code = EQ;
19823 }
19824 else
19825 {
19826 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19827 code = NE;
19828 }
19829 break;
19830 case GE:
19831 case UNGE:
19832 if (code == GE || !TARGET_IEEE_FP)
19833 {
19834 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19835 code = EQ;
19836 }
19837 else
19838 {
19839 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19840 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19841 code = NE;
19842 }
19843 break;
19844 case LE:
19845 case UNLE:
19846 if (code == LE && TARGET_IEEE_FP)
19847 {
19848 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19849 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19850 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19851 intcmp_mode = CCmode;
19852 code = LTU;
19853 }
19854 else
19855 {
19856 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19857 code = NE;
19858 }
19859 break;
19860 case EQ:
19861 case UNEQ:
19862 if (code == EQ && TARGET_IEEE_FP)
19863 {
19864 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19865 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19866 intcmp_mode = CCmode;
19867 code = EQ;
19868 }
19869 else
19870 {
19871 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19872 code = NE;
19873 }
19874 break;
19875 case NE:
19876 case LTGT:
19877 if (code == NE && TARGET_IEEE_FP)
19878 {
19879 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19880 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19881 GEN_INT (0x40)));
19882 code = NE;
19883 }
19884 else
19885 {
19886 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19887 code = EQ;
19888 }
19889 break;
19890
19891 case UNORDERED:
19892 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19893 code = NE;
19894 break;
19895 case ORDERED:
19896 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19897 code = EQ;
19898 break;
19899
19900 default:
19901 gcc_unreachable ();
19902 }
19903 break;
19904
19905 default:
19906 gcc_unreachable();
19907 }
19908
19909 /* Return the test that should be put into the flags user, i.e.
19910 the bcc, scc, or cmov instruction. */
19911 return gen_rtx_fmt_ee (code, VOIDmode,
19912 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19913 const0_rtx);
19914 }
19915
19916 static rtx
19917 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19918 {
19919 rtx ret;
19920
19921 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19922 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19923
19924 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19925 {
19926 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19927 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19928 }
19929 else
19930 ret = ix86_expand_int_compare (code, op0, op1);
19931
19932 return ret;
19933 }
19934
19935 void
19936 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19937 {
19938 enum machine_mode mode = GET_MODE (op0);
19939 rtx tmp;
19940
19941 switch (mode)
19942 {
19943 case SFmode:
19944 case DFmode:
19945 case XFmode:
19946 case QImode:
19947 case HImode:
19948 case SImode:
19949 simple:
19950 tmp = ix86_expand_compare (code, op0, op1);
19951 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19952 gen_rtx_LABEL_REF (VOIDmode, label),
19953 pc_rtx);
19954 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19955 return;
19956
19957 case DImode:
19958 if (TARGET_64BIT)
19959 goto simple;
19960 case TImode:
19961 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
19962 {
19963 rtx lo[2], hi[2];
19964 rtx_code_label *label2;
19965 enum rtx_code code1, code2, code3;
19966 enum machine_mode submode;
19967
19968 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19969 {
19970 tmp = op0, op0 = op1, op1 = tmp;
19971 code = swap_condition (code);
19972 }
19973
19974 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19975 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19976
19977 submode = mode == DImode ? SImode : DImode;
19978
19979 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19980 avoid two branches. This costs one extra insn, so disable when
19981 optimizing for size. */
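/* E.g. a DImode "a == b" becomes
   ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0 computed in SImode, needing
   just one conditional branch. */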
19982
19983 if ((code == EQ || code == NE)
19984 && (!optimize_insn_for_size_p ()
19985 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19986 {
19987 rtx xor0, xor1;
19988
19989 xor1 = hi[0];
19990 if (hi[1] != const0_rtx)
19991 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19992 NULL_RTX, 0, OPTAB_WIDEN);
19993
19994 xor0 = lo[0];
19995 if (lo[1] != const0_rtx)
19996 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19997 NULL_RTX, 0, OPTAB_WIDEN);
19998
19999 tmp = expand_binop (submode, ior_optab, xor1, xor0,
20000 NULL_RTX, 0, OPTAB_WIDEN);
20001
20002 ix86_expand_branch (code, tmp, const0_rtx, label);
20003 return;
20004 }
20005
20006 /* Otherwise, if we are doing a less-than or greater-or-equal-than
20007 comparison, op1 is a constant, and its low word is zero, then we can
20008 just examine the high word. Similarly when the low word is -1 and the
20009 comparison is less-or-equal-than or greater-than. */
20010
20011 if (CONST_INT_P (hi[1]))
20012 switch (code)
20013 {
20014 case LT: case LTU: case GE: case GEU:
20015 if (lo[1] == const0_rtx)
20016 {
20017 ix86_expand_branch (code, hi[0], hi[1], label);
20018 return;
20019 }
20020 break;
20021 case LE: case LEU: case GT: case GTU:
20022 if (lo[1] == constm1_rtx)
20023 {
20024 ix86_expand_branch (code, hi[0], hi[1], label);
20025 return;
20026 }
20027 break;
20028 default:
20029 break;
20030 }
20031
20032 /* Otherwise, we need two or three jumps. */
20033
20034 label2 = gen_label_rtx ();
20035
20036 code1 = code;
20037 code2 = swap_condition (code);
20038 code3 = unsigned_condition (code);
20039
20040 switch (code)
20041 {
20042 case LT: case GT: case LTU: case GTU:
20043 break;
20044
20045 case LE: code1 = LT; code2 = GT; break;
20046 case GE: code1 = GT; code2 = LT; break;
20047 case LEU: code1 = LTU; code2 = GTU; break;
20048 case GEU: code1 = GTU; code2 = LTU; break;
20049
20050 case EQ: code1 = UNKNOWN; code2 = NE; break;
20051 case NE: code2 = UNKNOWN; break;
20052
20053 default:
20054 gcc_unreachable ();
20055 }
20056
20057 /*
20058 * a < b =>
20059 * if (hi(a) < hi(b)) goto true;
20060 * if (hi(a) > hi(b)) goto false;
20061 * if (lo(a) < lo(b)) goto true;
20062 * false:
20063 */
20064
20065 if (code1 != UNKNOWN)
20066 ix86_expand_branch (code1, hi[0], hi[1], label);
20067 if (code2 != UNKNOWN)
20068 ix86_expand_branch (code2, hi[0], hi[1], label2);
20069
20070 ix86_expand_branch (code3, lo[0], lo[1], label);
20071
20072 if (code2 != UNKNOWN)
20073 emit_label (label2);
20074 return;
20075 }
20076
20077 default:
20078 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20079 goto simple;
20080 }
20081 }
20082
20083 /* Split branch based on floating point condition. */
20084 void
20085 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20086 rtx target1, rtx target2, rtx tmp)
20087 {
20088 rtx condition;
20089 rtx i;
20090
20091 if (target2 != pc_rtx)
20092 {
20093 rtx tmp = target2;
20094 code = reverse_condition_maybe_unordered (code);
20095 target2 = target1;
20096 target1 = tmp;
20097 }
20098
20099 condition = ix86_expand_fp_compare (code, op1, op2,
20100 tmp);
20101
20102 i = emit_jump_insn (gen_rtx_SET
20103 (VOIDmode, pc_rtx,
20104 gen_rtx_IF_THEN_ELSE (VOIDmode,
20105 condition, target1, target2)));
20106 if (split_branch_probability >= 0)
20107 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20108 }
20109
20110 void
20111 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20112 {
20113 rtx ret;
20114
20115 gcc_assert (GET_MODE (dest) == QImode);
20116
20117 ret = ix86_expand_compare (code, op0, op1);
20118 PUT_MODE (ret, QImode);
20119 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20120 }
20121
20122 /* Expand a comparison setting or clearing the carry flag. Return true when
20123 successful and set *POP to the comparison operation. */
20124 static bool
20125 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20126 {
20127 enum machine_mode mode =
20128 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20129
20130 /* Do not handle double-mode compares; they go through a special path. */
20131 if (mode == (TARGET_64BIT ? TImode : DImode))
20132 return false;
20133
20134 if (SCALAR_FLOAT_MODE_P (mode))
20135 {
20136 rtx compare_op;
20137 rtx_insn *compare_seq;
20138
20139 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20140
20141 /* Shortcut: the following common codes never translate
20142 into carry flag compares. */
20143 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20144 || code == ORDERED || code == UNORDERED)
20145 return false;
20146
20147 /* These comparisons require the zero flag; swap the operands so they no longer do. */
20148 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20149 && !TARGET_IEEE_FP)
20150 {
20151 rtx tmp = op0;
20152 op0 = op1;
20153 op1 = tmp;
20154 code = swap_condition (code);
20155 }
20156
20157 /* Try to expand the comparison and verify that we end up with a
20158 carry flag based comparison. This fails only when we decide to
20159 expand the comparison using arithmetic, which is not a common
20160 scenario. */
20161 start_sequence ();
20162 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20163 compare_seq = get_insns ();
20164 end_sequence ();
20165
20166 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20167 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20168 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20169 else
20170 code = GET_CODE (compare_op);
20171
20172 if (code != LTU && code != GEU)
20173 return false;
20174
20175 emit_insn (compare_seq);
20176 *pop = compare_op;
20177 return true;
20178 }
20179
20180 if (!INTEGRAL_MODE_P (mode))
20181 return false;
20182
20183 switch (code)
20184 {
20185 case LTU:
20186 case GEU:
20187 break;
20188
20189 /* Convert a==0 into (unsigned)a<1. */
20190 case EQ:
20191 case NE:
20192 if (op1 != const0_rtx)
20193 return false;
20194 op1 = const1_rtx;
20195 code = (code == EQ ? LTU : GEU);
20196 break;
20197
20198 /* Convert a>b into b<a or a>=b+1. */
20199 case GTU:
20200 case LEU:
20201 if (CONST_INT_P (op1))
20202 {
20203 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20204 /* Bail out on overflow. We could still swap the operands, but that
20205 would force loading the constant into a register. */
20206 if (op1 == const0_rtx
20207 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20208 return false;
20209 code = (code == GTU ? GEU : LTU);
20210 }
20211 else
20212 {
20213 rtx tmp = op1;
20214 op1 = op0;
20215 op0 = tmp;
20216 code = (code == GTU ? LTU : GEU);
20217 }
20218 break;
20219
20220 /* Convert a>=0 into (unsigned)a<0x80000000. */
20221 case LT:
20222 case GE:
20223 if (mode == DImode || op1 != const0_rtx)
20224 return false;
20225 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20226 code = (code == LT ? GEU : LTU);
20227 break;
20228 case LE:
20229 case GT:
20230 if (mode == DImode || op1 != constm1_rtx)
20231 return false;
20232 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20233 code = (code == LE ? GEU : LTU);
20234 break;
20235
20236 default:
20237 return false;
20238 }
20239 /* Swapping operands may cause a constant to appear as the first operand. */
20240 if (!nonimmediate_operand (op0, VOIDmode))
20241 {
20242 if (!can_create_pseudo_p ())
20243 return false;
20244 op0 = force_reg (mode, op0);
20245 }
20246 *pop = ix86_expand_compare (code, op0, op1);
20247 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20248 return true;
20249 }
20250
20251 bool
20252 ix86_expand_int_movcc (rtx operands[])
20253 {
20254 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20255 rtx_insn *compare_seq;
20256 rtx compare_op;
20257 enum machine_mode mode = GET_MODE (operands[0]);
20258 bool sign_bit_compare_p = false;
20259 rtx op0 = XEXP (operands[1], 0);
20260 rtx op1 = XEXP (operands[1], 1);
20261
20262 if (GET_MODE (op0) == TImode
20263 || (GET_MODE (op0) == DImode
20264 && !TARGET_64BIT))
20265 return false;
20266
20267 start_sequence ();
20268 compare_op = ix86_expand_compare (code, op0, op1);
20269 compare_seq = get_insns ();
20270 end_sequence ();
20271
20272 compare_code = GET_CODE (compare_op);
20273
20274 if ((op1 == const0_rtx && (code == GE || code == LT))
20275 || (op1 == constm1_rtx && (code == GT || code == LE)))
20276 sign_bit_compare_p = true;
20277
20278 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20279 HImode insns, we'd be swallowed in word prefix ops. */
20280
20281 if ((mode != HImode || TARGET_FAST_PREFIX)
20282 && (mode != (TARGET_64BIT ? TImode : DImode))
20283 && CONST_INT_P (operands[2])
20284 && CONST_INT_P (operands[3]))
20285 {
20286 rtx out = operands[0];
20287 HOST_WIDE_INT ct = INTVAL (operands[2]);
20288 HOST_WIDE_INT cf = INTVAL (operands[3]);
20289 HOST_WIDE_INT diff;
20290
20291 diff = ct - cf;
20292 /* Sign bit compares are better done using shifts than by using
20293 sbb. */
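/* The sbb idiom: once the compare has set the carry flag,
   "sbb %reg, %reg" computes reg - reg - CF, i.e. all-ones when CF is set
   and zero otherwise, giving a branch-free -1/0 mask to massage below. */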
20294 if (sign_bit_compare_p
20295 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20296 {
20297 /* Detect overlap between destination and compare sources. */
20298 rtx tmp = out;
20299
20300 if (!sign_bit_compare_p)
20301 {
20302 rtx flags;
20303 bool fpcmp = false;
20304
20305 compare_code = GET_CODE (compare_op);
20306
20307 flags = XEXP (compare_op, 0);
20308
20309 if (GET_MODE (flags) == CCFPmode
20310 || GET_MODE (flags) == CCFPUmode)
20311 {
20312 fpcmp = true;
20313 compare_code
20314 = ix86_fp_compare_code_to_integer (compare_code);
20315 }
20316
20317 /* To simplify the rest of the code, restrict to the GEU case. */
20318 if (compare_code == LTU)
20319 {
20320 HOST_WIDE_INT tmp = ct;
20321 ct = cf;
20322 cf = tmp;
20323 compare_code = reverse_condition (compare_code);
20324 code = reverse_condition (code);
20325 }
20326 else
20327 {
20328 if (fpcmp)
20329 PUT_CODE (compare_op,
20330 reverse_condition_maybe_unordered
20331 (GET_CODE (compare_op)));
20332 else
20333 PUT_CODE (compare_op,
20334 reverse_condition (GET_CODE (compare_op)));
20335 }
20336 diff = ct - cf;
20337
20338 if (reg_overlap_mentioned_p (out, op0)
20339 || reg_overlap_mentioned_p (out, op1))
20340 tmp = gen_reg_rtx (mode);
20341
20342 if (mode == DImode)
20343 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20344 else
20345 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20346 flags, compare_op));
20347 }
20348 else
20349 {
20350 if (code == GT || code == GE)
20351 code = reverse_condition (code);
20352 else
20353 {
20354 HOST_WIDE_INT tmp = ct;
20355 ct = cf;
20356 cf = tmp;
20357 diff = ct - cf;
20358 }
20359 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20360 }
20361
20362 if (diff == 1)
20363 {
20364 /*
20365 * cmpl op0,op1
20366 * sbbl dest,dest
20367 * [addl dest, ct]
20368 *
20369 * Size 5 - 8.
20370 */
20371 if (ct)
20372 tmp = expand_simple_binop (mode, PLUS,
20373 tmp, GEN_INT (ct),
20374 copy_rtx (tmp), 1, OPTAB_DIRECT);
20375 }
20376 else if (cf == -1)
20377 {
20378 /*
20379 * cmpl op0,op1
20380 * sbbl dest,dest
20381 * orl $ct, dest
20382 *
20383 * Size 8.
20384 */
20385 tmp = expand_simple_binop (mode, IOR,
20386 tmp, GEN_INT (ct),
20387 copy_rtx (tmp), 1, OPTAB_DIRECT);
20388 }
20389 else if (diff == -1 && ct)
20390 {
20391 /*
20392 * cmpl op0,op1
20393 * sbbl dest,dest
20394 * notl dest
20395 * [addl dest, cf]
20396 *
20397 * Size 8 - 11.
20398 */
20399 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20400 if (cf)
20401 tmp = expand_simple_binop (mode, PLUS,
20402 copy_rtx (tmp), GEN_INT (cf),
20403 copy_rtx (tmp), 1, OPTAB_DIRECT);
20404 }
20405 else
20406 {
20407 /*
20408 * cmpl op0,op1
20409 * sbbl dest,dest
20410 * [notl dest]
20411 * andl cf - ct, dest
20412 * [addl dest, ct]
20413 *
20414 * Size 8 - 11.
20415 */
20416
20417 if (cf == 0)
20418 {
20419 cf = ct;
20420 ct = 0;
20421 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20422 }
20423
20424 tmp = expand_simple_binop (mode, AND,
20425 copy_rtx (tmp),
20426 gen_int_mode (cf - ct, mode),
20427 copy_rtx (tmp), 1, OPTAB_DIRECT);
20428 if (ct)
20429 tmp = expand_simple_binop (mode, PLUS,
20430 copy_rtx (tmp), GEN_INT (ct),
20431 copy_rtx (tmp), 1, OPTAB_DIRECT);
20432 }
20433
20434 if (!rtx_equal_p (tmp, out))
20435 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20436
20437 return true;
20438 }
20439
20440 if (diff < 0)
20441 {
20442 enum machine_mode cmp_mode = GET_MODE (op0);
20443
20444 HOST_WIDE_INT tmp;
20445 tmp = ct, ct = cf, cf = tmp;
20446 diff = -diff;
20447
20448 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20449 {
20450 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20451
20452 /* We may be reversing an unordered compare to a normal compare, which
20453 is not valid in general (we may convert a non-trapping condition
20454 to a trapping one); however, on i386 we currently emit all
20455 comparisons unordered. */
20456 compare_code = reverse_condition_maybe_unordered (compare_code);
20457 code = reverse_condition_maybe_unordered (code);
20458 }
20459 else
20460 {
20461 compare_code = reverse_condition (compare_code);
20462 code = reverse_condition (code);
20463 }
20464 }
20465
20466 compare_code = UNKNOWN;
20467 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20468 && CONST_INT_P (op1))
20469 {
20470 if (op1 == const0_rtx
20471 && (code == LT || code == GE))
20472 compare_code = code;
20473 else if (op1 == constm1_rtx)
20474 {
20475 if (code == LE)
20476 compare_code = LT;
20477 else if (code == GT)
20478 compare_code = GE;
20479 }
20480 }
20481
20482 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20483 if (compare_code != UNKNOWN
20484 && GET_MODE (op0) == GET_MODE (out)
20485 && (cf == -1 || ct == -1))
20486 {
20487 /* If the lea code below could be used, only optimize
20488 if it results in a 2-insn sequence. */
20489
20490 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20491 || diff == 3 || diff == 5 || diff == 9)
20492 || (compare_code == LT && ct == -1)
20493 || (compare_code == GE && cf == -1))
20494 {
20495 /*
20496 * notl op1 (if necessary)
20497 * sarl $31, op1
20498 * orl cf, op1
20499 */
20500 if (ct != -1)
20501 {
20502 cf = ct;
20503 ct = -1;
20504 code = reverse_condition (code);
20505 }
20506
20507 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20508
20509 out = expand_simple_binop (mode, IOR,
20510 out, GEN_INT (cf),
20511 out, 1, OPTAB_DIRECT);
20512 if (out != operands[0])
20513 emit_move_insn (operands[0], out);
20514
20515 return true;
20516 }
20517 }
20518
20519
20520 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20521 || diff == 3 || diff == 5 || diff == 9)
20522 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20523 && (mode != DImode
20524 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20525 {
20526 /*
20527 * xorl dest,dest
20528 * cmpl op1,op2
20529 * setcc dest
20530 * lea cf(dest*(ct-cf)),dest
20531 *
20532 * Size 14.
20533 *
20534 * This also catches the degenerate setcc-only case.
20535 */
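/* E.g. for ct = 5, cf = 1 we have diff = 4, so after the setcc the result
   is formed by "lea 1(,%reg,4), %dest", i.e. 1 + reg*4, which is 1 or 5. */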
20536
20537 rtx tmp;
20538 int nops;
20539
20540 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20541
20542 nops = 0;
20543 /* On x86_64 the lea instruction operates on Pmode, so we need
20544 to get the arithmetic done in the proper mode to match. */
20545 if (diff == 1)
20546 tmp = copy_rtx (out);
20547 else
20548 {
20549 rtx out1;
20550 out1 = copy_rtx (out);
20551 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20552 nops++;
20553 if (diff & 1)
20554 {
20555 tmp = gen_rtx_PLUS (mode, tmp, out1);
20556 nops++;
20557 }
20558 }
20559 if (cf != 0)
20560 {
20561 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20562 nops++;
20563 }
20564 if (!rtx_equal_p (tmp, out))
20565 {
20566 if (nops == 1)
20567 out = force_operand (tmp, copy_rtx (out));
20568 else
20569 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20570 }
20571 if (!rtx_equal_p (out, operands[0]))
20572 emit_move_insn (operands[0], copy_rtx (out));
20573
20574 return true;
20575 }
20576
20577 /*
20578 * General case: Jumpful:
20579 * xorl dest,dest cmpl op1, op2
20580 * cmpl op1, op2 movl ct, dest
20581 * setcc dest jcc 1f
20582 * decl dest movl cf, dest
20583 * andl (cf-ct),dest 1:
20584 * addl ct,dest
20585 *
20586 * Size 20. Size 14.
20587 *
20588 * This is reasonably steep, but branch mispredict costs are
20589 * high on modern cpus, so consider failing only if optimizing
20590 * for space.
20591 */
20592
20593 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20594 && BRANCH_COST (optimize_insn_for_speed_p (),
20595 false) >= 2)
20596 {
20597 if (cf == 0)
20598 {
20599 enum machine_mode cmp_mode = GET_MODE (op0);
20600
20601 cf = ct;
20602 ct = 0;
20603
20604 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20605 {
20606 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20607
20608 /* We may be reversing an unordered compare to a normal compare,
20609 which is not valid in general (we may convert a non-trapping
20610 condition to a trapping one); however, on i386 we currently
20611 emit all comparisons unordered. */
20612 code = reverse_condition_maybe_unordered (code);
20613 }
20614 else
20615 {
20616 code = reverse_condition (code);
20617 if (compare_code != UNKNOWN)
20618 compare_code = reverse_condition (compare_code);
20619 }
20620 }
20621
20622 if (compare_code != UNKNOWN)
20623 {
20624 /* notl op1 (if needed)
20625 sarl $31, op1
20626 andl (cf-ct), op1
20627 addl ct, op1
20628
20629 For x < 0 (resp. x <= -1) there will be no notl,
20630 so if possible swap the constants to get rid of the
20631 complement.
20632 True/false will be -1/0 while code below (store flag
20633 followed by decrement) is 0/-1, so the constants need
20634 to be exchanged once more. */
20635
20636 if (compare_code == GE || !cf)
20637 {
20638 code = reverse_condition (code);
20639 compare_code = LT;
20640 }
20641 else
20642 {
20643 HOST_WIDE_INT tmp = cf;
20644 cf = ct;
20645 ct = tmp;
20646 }
20647
20648 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20649 }
20650 else
20651 {
20652 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20653
20654 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20655 constm1_rtx,
20656 copy_rtx (out), 1, OPTAB_DIRECT);
20657 }
20658
20659 out = expand_simple_binop (mode, AND, copy_rtx (out),
20660 gen_int_mode (cf - ct, mode),
20661 copy_rtx (out), 1, OPTAB_DIRECT);
20662 if (ct)
20663 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20664 copy_rtx (out), 1, OPTAB_DIRECT);
20665 if (!rtx_equal_p (out, operands[0]))
20666 emit_move_insn (operands[0], copy_rtx (out));
20667
20668 return true;
20669 }
20670 }
20671
20672 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20673 {
20674 /* Try a few things more with specific constants and a variable. */
20675
20676 optab op;
20677 rtx var, orig_out, out, tmp;
20678
20679 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20680 return false;
20681
20682 /* If one of the two operands is an interesting constant (0 or -1), load a
20683 0/-1 value with the recursion below and mask the variable in with AND/IOR. */
20684
20685 if (CONST_INT_P (operands[2]))
20686 {
20687 var = operands[3];
20688 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20689 operands[3] = constm1_rtx, op = and_optab;
20690 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20691 operands[3] = const0_rtx, op = ior_optab;
20692 else
20693 return false;
20694 }
20695 else if (CONST_INT_P (operands[3]))
20696 {
20697 var = operands[2];
20698 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20699 operands[2] = constm1_rtx, op = and_optab;
20700 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20701 operands[2] = const0_rtx, op = ior_optab;
20702 else
20703 return false;
20704 }
20705 else
20706 return false;
20707
20708 orig_out = operands[0];
20709 tmp = gen_reg_rtx (mode);
20710 operands[0] = tmp;
20711
20712 /* Recurse to get the constant loaded. */
20713 if (ix86_expand_int_movcc (operands) == 0)
20714 return false;
20715
20716 /* Mask in the interesting variable. */
20717 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20718 OPTAB_WIDEN);
20719 if (!rtx_equal_p (out, orig_out))
20720 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20721
20722 return true;
20723 }
20724
20725 /*
20726 * For comparison with above,
20727 *
20728 * movl cf,dest
20729 * movl ct,tmp
20730 * cmpl op1,op2
20731 * cmovcc tmp,dest
20732 *
20733 * Size 15.
20734 */
20735
20736 if (! nonimmediate_operand (operands[2], mode))
20737 operands[2] = force_reg (mode, operands[2]);
20738 if (! nonimmediate_operand (operands[3], mode))
20739 operands[3] = force_reg (mode, operands[3]);
20740
20741 if (! register_operand (operands[2], VOIDmode)
20742 && (mode == QImode
20743 || ! register_operand (operands[3], VOIDmode)))
20744 operands[2] = force_reg (mode, operands[2]);
20745
20746 if (mode == QImode
20747 && ! register_operand (operands[3], VOIDmode))
20748 operands[3] = force_reg (mode, operands[3]);
20749
20750 emit_insn (compare_seq);
20751 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20752 gen_rtx_IF_THEN_ELSE (mode,
20753 compare_op, operands[2],
20754 operands[3])));
20755 return true;
20756 }
20757
20758 /* Swap, force into registers, or otherwise massage the two operands
20759 to an sse comparison with a mask result. Thus we differ a bit from
20760 ix86_prepare_fp_compare_args which expects to produce a flags result.
20761
20762 The DEST operand exists to help determine whether to commute commutative
20763 operators. The POP0/POP1 operands are updated in place. The new
20764 comparison code is returned, or UNKNOWN if not implementable. */
20765
20766 static enum rtx_code
20767 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20768 rtx *pop0, rtx *pop1)
20769 {
20770 rtx tmp;
20771
20772 switch (code)
20773 {
20774 case LTGT:
20775 case UNEQ:
20776 /* AVX supports all the needed comparisons. */
20777 if (TARGET_AVX)
20778 break;
20779 /* We have no LTGT as an operator. We could implement it with
20780 NE & ORDERED, but this requires an extra temporary. It's
20781 not clear that it's worth it. */
20782 return UNKNOWN;
20783
20784 case LT:
20785 case LE:
20786 case UNGT:
20787 case UNGE:
20788 /* These are supported directly. */
20789 break;
20790
20791 case EQ:
20792 case NE:
20793 case UNORDERED:
20794 case ORDERED:
20795 /* AVX has 3 operand comparisons, no need to swap anything. */
20796 if (TARGET_AVX)
20797 break;
20798 /* For commutative operators, try to canonicalize the destination
20799 operand to be first in the comparison - this helps reload to
20800 avoid extra moves. */
20801 if (!dest || !rtx_equal_p (dest, *pop1))
20802 break;
20803 /* FALLTHRU */
20804
20805 case GE:
20806 case GT:
20807 case UNLE:
20808 case UNLT:
20809 /* These are not supported directly before AVX, and furthermore
20810 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20811 comparison operands to transform into something that is
20812 supported. */
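/* E.g. a GE b is rewritten as b LE a so that the cmple{ps,pd,ss,sd}
   predicate, which SSE provides directly, can be used. */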
20813 tmp = *pop0;
20814 *pop0 = *pop1;
20815 *pop1 = tmp;
20816 code = swap_condition (code);
20817 break;
20818
20819 default:
20820 gcc_unreachable ();
20821 }
20822
20823 return code;
20824 }
20825
20826 /* Detect conditional moves that exactly match min/max operational
20827 semantics. Note that this is IEEE safe, as long as we don't
20828 interchange the operands.
20829
20830 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20831 and TRUE if the operation is successful and instructions are emitted. */
20832
20833 static bool
20834 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20835 rtx cmp_op1, rtx if_true, rtx if_false)
20836 {
20837 enum machine_mode mode;
20838 bool is_min;
20839 rtx tmp;
20840
20841 if (code == LT)
20842 ;
20843 else if (code == UNGE)
20844 {
20845 tmp = if_true;
20846 if_true = if_false;
20847 if_false = tmp;
20848 }
20849 else
20850 return false;
20851
20852 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20853 is_min = true;
20854 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20855 is_min = false;
20856 else
20857 return false;
20858
20859 mode = GET_MODE (dest);
20860
20861 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20862 but MODE may be a vector mode and thus not appropriate. */
20863 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20864 {
20865 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20866 rtvec v;
20867
20868 if_true = force_reg (mode, if_true);
20869 v = gen_rtvec (2, if_true, if_false);
20870 tmp = gen_rtx_UNSPEC (mode, v, u);
20871 }
20872 else
20873 {
20874 code = is_min ? SMIN : SMAX;
20875 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20876 }
20877
20878 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20879 return true;
20880 }
20881
20882 /* Expand an sse vector comparison. Return the register with the result. */
20883
20884 static rtx
20885 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20886 rtx op_true, rtx op_false)
20887 {
20888 enum machine_mode mode = GET_MODE (dest);
20889 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20890
20891 /* In the general case the mode of the comparison result can differ from the mode of the operands. */
20892 enum machine_mode cmp_mode;
20893
20894 /* In AVX512F the result of comparison is an integer mask. */
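/* E.g. comparing two V16SImode vectors yields a 16-bit mask, one bit per
   element, so CMP_MODE below becomes HImode rather than a vector mode. */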
20895 bool maskcmp = false;
20896 rtx x;
20897
20898 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20899 {
20900 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20901 gcc_assert (cmp_mode != BLKmode);
20902
20903 maskcmp = true;
20904 }
20905 else
20906 cmp_mode = cmp_ops_mode;
20907
20908
20909 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20910 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20911 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20912
20913 if (optimize
20914 || reg_overlap_mentioned_p (dest, op_true)
20915 || reg_overlap_mentioned_p (dest, op_false))
20916 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20917
20918 /* The compare patterns for integer modes are UNSPECs in AVX512F only; emit them through their specific expanders directly. */
20919 if (maskcmp && (code == GT || code == EQ))
20920 {
20921 rtx (*gen)(rtx, rtx, rtx);
20922
20923 switch (cmp_ops_mode)
20924 {
20925 case V16SImode:
20926 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20927 break;
20928 case V8DImode:
20929 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20930 break;
20931 default:
20932 gen = NULL;
20933 }
20934
20935 if (gen)
20936 {
20937 emit_insn (gen (dest, cmp_op0, cmp_op1));
20938 return dest;
20939 }
20940 }
20941 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20942
20943 if (cmp_mode != mode && !maskcmp)
20944 {
20945 x = force_reg (cmp_ops_mode, x);
20946 convert_move (dest, x, false);
20947 }
20948 else
20949 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20950
20951 return dest;
20952 }
20953
20954 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20955 operations. This is used for both scalar and vector conditional moves. */
20956
20957 static void
20958 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20959 {
20960 enum machine_mode mode = GET_MODE (dest);
20961 enum machine_mode cmpmode = GET_MODE (cmp);
20962
20963 /* In AVX512F the result of comparison is an integer mask. */
20964 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20965
20966 rtx t2, t3, x;
20967
20968 if (vector_all_ones_operand (op_true, mode)
20969 && rtx_equal_p (op_false, CONST0_RTX (mode))
20970 && !maskcmp)
20971 {
20972 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20973 }
20974 else if (op_false == CONST0_RTX (mode)
20975 && !maskcmp)
20976 {
20977 op_true = force_reg (mode, op_true);
20978 x = gen_rtx_AND (mode, cmp, op_true);
20979 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20980 }
20981 else if (op_true == CONST0_RTX (mode)
20982 && !maskcmp)
20983 {
20984 op_false = force_reg (mode, op_false);
20985 x = gen_rtx_NOT (mode, cmp);
20986 x = gen_rtx_AND (mode, x, op_false);
20987 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20988 }
20989 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20990 && !maskcmp)
20991 {
20992 op_false = force_reg (mode, op_false);
20993 x = gen_rtx_IOR (mode, cmp, op_false);
20994 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20995 }
20996 else if (TARGET_XOP
20997 && !maskcmp)
20998 {
20999 op_true = force_reg (mode, op_true);
21000
21001 if (!nonimmediate_operand (op_false, mode))
21002 op_false = force_reg (mode, op_false);
21003
21004 emit_insn (gen_rtx_SET (mode, dest,
21005 gen_rtx_IF_THEN_ELSE (mode, cmp,
21006 op_true,
21007 op_false)));
21008 }
21009 else
21010 {
21011 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21012 rtx d = dest;
21013
21014 if (!nonimmediate_operand (op_true, mode))
21015 op_true = force_reg (mode, op_true);
21016
21017 op_false = force_reg (mode, op_false);
21018
21019 switch (mode)
21020 {
21021 case V4SFmode:
21022 if (TARGET_SSE4_1)
21023 gen = gen_sse4_1_blendvps;
21024 break;
21025 case V2DFmode:
21026 if (TARGET_SSE4_1)
21027 gen = gen_sse4_1_blendvpd;
21028 break;
21029 case V16QImode:
21030 case V8HImode:
21031 case V4SImode:
21032 case V2DImode:
21033 if (TARGET_SSE4_1)
21034 {
21035 gen = gen_sse4_1_pblendvb;
21036 if (mode != V16QImode)
21037 d = gen_reg_rtx (V16QImode);
21038 op_false = gen_lowpart (V16QImode, op_false);
21039 op_true = gen_lowpart (V16QImode, op_true);
21040 cmp = gen_lowpart (V16QImode, cmp);
21041 }
21042 break;
21043 case V8SFmode:
21044 if (TARGET_AVX)
21045 gen = gen_avx_blendvps256;
21046 break;
21047 case V4DFmode:
21048 if (TARGET_AVX)
21049 gen = gen_avx_blendvpd256;
21050 break;
21051 case V32QImode:
21052 case V16HImode:
21053 case V8SImode:
21054 case V4DImode:
21055 if (TARGET_AVX2)
21056 {
21057 gen = gen_avx2_pblendvb;
21058 if (mode != V32QImode)
21059 d = gen_reg_rtx (V32QImode);
21060 op_false = gen_lowpart (V32QImode, op_false);
21061 op_true = gen_lowpart (V32QImode, op_true);
21062 cmp = gen_lowpart (V32QImode, cmp);
21063 }
21064 break;
21065
21066 case V64QImode:
21067 gen = gen_avx512bw_blendmv64qi;
21068 break;
21069 case V32HImode:
21070 gen = gen_avx512bw_blendmv32hi;
21071 break;
21072 case V16SImode:
21073 gen = gen_avx512f_blendmv16si;
21074 break;
21075 case V8DImode:
21076 gen = gen_avx512f_blendmv8di;
21077 break;
21078 case V8DFmode:
21079 gen = gen_avx512f_blendmv8df;
21080 break;
21081 case V16SFmode:
21082 gen = gen_avx512f_blendmv16sf;
21083 break;
21084
21085 default:
21086 break;
21087 }
21088
21089 if (gen != NULL)
21090 {
21091 emit_insn (gen (d, op_false, op_true, cmp));
21092 if (d != dest)
21093 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21094 }
21095 else
21096 {
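/* No blend instruction is available: compute
   dest = (cmp & op_true) | (~cmp & op_false) with three logical ops. */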
21097 op_true = force_reg (mode, op_true);
21098
21099 t2 = gen_reg_rtx (mode);
21100 if (optimize)
21101 t3 = gen_reg_rtx (mode);
21102 else
21103 t3 = dest;
21104
21105 x = gen_rtx_AND (mode, op_true, cmp);
21106 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21107
21108 x = gen_rtx_NOT (mode, cmp);
21109 x = gen_rtx_AND (mode, x, op_false);
21110 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21111
21112 x = gen_rtx_IOR (mode, t3, t2);
21113 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21114 }
21115 }
21116 }
21117
21118 /* Expand a floating-point conditional move. Return true if successful. */
21119
21120 bool
21121 ix86_expand_fp_movcc (rtx operands[])
21122 {
21123 enum machine_mode mode = GET_MODE (operands[0]);
21124 enum rtx_code code = GET_CODE (operands[1]);
21125 rtx tmp, compare_op;
21126 rtx op0 = XEXP (operands[1], 0);
21127 rtx op1 = XEXP (operands[1], 1);
21128
21129 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21130 {
21131 enum machine_mode cmode;
21132
21133 /* Since we have no cmove for sse registers, don't force bad register
21134 allocation just to gain access to it. Deny movcc when the
21135 comparison mode doesn't match the move mode. */
21136 cmode = GET_MODE (op0);
21137 if (cmode == VOIDmode)
21138 cmode = GET_MODE (op1);
21139 if (cmode != mode)
21140 return false;
21141
21142 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21143 if (code == UNKNOWN)
21144 return false;
21145
21146 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21147 operands[2], operands[3]))
21148 return true;
21149
21150 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21151 operands[2], operands[3]);
21152 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21153 return true;
21154 }
21155
21156 if (GET_MODE (op0) == TImode
21157 || (GET_MODE (op0) == DImode
21158 && !TARGET_64BIT))
21159 return false;
21160
21161 /* The floating point conditional move instructions don't directly
21162 support conditions resulting from a signed integer comparison. */
21163
21164 compare_op = ix86_expand_compare (code, op0, op1);
21165 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21166 {
21167 tmp = gen_reg_rtx (QImode);
21168 ix86_expand_setcc (tmp, code, op0, op1);
21169
21170 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21171 }
21172
21173 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21174 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21175 operands[2], operands[3])));
21176
21177 return true;
21178 }
21179
21180 /* Expand a floating-point vector conditional move; a vcond operation
21181 rather than a movcc operation. */
21182
21183 bool
21184 ix86_expand_fp_vcond (rtx operands[])
21185 {
21186 enum rtx_code code = GET_CODE (operands[3]);
21187 rtx cmp;
21188
21189 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21190 &operands[4], &operands[5]);
21191 if (code == UNKNOWN)
21192 {
21193 rtx temp;
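/* LTGT and UNEQ have no direct SSE predicate before AVX; emulate them as
   ORDERED & NE and UNORDERED | EQ respectively using two mask compares. */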
21194 switch (GET_CODE (operands[3]))
21195 {
21196 case LTGT:
21197 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21198 operands[5], operands[0], operands[0]);
21199 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21200 operands[5], operands[1], operands[2]);
21201 code = AND;
21202 break;
21203 case UNEQ:
21204 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21205 operands[5], operands[0], operands[0]);
21206 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21207 operands[5], operands[1], operands[2]);
21208 code = IOR;
21209 break;
21210 default:
21211 gcc_unreachable ();
21212 }
21213 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21214 OPTAB_DIRECT);
21215 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21216 return true;
21217 }
21218
21219 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21220 operands[5], operands[1], operands[2]))
21221 return true;
21222
21223 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21224 operands[1], operands[2]);
21225 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21226 return true;
21227 }
21228
21229 /* Expand a signed/unsigned integral vector conditional move. */
21230
21231 bool
21232 ix86_expand_int_vcond (rtx operands[])
21233 {
21234 enum machine_mode data_mode = GET_MODE (operands[0]);
21235 enum machine_mode mode = GET_MODE (operands[4]);
21236 enum rtx_code code = GET_CODE (operands[3]);
21237 bool negate = false;
21238 rtx x, cop0, cop1;
21239
21240 cop0 = operands[4];
21241 cop1 = operands[5];
21242
21243 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21244 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21245 if ((code == LT || code == GE)
21246 && data_mode == mode
21247 && cop1 == CONST0_RTX (mode)
21248 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21249 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21250 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21251 && (GET_MODE_SIZE (data_mode) == 16
21252 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21253 {
21254 rtx negop = operands[2 - (code == LT)];
21255 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21256 if (negop == CONST1_RTX (data_mode))
21257 {
21258 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21259 operands[0], 1, OPTAB_DIRECT);
21260 if (res != operands[0])
21261 emit_move_insn (operands[0], res);
21262 return true;
21263 }
21264 else if (GET_MODE_INNER (data_mode) != DImode
21265 && vector_all_ones_operand (negop, data_mode))
21266 {
21267 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21268 operands[0], 0, OPTAB_DIRECT);
21269 if (res != operands[0])
21270 emit_move_insn (operands[0], res);
21271 return true;
21272 }
21273 }
21274
21275 if (!nonimmediate_operand (cop1, mode))
21276 cop1 = force_reg (mode, cop1);
21277 if (!general_operand (operands[1], data_mode))
21278 operands[1] = force_reg (data_mode, operands[1]);
21279 if (!general_operand (operands[2], data_mode))
21280 operands[2] = force_reg (data_mode, operands[2]);
21281
21282 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21283 if (TARGET_XOP
21284 && (mode == V16QImode || mode == V8HImode
21285 || mode == V4SImode || mode == V2DImode))
21286 ;
21287 else
21288 {
21289 /* Canonicalize the comparison to EQ, GT, GTU. */
21290 switch (code)
21291 {
21292 case EQ:
21293 case GT:
21294 case GTU:
21295 break;
21296
21297 case NE:
21298 case LE:
21299 case LEU:
21300 code = reverse_condition (code);
21301 negate = true;
21302 break;
21303
21304 case GE:
21305 case GEU:
21306 code = reverse_condition (code);
21307 negate = true;
21308 /* FALLTHRU */
21309
21310 case LT:
21311 case LTU:
21312 code = swap_condition (code);
21313 x = cop0, cop0 = cop1, cop1 = x;
21314 break;
21315
21316 default:
21317 gcc_unreachable ();
21318 }
21319
21320 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21321 if (mode == V2DImode)
21322 {
21323 switch (code)
21324 {
21325 case EQ:
21326 /* SSE4.1 supports EQ. */
21327 if (!TARGET_SSE4_1)
21328 return false;
21329 break;
21330
21331 case GT:
21332 case GTU:
21333 /* SSE4.2 supports GT/GTU. */
21334 if (!TARGET_SSE4_2)
21335 return false;
21336 break;
21337
21338 default:
21339 gcc_unreachable ();
21340 }
21341 }
21342
21343 /* Unsigned parallel compare is not supported by the hardware.
21344 Play some tricks to turn this into a signed comparison
21345 or a comparison against zero. */
21346 if (code == GTU)
21347 {
21348 cop0 = force_reg (mode, cop0);
21349
21350 switch (mode)
21351 {
21352 case V16SImode:
21353 case V8DImode:
21354 case V8SImode:
21355 case V4DImode:
21356 case V4SImode:
21357 case V2DImode:
21358 {
21359 rtx t1, t2, mask;
21360 rtx (*gen_sub3) (rtx, rtx, rtx);
21361
21362 switch (mode)
21363 {
21364 case V16SImode: gen_sub3 = gen_subv16si3; break;
21365 case V8DImode: gen_sub3 = gen_subv8di3; break;
21366 case V8SImode: gen_sub3 = gen_subv8si3; break;
21367 case V4DImode: gen_sub3 = gen_subv4di3; break;
21368 case V4SImode: gen_sub3 = gen_subv4si3; break;
21369 case V2DImode: gen_sub3 = gen_subv2di3; break;
21370 default:
21371 gcc_unreachable ();
21372 }
21373 /* Subtract (-(INT MAX) - 1) from both operands to make
21374 them signed. */
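/* Subtracting the sign-bit mask (INT_MIN) flips the sign bit of each
   element, so an unsigned a > b turns into the signed comparison
   (a ^ signbit) > (b ^ signbit). */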
21375 mask = ix86_build_signbit_mask (mode, true, false);
21376 t1 = gen_reg_rtx (mode);
21377 emit_insn (gen_sub3 (t1, cop0, mask));
21378
21379 t2 = gen_reg_rtx (mode);
21380 emit_insn (gen_sub3 (t2, cop1, mask));
21381
21382 cop0 = t1;
21383 cop1 = t2;
21384 code = GT;
21385 }
21386 break;
21387
21388 case V64QImode:
21389 case V32HImode:
21390 case V32QImode:
21391 case V16HImode:
21392 case V16QImode:
21393 case V8HImode:
21394 /* Perform a parallel unsigned saturating subtraction. */
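/* a - b with unsigned saturation is nonzero exactly when a > b, so GTU
   becomes an EQ test against zero, with the result then negated. */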
21395 x = gen_reg_rtx (mode);
21396 emit_insn (gen_rtx_SET (VOIDmode, x,
21397 gen_rtx_US_MINUS (mode, cop0, cop1)));
21398
21399 cop0 = x;
21400 cop1 = CONST0_RTX (mode);
21401 code = EQ;
21402 negate = !negate;
21403 break;
21404
21405 default:
21406 gcc_unreachable ();
21407 }
21408 }
21409 }
21410
21411 /* Allow the comparison to be done in one mode, but the movcc to
21412 happen in another mode. */
21413 if (data_mode == mode)
21414 {
21415 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21416 operands[1+negate], operands[2-negate]);
21417 }
21418 else
21419 {
21420 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21421 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21422 operands[1+negate], operands[2-negate]);
21423 if (GET_MODE (x) == mode)
21424 x = gen_lowpart (data_mode, x);
21425 }
21426
21427 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21428 operands[2-negate]);
21429 return true;
21430 }
21431
21432 /* AVX512F does support 64-byte integer vector operations,
21433 thus the longest vector we are faced with is V64QImode. */
21434 #define MAX_VECT_LEN 64
21435
21436 struct expand_vec_perm_d
21437 {
21438 rtx target, op0, op1;
21439 unsigned char perm[MAX_VECT_LEN];
21440 enum machine_mode vmode;
21441 unsigned char nelt;
21442 bool one_operand_p;
21443 bool testing_p;
21444 };
21445
21446 static bool
21447 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
21448 struct expand_vec_perm_d *d)
21449 {
21450 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21451 expanders, so the arguments are either in D, or in OP0, OP1 etc. */
21452 enum machine_mode mode = GET_MODE (d ? d->op0 : op0);
21453 enum machine_mode maskmode = mode;
21454 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21455
21456 switch (mode)
21457 {
21458 case V8HImode:
21459 if (TARGET_AVX512VL && TARGET_AVX512BW)
21460 gen = gen_avx512vl_vpermi2varv8hi3;
21461 break;
21462 case V16HImode:
21463 if (TARGET_AVX512VL && TARGET_AVX512BW)
21464 gen = gen_avx512vl_vpermi2varv16hi3;
21465 break;
21466 case V32HImode:
21467 if (TARGET_AVX512BW)
21468 gen = gen_avx512bw_vpermi2varv32hi3;
21469 break;
21470 case V4SImode:
21471 if (TARGET_AVX512VL)
21472 gen = gen_avx512vl_vpermi2varv4si3;
21473 break;
21474 case V8SImode:
21475 if (TARGET_AVX512VL)
21476 gen = gen_avx512vl_vpermi2varv8si3;
21477 break;
21478 case V16SImode:
21479 if (TARGET_AVX512F)
21480 gen = gen_avx512f_vpermi2varv16si3;
21481 break;
21482 case V4SFmode:
21483 if (TARGET_AVX512VL)
21484 {
21485 gen = gen_avx512vl_vpermi2varv4sf3;
21486 maskmode = V4SImode;
21487 }
21488 break;
21489 case V8SFmode:
21490 if (TARGET_AVX512VL)
21491 {
21492 gen = gen_avx512vl_vpermi2varv8sf3;
21493 maskmode = V8SImode;
21494 }
21495 break;
21496 case V16SFmode:
21497 if (TARGET_AVX512F)
21498 {
21499 gen = gen_avx512f_vpermi2varv16sf3;
21500 maskmode = V16SImode;
21501 }
21502 break;
21503 case V2DImode:
21504 if (TARGET_AVX512VL)
21505 gen = gen_avx512vl_vpermi2varv2di3;
21506 break;
21507 case V4DImode:
21508 if (TARGET_AVX512VL)
21509 gen = gen_avx512vl_vpermi2varv4di3;
21510 break;
21511 case V8DImode:
21512 if (TARGET_AVX512F)
21513 gen = gen_avx512f_vpermi2varv8di3;
21514 break;
21515 case V2DFmode:
21516 if (TARGET_AVX512VL)
21517 {
21518 gen = gen_avx512vl_vpermi2varv2df3;
21519 maskmode = V2DImode;
21520 }
21521 break;
21522 case V4DFmode:
21523 if (TARGET_AVX512VL)
21524 {
21525 gen = gen_avx512vl_vpermi2varv4df3;
21526 maskmode = V4DImode;
21527 }
21528 break;
21529 case V8DFmode:
21530 if (TARGET_AVX512F)
21531 {
21532 gen = gen_avx512f_vpermi2varv8df3;
21533 maskmode = V8DImode;
21534 }
21535 break;
21536 default:
21537 break;
21538 }
21539
21540 if (gen == NULL)
21541 return false;
21542
21543 /* ix86_expand_vec_perm_vpermi2 is called from both the const and non-const
21544 expanders, so the arguments are either in D, or in OP0, OP1 etc. */
21545 if (d)
21546 {
21547 rtx vec[64];
21548 target = d->target;
21549 op0 = d->op0;
21550 op1 = d->op1;
21551 for (int i = 0; i < d->nelt; ++i)
21552 vec[i] = GEN_INT (d->perm[i]);
21553 mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
21554 }
21555
21556 emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
21557 return true;
21558 }
21559
21560 /* Expand a variable vector permutation. */
21561
21562 void
21563 ix86_expand_vec_perm (rtx operands[])
21564 {
21565 rtx target = operands[0];
21566 rtx op0 = operands[1];
21567 rtx op1 = operands[2];
21568 rtx mask = operands[3];
21569 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21570 enum machine_mode mode = GET_MODE (op0);
21571 enum machine_mode maskmode = GET_MODE (mask);
21572 int w, e, i;
21573 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21574
21575 /* Number of elements in the vector. */
21576 w = GET_MODE_NUNITS (mode);
21577 e = GET_MODE_UNIT_SIZE (mode);
21578 gcc_assert (w <= 64);
21579
21580 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
21581 return;
21582
21583 if (TARGET_AVX2)
21584 {
21585 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21586 {
21587 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21588 a constant shuffle operand.  With a tiny bit of effort we can
21589 use VPERMD instead.  A re-interpretation stall for V4DFmode is
21590 unfortunate but there's no avoiding it.
21591 Similarly for V16HImode we don't have instructions for variable
21592 shuffling, while for V32QImode we can, after preparing suitable
21593 masks, use vpshufb; vpshufb; vpermq; vpor.  */
21594
21595 if (mode == V16HImode)
21596 {
21597 maskmode = mode = V32QImode;
21598 w = 32;
21599 e = 1;
21600 }
21601 else
21602 {
21603 maskmode = mode = V8SImode;
21604 w = 8;
21605 e = 4;
21606 }
21607 t1 = gen_reg_rtx (maskmode);
21608
21609 /* Replicate the low bits of the V4DImode mask into V8SImode:
21610 mask = { A B C D }
21611 t1 = { A A B B C C D D }. */
21612 for (i = 0; i < w / 2; ++i)
21613 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21614 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21615 vt = force_reg (maskmode, vt);
21616 mask = gen_lowpart (maskmode, mask);
21617 if (maskmode == V8SImode)
21618 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21619 else
21620 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21621
21622 /* Multiply the shuffle indices by two. */
21623 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21624 OPTAB_DIRECT);
21625
21626 /* Add one to the odd shuffle indices:
21627 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21628 for (i = 0; i < w / 2; ++i)
21629 {
21630 vec[i * 2] = const0_rtx;
21631 vec[i * 2 + 1] = const1_rtx;
21632 }
21633 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21634 vt = validize_mem (force_const_mem (maskmode, vt));
21635 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21636 OPTAB_DIRECT);
21637
21638 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21639 operands[3] = mask = t1;
21640 target = gen_reg_rtx (mode);
21641 op0 = gen_lowpart (mode, op0);
21642 op1 = gen_lowpart (mode, op1);
21643 }
21644
21645 switch (mode)
21646 {
21647 case V8SImode:
21648 /* The VPERMD and VPERMPS instructions already properly ignore
21649 the high bits of the shuffle elements. No need for us to
21650 perform an AND ourselves. */
21651 if (one_operand_shuffle)
21652 {
21653 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21654 if (target != operands[0])
21655 emit_move_insn (operands[0],
21656 gen_lowpart (GET_MODE (operands[0]), target));
21657 }
21658 else
21659 {
21660 t1 = gen_reg_rtx (V8SImode);
21661 t2 = gen_reg_rtx (V8SImode);
21662 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21663 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21664 goto merge_two;
21665 }
21666 return;
21667
21668 case V8SFmode:
21669 mask = gen_lowpart (V8SImode, mask);
21670 if (one_operand_shuffle)
21671 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21672 else
21673 {
21674 t1 = gen_reg_rtx (V8SFmode);
21675 t2 = gen_reg_rtx (V8SFmode);
21676 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21677 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21678 goto merge_two;
21679 }
21680 return;
21681
21682 case V4SImode:
21683 /* By combining the two 128-bit input vectors into one 256-bit
21684 input vector, we can use VPERMD and VPERMPS for the full
21685 two-operand shuffle. */
21686 t1 = gen_reg_rtx (V8SImode);
21687 t2 = gen_reg_rtx (V8SImode);
21688 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21689 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21690 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21691 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21692 return;
21693
21694 case V4SFmode:
21695 t1 = gen_reg_rtx (V8SFmode);
21696 t2 = gen_reg_rtx (V8SImode);
21697 mask = gen_lowpart (V4SImode, mask);
21698 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21699 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21700 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21701 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21702 return;
21703
21704 case V32QImode:
21705 t1 = gen_reg_rtx (V32QImode);
21706 t2 = gen_reg_rtx (V32QImode);
21707 t3 = gen_reg_rtx (V32QImode);
21708 vt2 = GEN_INT (-128);
21709 for (i = 0; i < 32; i++)
21710 vec[i] = vt2;
21711 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21712 vt = force_reg (V32QImode, vt);
21713 for (i = 0; i < 32; i++)
21714 vec[i] = i < 16 ? vt2 : const0_rtx;
21715 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21716 vt2 = force_reg (V32QImode, vt2);
21717 /* From mask create two adjusted masks, which contain the same
21718 bits as mask in the low 7 bits of each vector element.
21719 The first mask will have the most significant bit clear
21720 if it requests an element from the same 128-bit lane
21721 and the MSB set if it requests an element from the other 128-bit lane.
21722 The second mask will have the opposite values of the MSB,
21723 and additionally will have its 128-bit lanes swapped.
21724 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21725 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21726 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21727 stands for the other 12 bytes. */
21728 /* The bit saying whether an element is from the same lane or the other
21729 lane is bit 4, so shift it up by 3 to the MSB position. */
21730 t5 = gen_reg_rtx (V4DImode);
21731 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21732 GEN_INT (3)));
21733 /* Clear MSB bits from the mask just in case it had them set. */
21734 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21735 /* After this t1 will have the MSB set for elements from the other lane. */
21736 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21737 /* Clear bits other than MSB. */
21738 emit_insn (gen_andv32qi3 (t1, t1, vt));
21739 /* Or in the lower bits from mask into t3. */
21740 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21741 /* And invert MSB bits in t1, so MSB is set for elements from the same
21742 lane. */
21743 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21744 /* Swap 128-bit lanes in t3. */
21745 t6 = gen_reg_rtx (V4DImode);
21746 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21747 const2_rtx, GEN_INT (3),
21748 const0_rtx, const1_rtx));
21749 /* And or in the lower bits from mask into t1. */
21750 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21751 if (one_operand_shuffle)
21752 {
21753 /* Each of these shuffles will put 0s in places where an element
21754 from the other 128-bit lane is needed, otherwise it will shuffle
21755 in the requested value. */
21756 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21757 gen_lowpart (V32QImode, t6)));
21758 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21759 /* For t3 the 128-bit lanes are swapped again. */
21760 t7 = gen_reg_rtx (V4DImode);
21761 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21762 const2_rtx, GEN_INT (3),
21763 const0_rtx, const1_rtx));
21764 /* And oring both together leads to the result. */
21765 emit_insn (gen_iorv32qi3 (target, t1,
21766 gen_lowpart (V32QImode, t7)));
21767 if (target != operands[0])
21768 emit_move_insn (operands[0],
21769 gen_lowpart (GET_MODE (operands[0]), target));
21770 return;
21771 }
21772
21773 t4 = gen_reg_rtx (V32QImode);
21774 /* Similar to the above one_operand_shuffle code,
21775 just repeated twice for each operand.  The merge_two:
21776 code below will merge the two results together. */
21777 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21778 gen_lowpart (V32QImode, t6)));
21779 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21780 gen_lowpart (V32QImode, t6)));
21781 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21782 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21783 t7 = gen_reg_rtx (V4DImode);
21784 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21785 const2_rtx, GEN_INT (3),
21786 const0_rtx, const1_rtx));
21787 t8 = gen_reg_rtx (V4DImode);
21788 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21789 const2_rtx, GEN_INT (3),
21790 const0_rtx, const1_rtx));
21791 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21792 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21793 t1 = t4;
21794 t2 = t3;
21795 goto merge_two;
21796
21797 default:
21798 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21799 break;
21800 }
21801 }
21802
21803 if (TARGET_XOP)
21804 {
21805 /* The XOP VPPERM insn supports three inputs. By ignoring the
21806 one_operand_shuffle special case, we avoid creating another
21807 set of constant vectors in memory. */
21808 one_operand_shuffle = false;
21809
21810 /* mask = mask & {2*w-1, ...} */
21811 vt = GEN_INT (2*w - 1);
21812 }
21813 else
21814 {
21815 /* mask = mask & {w-1, ...} */
21816 vt = GEN_INT (w - 1);
21817 }
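/* For example, for V8HImode (w == 8) the SSSE3 path below reduces each
   index modulo 8 and merges the two per-operand shuffles afterwards,
   while XOP's VPPERM selects from the concatenation of both operands,
   so indices up to 2*w - 1 = 15 stay meaningful.  */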
21818
21819 for (i = 0; i < w; i++)
21820 vec[i] = vt;
21821 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21822 mask = expand_simple_binop (maskmode, AND, mask, vt,
21823 NULL_RTX, 0, OPTAB_DIRECT);
21824
21825 /* For non-QImode operations, convert the word permutation control
21826 into a byte permutation control. */
21827 if (mode != V16QImode)
21828 {
21829 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21830 GEN_INT (exact_log2 (e)),
21831 NULL_RTX, 0, OPTAB_DIRECT);
21832
21833 /* Convert mask to vector of chars. */
21834 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21835
21836 /* Replicate each of the input bytes into byte positions:
21837 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21838 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21839 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21840 for (i = 0; i < 16; ++i)
21841 vec[i] = GEN_INT (i/e * e);
21842 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21843 vt = validize_mem (force_const_mem (V16QImode, vt));
21844 if (TARGET_XOP)
21845 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21846 else
21847 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21848
21849 /* Convert it into the byte positions by doing
21850 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21851 for (i = 0; i < 16; ++i)
21852 vec[i] = GEN_INT (i % e);
21853 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21854 vt = validize_mem (force_const_mem (V16QImode, vt));
21855 emit_insn (gen_addv16qi3 (mask, mask, vt));
21856 }
21857
21858 /* The actual shuffle operations all operate on V16QImode. */
21859 op0 = gen_lowpart (V16QImode, op0);
21860 op1 = gen_lowpart (V16QImode, op1);
21861
21862 if (TARGET_XOP)
21863 {
21864 if (GET_MODE (target) != V16QImode)
21865 target = gen_reg_rtx (V16QImode);
21866 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21867 if (target != operands[0])
21868 emit_move_insn (operands[0],
21869 gen_lowpart (GET_MODE (operands[0]), target));
21870 }
21871 else if (one_operand_shuffle)
21872 {
21873 if (GET_MODE (target) != V16QImode)
21874 target = gen_reg_rtx (V16QImode);
21875 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21876 if (target != operands[0])
21877 emit_move_insn (operands[0],
21878 gen_lowpart (GET_MODE (operands[0]), target));
21879 }
21880 else
21881 {
21882 rtx xops[6];
21883 bool ok;
21884
21885 /* Shuffle the two input vectors independently. */
21886 t1 = gen_reg_rtx (V16QImode);
21887 t2 = gen_reg_rtx (V16QImode);
21888 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21889 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21890
21891 merge_two:
21892 /* Then merge them together. The key is whether any given control
21893 element contained a bit set that indicates the second word. */
21894 mask = operands[3];
21895 vt = GEN_INT (w);
21896 if (maskmode == V2DImode && !TARGET_SSE4_1)
21897 {
21898 /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
21899 more shuffle to convert the V2DI input mask into a V4SI
21900 input mask.  At that point the masking that expand_int_vcond
21901 performs will work as desired. */
21902 rtx t3 = gen_reg_rtx (V4SImode);
21903 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21904 const0_rtx, const0_rtx,
21905 const2_rtx, const2_rtx));
21906 mask = t3;
21907 maskmode = V4SImode;
21908 e = w = 4;
21909 }
21910
21911 for (i = 0; i < w; i++)
21912 vec[i] = vt;
21913 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21914 vt = force_reg (maskmode, vt);
21915 mask = expand_simple_binop (maskmode, AND, mask, vt,
21916 NULL_RTX, 0, OPTAB_DIRECT);
21917
21918 if (GET_MODE (target) != mode)
21919 target = gen_reg_rtx (mode);
21920 xops[0] = target;
21921 xops[1] = gen_lowpart (mode, t2);
21922 xops[2] = gen_lowpart (mode, t1);
21923 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21924 xops[4] = mask;
21925 xops[5] = vt;
21926 ok = ix86_expand_int_vcond (xops);
21927 gcc_assert (ok);
21928 if (target != operands[0])
21929 emit_move_insn (operands[0],
21930 gen_lowpart (GET_MODE (operands[0]), target));
21931 }
21932 }
21933
21934 /* Unpack SRC into DEST, which has the next wider integer vector type.
21935 UNSIGNED_P is true if we should do zero extension, else sign extension.
21936 HIGH_P is true if we want the N/2 high elements, else the low elements. */
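/* For example, with SRC in V4SImode { a, b, c, d }, UNSIGNED_P true and
   HIGH_P false, DEST receives the V2DImode vector { zext (a), zext (b) }.  */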
21937
21938 void
21939 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21940 {
21941 enum machine_mode imode = GET_MODE (src);
21942 rtx tmp;
21943
21944 if (TARGET_SSE4_1)
21945 {
21946 rtx (*unpack)(rtx, rtx);
21947 rtx (*extract)(rtx, rtx) = NULL;
21948 enum machine_mode halfmode = BLKmode;
21949
21950 switch (imode)
21951 {
21952 case V64QImode:
21953 if (unsigned_p)
21954 unpack = gen_avx512bw_zero_extendv32qiv32hi2;
21955 else
21956 unpack = gen_avx512bw_sign_extendv32qiv32hi2;
21957 halfmode = V32QImode;
21958 extract
21959 = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
21960 break;
21961 case V32QImode:
21962 if (unsigned_p)
21963 unpack = gen_avx2_zero_extendv16qiv16hi2;
21964 else
21965 unpack = gen_avx2_sign_extendv16qiv16hi2;
21966 halfmode = V16QImode;
21967 extract
21968 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21969 break;
21970 case V32HImode:
21971 if (unsigned_p)
21972 unpack = gen_avx512f_zero_extendv16hiv16si2;
21973 else
21974 unpack = gen_avx512f_sign_extendv16hiv16si2;
21975 halfmode = V16HImode;
21976 extract
21977 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21978 break;
21979 case V16HImode:
21980 if (unsigned_p)
21981 unpack = gen_avx2_zero_extendv8hiv8si2;
21982 else
21983 unpack = gen_avx2_sign_extendv8hiv8si2;
21984 halfmode = V8HImode;
21985 extract
21986 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21987 break;
21988 case V16SImode:
21989 if (unsigned_p)
21990 unpack = gen_avx512f_zero_extendv8siv8di2;
21991 else
21992 unpack = gen_avx512f_sign_extendv8siv8di2;
21993 halfmode = V8SImode;
21994 extract
21995 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21996 break;
21997 case V8SImode:
21998 if (unsigned_p)
21999 unpack = gen_avx2_zero_extendv4siv4di2;
22000 else
22001 unpack = gen_avx2_sign_extendv4siv4di2;
22002 halfmode = V4SImode;
22003 extract
22004 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22005 break;
22006 case V16QImode:
22007 if (unsigned_p)
22008 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22009 else
22010 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22011 break;
22012 case V8HImode:
22013 if (unsigned_p)
22014 unpack = gen_sse4_1_zero_extendv4hiv4si2;
22015 else
22016 unpack = gen_sse4_1_sign_extendv4hiv4si2;
22017 break;
22018 case V4SImode:
22019 if (unsigned_p)
22020 unpack = gen_sse4_1_zero_extendv2siv2di2;
22021 else
22022 unpack = gen_sse4_1_sign_extendv2siv2di2;
22023 break;
22024 default:
22025 gcc_unreachable ();
22026 }
22027
22028 if (GET_MODE_SIZE (imode) >= 32)
22029 {
22030 tmp = gen_reg_rtx (halfmode);
22031 emit_insn (extract (tmp, src));
22032 }
22033 else if (high_p)
22034 {
22035 /* Shift higher 8 bytes to lower 8 bytes. */
22036 tmp = gen_reg_rtx (V1TImode);
22037 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22038 GEN_INT (64)));
22039 tmp = gen_lowpart (imode, tmp);
22040 }
22041 else
22042 tmp = src;
22043
22044 emit_insn (unpack (dest, tmp));
22045 }
22046 else
22047 {
22048 rtx (*unpack)(rtx, rtx, rtx);
22049
22050 switch (imode)
22051 {
22052 case V16QImode:
22053 if (high_p)
22054 unpack = gen_vec_interleave_highv16qi;
22055 else
22056 unpack = gen_vec_interleave_lowv16qi;
22057 break;
22058 case V8HImode:
22059 if (high_p)
22060 unpack = gen_vec_interleave_highv8hi;
22061 else
22062 unpack = gen_vec_interleave_lowv8hi;
22063 break;
22064 case V4SImode:
22065 if (high_p)
22066 unpack = gen_vec_interleave_highv4si;
22067 else
22068 unpack = gen_vec_interleave_lowv4si;
22069 break;
22070 default:
22071 gcc_unreachable ();
22072 }
22073
22074 if (unsigned_p)
22075 tmp = force_reg (imode, CONST0_RTX (imode));
22076 else
22077 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22078 src, pc_rtx, pc_rtx);
22079
22080 rtx tmp2 = gen_reg_rtx (imode);
22081 emit_insn (unpack (tmp2, src, tmp));
22082 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22083 }
22084 }
22085
22086 /* Expand conditional increment or decrement using adc/sbb instructions.
22087 The default case using setcc followed by the conditional move can be
22088 done by generic code. */
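/* For example, "x = y + (a < b)" with an unsigned comparison can be
   expanded (roughly) as "cmp a, b" followed by "adc $0, x" with x
   preloaded from y, reusing the carry flag instead of setcc + cmov.  */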
22089 bool
22090 ix86_expand_int_addcc (rtx operands[])
22091 {
22092 enum rtx_code code = GET_CODE (operands[1]);
22093 rtx flags;
22094 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22095 rtx compare_op;
22096 rtx val = const0_rtx;
22097 bool fpcmp = false;
22098 enum machine_mode mode;
22099 rtx op0 = XEXP (operands[1], 0);
22100 rtx op1 = XEXP (operands[1], 1);
22101
22102 if (operands[3] != const1_rtx
22103 && operands[3] != constm1_rtx)
22104 return false;
22105 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22106 return false;
22107 code = GET_CODE (compare_op);
22108
22109 flags = XEXP (compare_op, 0);
22110
22111 if (GET_MODE (flags) == CCFPmode
22112 || GET_MODE (flags) == CCFPUmode)
22113 {
22114 fpcmp = true;
22115 code = ix86_fp_compare_code_to_integer (code);
22116 }
22117
22118 if (code != LTU)
22119 {
22120 val = constm1_rtx;
22121 if (fpcmp)
22122 PUT_CODE (compare_op,
22123 reverse_condition_maybe_unordered
22124 (GET_CODE (compare_op)));
22125 else
22126 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22127 }
22128
22129 mode = GET_MODE (operands[0]);
22130
22131 /* Construct either adc or sbb insn. */
22132 if ((code == LTU) == (operands[3] == constm1_rtx))
22133 {
22134 switch (mode)
22135 {
22136 case QImode:
22137 insn = gen_subqi3_carry;
22138 break;
22139 case HImode:
22140 insn = gen_subhi3_carry;
22141 break;
22142 case SImode:
22143 insn = gen_subsi3_carry;
22144 break;
22145 case DImode:
22146 insn = gen_subdi3_carry;
22147 break;
22148 default:
22149 gcc_unreachable ();
22150 }
22151 }
22152 else
22153 {
22154 switch (mode)
22155 {
22156 case QImode:
22157 insn = gen_addqi3_carry;
22158 break;
22159 case HImode:
22160 insn = gen_addhi3_carry;
22161 break;
22162 case SImode:
22163 insn = gen_addsi3_carry;
22164 break;
22165 case DImode:
22166 insn = gen_adddi3_carry;
22167 break;
22168 default:
22169 gcc_unreachable ();
22170 }
22171 }
22172 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22173
22174 return true;
22175 }
22176
22177
22178 /* Split OPERAND into parts (at most word size each) stored in PARTS.
22179 Similar to split_double_mode, but works for floating point parameters
22180 and non-offsettable memories.  For pushes, it returns just stack offsets;
22181 the values will be saved in the right order.  Maximally four parts are generated. */
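/* For example, on a 32-bit target a DImode or DFmode operand yields two
   SImode parts, XFmode three and TFmode four; on a 64-bit target XFmode
   and TFmode yield two parts each.  */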
22182
22183 static int
22184 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22185 {
22186 int size;
22187
22188 if (!TARGET_64BIT)
22189 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22190 else
22191 size = (GET_MODE_SIZE (mode) + 4) / 8;
22192
22193 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22194 gcc_assert (size >= 2 && size <= 4);
22195
22196 /* Optimize constant pool reference to immediates. This is used by fp
22197 moves, that force all constants to memory to allow combining. */
22198 if (MEM_P (operand) && MEM_READONLY_P (operand))
22199 {
22200 rtx tmp = maybe_get_pool_constant (operand);
22201 if (tmp)
22202 operand = tmp;
22203 }
22204
22205 if (MEM_P (operand) && !offsettable_memref_p (operand))
22206 {
22207 /* The only non-offsettable memories we handle are pushes. */
22208 int ok = push_operand (operand, VOIDmode);
22209
22210 gcc_assert (ok);
22211
22212 operand = copy_rtx (operand);
22213 PUT_MODE (operand, word_mode);
22214 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22215 return size;
22216 }
22217
22218 if (GET_CODE (operand) == CONST_VECTOR)
22219 {
22220 enum machine_mode imode = int_mode_for_mode (mode);
22221 /* Caution: if we looked through a constant pool memory above,
22222 the operand may actually have a different mode now. That's
22223 ok, since we want to pun this all the way back to an integer. */
22224 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22225 gcc_assert (operand != NULL);
22226 mode = imode;
22227 }
22228
22229 if (!TARGET_64BIT)
22230 {
22231 if (mode == DImode)
22232 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22233 else
22234 {
22235 int i;
22236
22237 if (REG_P (operand))
22238 {
22239 gcc_assert (reload_completed);
22240 for (i = 0; i < size; i++)
22241 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22242 }
22243 else if (offsettable_memref_p (operand))
22244 {
22245 operand = adjust_address (operand, SImode, 0);
22246 parts[0] = operand;
22247 for (i = 1; i < size; i++)
22248 parts[i] = adjust_address (operand, SImode, 4 * i);
22249 }
22250 else if (GET_CODE (operand) == CONST_DOUBLE)
22251 {
22252 REAL_VALUE_TYPE r;
22253 long l[4];
22254
22255 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22256 switch (mode)
22257 {
22258 case TFmode:
22259 real_to_target (l, &r, mode);
22260 parts[3] = gen_int_mode (l[3], SImode);
22261 parts[2] = gen_int_mode (l[2], SImode);
22262 break;
22263 case XFmode:
22264 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22265 long double may not be 80-bit. */
22266 real_to_target (l, &r, mode);
22267 parts[2] = gen_int_mode (l[2], SImode);
22268 break;
22269 case DFmode:
22270 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22271 break;
22272 default:
22273 gcc_unreachable ();
22274 }
22275 parts[1] = gen_int_mode (l[1], SImode);
22276 parts[0] = gen_int_mode (l[0], SImode);
22277 }
22278 else
22279 gcc_unreachable ();
22280 }
22281 }
22282 else
22283 {
22284 if (mode == TImode)
22285 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22286 if (mode == XFmode || mode == TFmode)
22287 {
22288 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22289 if (REG_P (operand))
22290 {
22291 gcc_assert (reload_completed);
22292 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22293 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22294 }
22295 else if (offsettable_memref_p (operand))
22296 {
22297 operand = adjust_address (operand, DImode, 0);
22298 parts[0] = operand;
22299 parts[1] = adjust_address (operand, upper_mode, 8);
22300 }
22301 else if (GET_CODE (operand) == CONST_DOUBLE)
22302 {
22303 REAL_VALUE_TYPE r;
22304 long l[4];
22305
22306 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22307 real_to_target (l, &r, mode);
22308
22309 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22310 if (HOST_BITS_PER_WIDE_INT >= 64)
22311 parts[0]
22312 = gen_int_mode
22313 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22314 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22315 DImode);
22316 else
22317 parts[0] = immed_double_const (l[0], l[1], DImode);
22318
22319 if (upper_mode == SImode)
22320 parts[1] = gen_int_mode (l[2], SImode);
22321 else if (HOST_BITS_PER_WIDE_INT >= 64)
22322 parts[1]
22323 = gen_int_mode
22324 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22325 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22326 DImode);
22327 else
22328 parts[1] = immed_double_const (l[2], l[3], DImode);
22329 }
22330 else
22331 gcc_unreachable ();
22332 }
22333 }
22334
22335 return size;
22336 }
22337
22338 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22339 All required insns are emitted here; no moves are left to the caller.
22340 Operands 2-5 are used for the destination parts and operands 6-9 for
22341 the input values, in the correct order. */
22342
22343 void
22344 ix86_split_long_move (rtx operands[])
22345 {
22346 rtx part[2][4];
22347 int nparts, i, j;
22348 int push = 0;
22349 int collisions = 0;
22350 enum machine_mode mode = GET_MODE (operands[0]);
22351 bool collisionparts[4];
22352
22353 /* The DFmode expanders may ask us to move a double.
22354 For a 64-bit target this is a single move.  By hiding the fact
22355 here we simplify the i386.md splitters. */
22356 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22357 {
22358 /* Optimize constant pool reference to immediates. This is used by
22359 fp moves, that force all constants to memory to allow combining. */
22360
22361 if (MEM_P (operands[1])
22362 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22363 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22364 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22365 if (push_operand (operands[0], VOIDmode))
22366 {
22367 operands[0] = copy_rtx (operands[0]);
22368 PUT_MODE (operands[0], word_mode);
22369 }
22370 else
22371 operands[0] = gen_lowpart (DImode, operands[0]);
22372 operands[1] = gen_lowpart (DImode, operands[1]);
22373 emit_move_insn (operands[0], operands[1]);
22374 return;
22375 }
22376
22377 /* The only non-offsettable memory we handle is push. */
22378 if (push_operand (operands[0], VOIDmode))
22379 push = 1;
22380 else
22381 gcc_assert (!MEM_P (operands[0])
22382 || offsettable_memref_p (operands[0]));
22383
22384 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22385 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22386
22387 /* When emitting push, take care for source operands on the stack. */
22388 if (push && MEM_P (operands[1])
22389 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22390 {
22391 rtx src_base = XEXP (part[1][nparts - 1], 0);
22392
22393 /* Compensate for the stack decrement by 4. */
22394 if (!TARGET_64BIT && nparts == 3
22395 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22396 src_base = plus_constant (Pmode, src_base, 4);
22397
22398 /* src_base refers to the stack pointer and is
22399 automatically decreased by emitted push. */
22400 for (i = 0; i < nparts; i++)
22401 part[1][i] = change_address (part[1][i],
22402 GET_MODE (part[1][i]), src_base);
22403 }
22404
22405 /* We need to do the copy in the right order in case an address register
22406 of the source overlaps the destination. */
22407 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22408 {
22409 rtx tmp;
22410
22411 for (i = 0; i < nparts; i++)
22412 {
22413 collisionparts[i]
22414 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22415 if (collisionparts[i])
22416 collisions++;
22417 }
22418
22419 /* Collision in the middle part can be handled by reordering. */
22420 if (collisions == 1 && nparts == 3 && collisionparts [1])
22421 {
22422 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22423 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22424 }
22425 else if (collisions == 1
22426 && nparts == 4
22427 && (collisionparts [1] || collisionparts [2]))
22428 {
22429 if (collisionparts [1])
22430 {
22431 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22432 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22433 }
22434 else
22435 {
22436 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22437 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22438 }
22439 }
22440
22441 /* If there are more collisions, we can't handle it by reordering.
22442 Do an lea to the last part and use only one colliding move. */
22443 else if (collisions > 1)
22444 {
22445 rtx base;
22446
22447 collisions = 1;
22448
22449 base = part[0][nparts - 1];
22450
22451 /* Handle the case when the last part isn't valid for lea.
22452 Happens in 64-bit mode storing the 12-byte XFmode. */
22453 if (GET_MODE (base) != Pmode)
22454 base = gen_rtx_REG (Pmode, REGNO (base));
22455
22456 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22457 part[1][0] = replace_equiv_address (part[1][0], base);
22458 for (i = 1; i < nparts; i++)
22459 {
22460 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22461 part[1][i] = replace_equiv_address (part[1][i], tmp);
22462 }
22463 }
22464 }
22465
22466 if (push)
22467 {
22468 if (!TARGET_64BIT)
22469 {
22470 if (nparts == 3)
22471 {
22472 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22473 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22474 stack_pointer_rtx, GEN_INT (-4)));
22475 emit_move_insn (part[0][2], part[1][2]);
22476 }
22477 else if (nparts == 4)
22478 {
22479 emit_move_insn (part[0][3], part[1][3]);
22480 emit_move_insn (part[0][2], part[1][2]);
22481 }
22482 }
22483 else
22484 {
22485 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
22486 register, it is OK - we will just use the larger counterpart.  We also
22487 retype memory - this comes from an attempt to avoid the REX prefix on
22488 moving the second half of a TFmode value. */
22489 if (GET_MODE (part[1][1]) == SImode)
22490 {
22491 switch (GET_CODE (part[1][1]))
22492 {
22493 case MEM:
22494 part[1][1] = adjust_address (part[1][1], DImode, 0);
22495 break;
22496
22497 case REG:
22498 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22499 break;
22500
22501 default:
22502 gcc_unreachable ();
22503 }
22504
22505 if (GET_MODE (part[1][0]) == SImode)
22506 part[1][0] = part[1][1];
22507 }
22508 }
22509 emit_move_insn (part[0][1], part[1][1]);
22510 emit_move_insn (part[0][0], part[1][0]);
22511 return;
22512 }
22513
22514 /* Choose correct order to not overwrite the source before it is copied. */
22515 if ((REG_P (part[0][0])
22516 && REG_P (part[1][1])
22517 && (REGNO (part[0][0]) == REGNO (part[1][1])
22518 || (nparts == 3
22519 && REGNO (part[0][0]) == REGNO (part[1][2]))
22520 || (nparts == 4
22521 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22522 || (collisions > 0
22523 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22524 {
22525 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22526 {
22527 operands[2 + i] = part[0][j];
22528 operands[6 + i] = part[1][j];
22529 }
22530 }
22531 else
22532 {
22533 for (i = 0; i < nparts; i++)
22534 {
22535 operands[2 + i] = part[0][i];
22536 operands[6 + i] = part[1][i];
22537 }
22538 }
22539
22540 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22541 if (optimize_insn_for_size_p ())
22542 {
22543 for (j = 0; j < nparts - 1; j++)
22544 if (CONST_INT_P (operands[6 + j])
22545 && operands[6 + j] != const0_rtx
22546 && REG_P (operands[2 + j]))
22547 for (i = j; i < nparts - 1; i++)
22548 if (CONST_INT_P (operands[7 + i])
22549 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22550 operands[7 + i] = operands[2 + j];
22551 }
22552
22553 for (i = 0; i < nparts; i++)
22554 emit_move_insn (operands[2 + i], operands[6 + i]);
22555
22556 return;
22557 }
22558
22559 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22560 left shift by a constant, either using a single shift or
22561 a sequence of add instructions. */
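/* E.g. a shift left by 2 on a target where two adds are no more expensive
   than one constant shift (and we are not optimizing for size) is emitted
   as "add op, op; add op, op".  */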
22562
22563 static void
22564 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22565 {
22566 rtx (*insn)(rtx, rtx, rtx);
22567
22568 if (count == 1
22569 || (count * ix86_cost->add <= ix86_cost->shift_const
22570 && !optimize_insn_for_size_p ()))
22571 {
22572 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22573 while (count-- > 0)
22574 emit_insn (insn (operand, operand, operand));
22575 }
22576 else
22577 {
22578 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22579 emit_insn (insn (operand, operand, GEN_INT (count)));
22580 }
22581 }
22582
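/* Split a double-word left shift: operands[0] and operands[1] are the
   double-word destination and source, operands[2] is the shift count and
   SCRATCH an optional scratch register.  MODE is the double-word mode,
   i.e. DImode when splitting into SImode halves and TImode when splitting
   into DImode halves.  */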
22583 void
22584 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22585 {
22586 rtx (*gen_ashl3)(rtx, rtx, rtx);
22587 rtx (*gen_shld)(rtx, rtx, rtx);
22588 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22589
22590 rtx low[2], high[2];
22591 int count;
22592
22593 if (CONST_INT_P (operands[2]))
22594 {
22595 split_double_mode (mode, operands, 2, low, high);
22596 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22597
22598 if (count >= half_width)
22599 {
22600 emit_move_insn (high[0], low[1]);
22601 emit_move_insn (low[0], const0_rtx);
22602
22603 if (count > half_width)
22604 ix86_expand_ashl_const (high[0], count - half_width, mode);
22605 }
22606 else
22607 {
22608 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22609
22610 if (!rtx_equal_p (operands[0], operands[1]))
22611 emit_move_insn (operands[0], operands[1]);
22612
22613 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22614 ix86_expand_ashl_const (low[0], count, mode);
22615 }
22616 return;
22617 }
22618
22619 split_double_mode (mode, operands, 1, low, high);
22620
22621 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22622
22623 if (operands[1] == const1_rtx)
22624 {
22625 /* Assuming we've chosen QImode-capable registers, 1 << N
22626 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22627 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22628 {
22629 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22630
22631 ix86_expand_clear (low[0]);
22632 ix86_expand_clear (high[0]);
22633 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22634
22635 d = gen_lowpart (QImode, low[0]);
22636 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22637 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22638 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22639
22640 d = gen_lowpart (QImode, high[0]);
22641 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22642 s = gen_rtx_NE (QImode, flags, const0_rtx);
22643 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22644 }
22645
22646 /* Otherwise, we can get the same results by manually performing
22647 a bit extract operation on bit 5/6, and then performing the two
22648 shifts. The two methods of getting 0/1 into low/high are exactly
22649 the same size. Avoiding the shift in the bit extract case helps
22650 pentium4 a bit; no one else seems to care much either way. */
22651 else
22652 {
22653 enum machine_mode half_mode;
22654 rtx (*gen_lshr3)(rtx, rtx, rtx);
22655 rtx (*gen_and3)(rtx, rtx, rtx);
22656 rtx (*gen_xor3)(rtx, rtx, rtx);
22657 HOST_WIDE_INT bits;
22658 rtx x;
22659
22660 if (mode == DImode)
22661 {
22662 half_mode = SImode;
22663 gen_lshr3 = gen_lshrsi3;
22664 gen_and3 = gen_andsi3;
22665 gen_xor3 = gen_xorsi3;
22666 bits = 5;
22667 }
22668 else
22669 {
22670 half_mode = DImode;
22671 gen_lshr3 = gen_lshrdi3;
22672 gen_and3 = gen_anddi3;
22673 gen_xor3 = gen_xordi3;
22674 bits = 6;
22675 }
22676
22677 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22678 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22679 else
22680 x = gen_lowpart (half_mode, operands[2]);
22681 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22682
22683 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22684 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22685 emit_move_insn (low[0], high[0]);
22686 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22687 }
22688
22689 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22690 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22691 return;
22692 }
22693
22694 if (operands[1] == constm1_rtx)
22695 {
22696 /* For -1 << N, we can avoid the shld instruction, because we
22697 know that we're shifting 0...31/63 ones into a -1. */
22698 emit_move_insn (low[0], constm1_rtx);
22699 if (optimize_insn_for_size_p ())
22700 emit_move_insn (high[0], low[0]);
22701 else
22702 emit_move_insn (high[0], constm1_rtx);
22703 }
22704 else
22705 {
22706 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22707
22708 if (!rtx_equal_p (operands[0], operands[1]))
22709 emit_move_insn (operands[0], operands[1]);
22710
22711 split_double_mode (mode, operands, 1, low, high);
22712 emit_insn (gen_shld (high[0], low[0], operands[2]));
22713 }
22714
22715 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22716
22717 if (TARGET_CMOVE && scratch)
22718 {
22719 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22720 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22721
22722 ix86_expand_clear (scratch);
22723 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22724 }
22725 else
22726 {
22727 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22728 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22729
22730 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22731 }
22732 }
22733
22734 void
22735 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22736 {
22737 rtx (*gen_ashr3)(rtx, rtx, rtx)
22738 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22739 rtx (*gen_shrd)(rtx, rtx, rtx);
22740 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22741
22742 rtx low[2], high[2];
22743 int count;
22744
22745 if (CONST_INT_P (operands[2]))
22746 {
22747 split_double_mode (mode, operands, 2, low, high);
22748 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22749
22750 if (count == GET_MODE_BITSIZE (mode) - 1)
22751 {
22752 emit_move_insn (high[0], high[1]);
22753 emit_insn (gen_ashr3 (high[0], high[0],
22754 GEN_INT (half_width - 1)));
22755 emit_move_insn (low[0], high[0]);
22756
22757 }
22758 else if (count >= half_width)
22759 {
22760 emit_move_insn (low[0], high[1]);
22761 emit_move_insn (high[0], low[0]);
22762 emit_insn (gen_ashr3 (high[0], high[0],
22763 GEN_INT (half_width - 1)));
22764
22765 if (count > half_width)
22766 emit_insn (gen_ashr3 (low[0], low[0],
22767 GEN_INT (count - half_width)));
22768 }
22769 else
22770 {
22771 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22772
22773 if (!rtx_equal_p (operands[0], operands[1]))
22774 emit_move_insn (operands[0], operands[1]);
22775
22776 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22777 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22778 }
22779 }
22780 else
22781 {
22782 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22783
22784 if (!rtx_equal_p (operands[0], operands[1]))
22785 emit_move_insn (operands[0], operands[1]);
22786
22787 split_double_mode (mode, operands, 1, low, high);
22788
22789 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22790 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22791
22792 if (TARGET_CMOVE && scratch)
22793 {
22794 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22795 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22796
22797 emit_move_insn (scratch, high[0]);
22798 emit_insn (gen_ashr3 (scratch, scratch,
22799 GEN_INT (half_width - 1)));
22800 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22801 scratch));
22802 }
22803 else
22804 {
22805 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22806 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22807
22808 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22809 }
22810 }
22811 }
22812
22813 void
22814 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22815 {
22816 rtx (*gen_lshr3)(rtx, rtx, rtx)
22817 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22818 rtx (*gen_shrd)(rtx, rtx, rtx);
22819 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22820
22821 rtx low[2], high[2];
22822 int count;
22823
22824 if (CONST_INT_P (operands[2]))
22825 {
22826 split_double_mode (mode, operands, 2, low, high);
22827 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22828
22829 if (count >= half_width)
22830 {
22831 emit_move_insn (low[0], high[1]);
22832 ix86_expand_clear (high[0]);
22833
22834 if (count > half_width)
22835 emit_insn (gen_lshr3 (low[0], low[0],
22836 GEN_INT (count - half_width)));
22837 }
22838 else
22839 {
22840 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22841
22842 if (!rtx_equal_p (operands[0], operands[1]))
22843 emit_move_insn (operands[0], operands[1]);
22844
22845 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22846 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22847 }
22848 }
22849 else
22850 {
22851 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22852
22853 if (!rtx_equal_p (operands[0], operands[1]))
22854 emit_move_insn (operands[0], operands[1]);
22855
22856 split_double_mode (mode, operands, 1, low, high);
22857
22858 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22859 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22860
22861 if (TARGET_CMOVE && scratch)
22862 {
22863 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22864 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22865
22866 ix86_expand_clear (scratch);
22867 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22868 scratch));
22869 }
22870 else
22871 {
22872 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22873 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22874
22875 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22876 }
22877 }
22878 }
22879
22880 /* Predict just emitted jump instruction to be taken with probability PROB. */
22881 static void
22882 predict_jump (int prob)
22883 {
22884 rtx insn = get_last_insn ();
22885 gcc_assert (JUMP_P (insn));
22886 add_int_reg_note (insn, REG_BR_PROB, prob);
22887 }
22888
22889 /* Helper function for the string operations below.  Test whether VARIABLE
22890 is aligned to VALUE bytes.  If so, jump to the returned label. */
22891 static rtx_code_label *
22892 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22893 {
22894 rtx_code_label *label = gen_label_rtx ();
22895 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22896 if (GET_MODE (variable) == DImode)
22897 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22898 else
22899 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22900 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22901 1, label);
22902 if (epilogue)
22903 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22904 else
22905 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22906 return label;
22907 }
22908
22909 /* Decrease COUNTREG by VALUE. */
22910 static void
22911 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22912 {
22913 rtx (*gen_add)(rtx, rtx, rtx)
22914 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22915
22916 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22917 }
22918
22919 /* Zero extend EXP, which is possibly in SImode, to a Pmode register. */
22920 rtx
22921 ix86_zero_extend_to_Pmode (rtx exp)
22922 {
22923 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22924 }
22925
22926 /* Divide COUNTREG by SCALE. */
22927 static rtx
22928 scale_counter (rtx countreg, int scale)
22929 {
22930 rtx sc;
22931
22932 if (scale == 1)
22933 return countreg;
22934 if (CONST_INT_P (countreg))
22935 return GEN_INT (INTVAL (countreg) / scale);
22936 gcc_assert (REG_P (countreg));
22937
22938 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22939 GEN_INT (exact_log2 (scale)),
22940 NULL, 1, OPTAB_DIRECT);
22941 return sc;
22942 }
22943
22944 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22945 DImode for constant loop counts. */
22946
22947 static enum machine_mode
22948 counter_mode (rtx count_exp)
22949 {
22950 if (GET_MODE (count_exp) != VOIDmode)
22951 return GET_MODE (count_exp);
22952 if (!CONST_INT_P (count_exp))
22953 return Pmode;
22954 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22955 return DImode;
22956 return SImode;
22957 }
22958
22959 /* Copy the address to a Pmode register. This is used for x32 to
22960 truncate DImode TLS address to a SImode register. */
22961
22962 static rtx
22963 ix86_copy_addr_to_reg (rtx addr)
22964 {
22965 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22966 return copy_addr_to_reg (addr);
22967 else
22968 {
22969 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22970 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22971 }
22972 }
22973
22974 /* When ISSETMEM is FALSE, output a simple loop to copy the memory pointed
22975 to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the
22976 overall size is COUNT specified in bytes.  When ISSETMEM is TRUE, output
22977 the equivalent loop to set memory to VALUE (supposed to be in MODE).
22978
22979 The size is rounded down to a whole number of the chunk size moved at once.
22980 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
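/* Schematically (a sketch, not the exact RTL), for MODE == SImode and
   UNROLL == 2 the emitted code is roughly:

       size = count & ~7;  iter = 0;
     top:
       dest[iter] = src[iter];  dest[iter + 4] = src[iter + 4];
       iter += 8;
       if (iter < size) goto top;
       destptr += iter;  srcptr += iter;  */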
22981
22982
22983 static void
22984 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22985 rtx destptr, rtx srcptr, rtx value,
22986 rtx count, enum machine_mode mode, int unroll,
22987 int expected_size, bool issetmem)
22988 {
22989 rtx_code_label *out_label, *top_label;
22990 rtx iter, tmp;
22991 enum machine_mode iter_mode = counter_mode (count);
22992 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22993 rtx piece_size = GEN_INT (piece_size_n);
22994 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22995 rtx size;
22996 int i;
22997
22998 top_label = gen_label_rtx ();
22999 out_label = gen_label_rtx ();
23000 iter = gen_reg_rtx (iter_mode);
23001
23002 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23003 NULL, 1, OPTAB_DIRECT);
23004 /* Those two should combine. */
23005 if (piece_size == const1_rtx)
23006 {
23007 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23008 true, out_label);
23009 predict_jump (REG_BR_PROB_BASE * 10 / 100);
23010 }
23011 emit_move_insn (iter, const0_rtx);
23012
23013 emit_label (top_label);
23014
23015 tmp = convert_modes (Pmode, iter_mode, iter, true);
23016
23017 /* This assert could be relaxed - in that case we'll need to compute
23018 the smallest power of two containing PIECE_SIZE_N and pass it to
23019 offset_address. */
23020 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23021 destmem = offset_address (destmem, tmp, piece_size_n);
23022 destmem = adjust_address (destmem, mode, 0);
23023
23024 if (!issetmem)
23025 {
23026 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23027 srcmem = adjust_address (srcmem, mode, 0);
23028
23029 /* When unrolling for chips that reorder memory reads and writes,
23030 we can save registers by using a single temporary.
23031 Also, using 4 temporaries is overkill in 32-bit mode. */
23032 if (!TARGET_64BIT && 0)
23033 {
23034 for (i = 0; i < unroll; i++)
23035 {
23036 if (i)
23037 {
23038 destmem =
23039 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23040 srcmem =
23041 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23042 }
23043 emit_move_insn (destmem, srcmem);
23044 }
23045 }
23046 else
23047 {
23048 rtx tmpreg[4];
23049 gcc_assert (unroll <= 4);
23050 for (i = 0; i < unroll; i++)
23051 {
23052 tmpreg[i] = gen_reg_rtx (mode);
23053 if (i)
23054 {
23055 srcmem =
23056 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23057 }
23058 emit_move_insn (tmpreg[i], srcmem);
23059 }
23060 for (i = 0; i < unroll; i++)
23061 {
23062 if (i)
23063 {
23064 destmem =
23065 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23066 }
23067 emit_move_insn (destmem, tmpreg[i]);
23068 }
23069 }
23070 }
23071 else
23072 for (i = 0; i < unroll; i++)
23073 {
23074 if (i)
23075 destmem =
23076 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23077 emit_move_insn (destmem, value);
23078 }
23079
23080 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23081 true, OPTAB_LIB_WIDEN);
23082 if (tmp != iter)
23083 emit_move_insn (iter, tmp);
23084
23085 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23086 true, top_label);
23087 if (expected_size != -1)
23088 {
23089 expected_size /= GET_MODE_SIZE (mode) * unroll;
23090 if (expected_size == 0)
23091 predict_jump (0);
23092 else if (expected_size > REG_BR_PROB_BASE)
23093 predict_jump (REG_BR_PROB_BASE - 1);
23094 else
23095 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23096 }
23097 else
23098 predict_jump (REG_BR_PROB_BASE * 80 / 100);
23099 iter = ix86_zero_extend_to_Pmode (iter);
23100 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23101 true, OPTAB_LIB_WIDEN);
23102 if (tmp != destptr)
23103 emit_move_insn (destptr, tmp);
23104 if (!issetmem)
23105 {
23106 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23107 true, OPTAB_LIB_WIDEN);
23108 if (tmp != srcptr)
23109 emit_move_insn (srcptr, tmp);
23110 }
23111 emit_label (out_label);
23112 }
23113
23114 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23115 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23116 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
23117 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23118 ORIG_VALUE is the original value passed to memset to fill the memory with.
23119 Other arguments have the same meaning as for the previous function. */
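/* For a plain memcpy with MODE == SImode this boils down to roughly
   "countreg = count >> 2; rep movsd": the byte count is first scaled
   down to the number of SImode chunks.  */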
23120
23121 static void
23122 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23123 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23124 rtx count,
23125 enum machine_mode mode, bool issetmem)
23126 {
23127 rtx destexp;
23128 rtx srcexp;
23129 rtx countreg;
23130 HOST_WIDE_INT rounded_count;
23131
23132 /* If possible, it is shorter to use rep movs.
23133 TODO: Maybe it is better to move this logic to decide_alg. */
23134 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23135 && (!issetmem || orig_value == const0_rtx))
23136 mode = SImode;
23137
23138 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23139 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23140
23141 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23142 GET_MODE_SIZE (mode)));
23143 if (mode != QImode)
23144 {
23145 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23146 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23147 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23148 }
23149 else
23150 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23151 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23152 {
23153 rounded_count = (INTVAL (count)
23154 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23155 destmem = shallow_copy_rtx (destmem);
23156 set_mem_size (destmem, rounded_count);
23157 }
23158 else if (MEM_SIZE_KNOWN_P (destmem))
23159 clear_mem_size (destmem);
23160
23161 if (issetmem)
23162 {
23163 value = force_reg (mode, gen_lowpart (mode, value));
23164 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23165 }
23166 else
23167 {
23168 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23169 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23170 if (mode != QImode)
23171 {
23172 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23173 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23174 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23175 }
23176 else
23177 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23178 if (CONST_INT_P (count))
23179 {
23180 rounded_count = (INTVAL (count)
23181 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23182 srcmem = shallow_copy_rtx (srcmem);
23183 set_mem_size (srcmem, rounded_count);
23184 }
23185 else
23186 {
23187 if (MEM_SIZE_KNOWN_P (srcmem))
23188 clear_mem_size (srcmem);
23189 }
23190 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23191 destexp, srcexp));
23192 }
23193 }
23194
23195 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23196 DESTMEM.
23197 SRCMEM is passed by pointer so that it can be updated on return.
23198 The return value is the updated DESTMEM. */
23199 static rtx
23200 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23201 HOST_WIDE_INT size_to_move)
23202 {
23203 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23204 enum insn_code code;
23205 enum machine_mode move_mode;
23206 int piece_size, i;
23207
23208 /* Find the widest mode in which we could perform moves.
23209 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and
23210 halve it until a move of that size is supported. */
23211 piece_size = 1 << floor_log2 (size_to_move);
23212 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23213 code = optab_handler (mov_optab, move_mode);
23214 while (code == CODE_FOR_nothing && piece_size > 1)
23215 {
23216 piece_size >>= 1;
23217 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23218 code = optab_handler (mov_optab, move_mode);
23219 }
23220
23221 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23222 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23223 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23224 {
23225 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23226 move_mode = mode_for_vector (word_mode, nunits);
23227 code = optab_handler (mov_optab, move_mode);
23228 if (code == CODE_FOR_nothing)
23229 {
23230 move_mode = word_mode;
23231 piece_size = GET_MODE_SIZE (move_mode);
23232 code = optab_handler (mov_optab, move_mode);
23233 }
23234 }
23235 gcc_assert (code != CODE_FOR_nothing);
23236
23237 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23238 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23239
23240 /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23241 gcc_assert (size_to_move % piece_size == 0);
23242 adjust = GEN_INT (piece_size);
23243 for (i = 0; i < size_to_move; i += piece_size)
23244 {
23245 /* We move from memory to memory, so we'll need to do it via
23246 a temporary register. */
23247 tempreg = gen_reg_rtx (move_mode);
23248 emit_insn (GEN_FCN (code) (tempreg, src));
23249 emit_insn (GEN_FCN (code) (dst, tempreg));
23250
23251 emit_move_insn (destptr,
23252 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23253 emit_move_insn (srcptr,
23254 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23255
23256 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23257 piece_size);
23258 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23259 piece_size);
23260 }
23261
23262 /* Update DST and SRC rtx. */
23263 *srcmem = src;
23264 return dst;
23265 }
23266
23267 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
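/* For a constant COUNT the remainder is decomposed into powers of two,
   e.g. with MAX_SIZE == 16 and COUNT % 16 == 11 we emit an 8-byte, a
   2-byte and a 1-byte move.  */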
23268 static void
23269 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23270 rtx destptr, rtx srcptr, rtx count, int max_size)
23271 {
23272 rtx src, dest;
23273 if (CONST_INT_P (count))
23274 {
23275 HOST_WIDE_INT countval = INTVAL (count);
23276 HOST_WIDE_INT epilogue_size = countval % max_size;
23277 int i;
23278
23279 /* For now MAX_SIZE should be a power of 2. This assert could be
23280 relaxed, but it'll require a bit more complicated epilogue
23281 expanding. */
23282 gcc_assert ((max_size & (max_size - 1)) == 0);
23283 for (i = max_size; i >= 1; i >>= 1)
23284 {
23285 if (epilogue_size & i)
23286 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23287 }
23288 return;
23289 }
23290 if (max_size > 8)
23291 {
23292 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23293 count, 1, OPTAB_DIRECT);
23294 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23295 count, QImode, 1, 4, false);
23296 return;
23297 }
23298
23299 /* When single stringop instructions (TARGET_SINGLE_STRINGOP) are usable,
23300 we can cheaply increase the dest and src pointers with them.  Otherwise we
23301 save code size by maintaining an offset (zero is readily available from the
23302 preceding rep operation) and using x86 addressing modes. */
23303 if (TARGET_SINGLE_STRINGOP)
23304 {
23305 if (max_size > 4)
23306 {
23307 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23308 src = change_address (srcmem, SImode, srcptr);
23309 dest = change_address (destmem, SImode, destptr);
23310 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23311 emit_label (label);
23312 LABEL_NUSES (label) = 1;
23313 }
23314 if (max_size > 2)
23315 {
23316 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23317 src = change_address (srcmem, HImode, srcptr);
23318 dest = change_address (destmem, HImode, destptr);
23319 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23320 emit_label (label);
23321 LABEL_NUSES (label) = 1;
23322 }
23323 if (max_size > 1)
23324 {
23325 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23326 src = change_address (srcmem, QImode, srcptr);
23327 dest = change_address (destmem, QImode, destptr);
23328 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23329 emit_label (label);
23330 LABEL_NUSES (label) = 1;
23331 }
23332 }
23333 else
23334 {
23335 rtx offset = force_reg (Pmode, const0_rtx);
23336 rtx tmp;
23337
23338 if (max_size > 4)
23339 {
23340 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23341 src = change_address (srcmem, SImode, srcptr);
23342 dest = change_address (destmem, SImode, destptr);
23343 emit_move_insn (dest, src);
23344 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23345 true, OPTAB_LIB_WIDEN);
23346 if (tmp != offset)
23347 emit_move_insn (offset, tmp);
23348 emit_label (label);
23349 LABEL_NUSES (label) = 1;
23350 }
23351 if (max_size > 2)
23352 {
23353 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23354 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23355 src = change_address (srcmem, HImode, tmp);
23356 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23357 dest = change_address (destmem, HImode, tmp);
23358 emit_move_insn (dest, src);
23359 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23360 true, OPTAB_LIB_WIDEN);
23361 if (tmp != offset)
23362 emit_move_insn (offset, tmp);
23363 emit_label (label);
23364 LABEL_NUSES (label) = 1;
23365 }
23366 if (max_size > 1)
23367 {
23368 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23369 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23370 src = change_address (srcmem, QImode, tmp);
23371 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23372 dest = change_address (destmem, QImode, tmp);
23373 emit_move_insn (dest, src);
23374 emit_label (label);
23375 LABEL_NUSES (label) = 1;
23376 }
23377 }
23378 }
23379
23380 /* This function emits stores to fill SIZE_TO_MOVE bytes starting at DESTMEM
23381 with the value PROMOTED_VAL.
23382 DESTPTR is advanced as the stores are emitted.
23383 Return value is the updated DST. */
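/* Illustrative sketch (assumed values, not from the sources): on a 64-bit
   target with a DImode PROMOTED_VAL and SIZE_TO_MOVE of 16, PIECE_SIZE is 8
   and the loop below emits two strset stores of 8 bytes each, with DESTPTR
   advanced by 8 after every store.  */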
23384 static rtx
23385 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23386 HOST_WIDE_INT size_to_move)
23387 {
23388 rtx dst = destmem, adjust;
23389 enum insn_code code;
23390 enum machine_mode move_mode;
23391 int piece_size, i;
23392
23393 /* Find the widest mode in which we can perform moves.
23394 Start from the mode of PROMOTED_VAL and narrow it if SIZE_TO_MOVE
23395 is smaller than that mode's size. */
23396 move_mode = GET_MODE (promoted_val);
23397 if (move_mode == VOIDmode)
23398 move_mode = QImode;
23399 if (size_to_move < GET_MODE_SIZE (move_mode))
23400 {
23401 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23402 promoted_val = gen_lowpart (move_mode, promoted_val);
23403 }
23404 piece_size = GET_MODE_SIZE (move_mode);
23405 code = optab_handler (mov_optab, move_mode);
23406 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23407
23408 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23409
23410 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23411 gcc_assert (size_to_move % piece_size == 0);
23412 adjust = GEN_INT (piece_size);
23413 for (i = 0; i < size_to_move; i += piece_size)
23414 {
23415 if (piece_size <= GET_MODE_SIZE (word_mode))
23416 {
23417 emit_insn (gen_strset (destptr, dst, promoted_val));
23418 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23419 piece_size);
23420 continue;
23421 }
23422
23423 emit_insn (GEN_FCN (code) (dst, promoted_val));
23424
23425 emit_move_insn (destptr,
23426 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23427
23428 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23429 piece_size);
23430 }
23431
23432 /* Update DST rtx. */
23433 return dst;
23434 }
23435 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23436 static void
23437 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23438 rtx count, int max_size)
23439 {
23440 count =
23441 expand_simple_binop (counter_mode (count), AND, count,
23442 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23443 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23444 gen_lowpart (QImode, value), count, QImode,
23445 1, max_size / 2, true);
23446 }
23447
23448 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23449 static void
23450 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23451 rtx count, int max_size)
23452 {
23453 rtx dest;
23454
23455 if (CONST_INT_P (count))
23456 {
23457 HOST_WIDE_INT countval = INTVAL (count);
23458 HOST_WIDE_INT epilogue_size = countval % max_size;
23459 int i;
23460
23461 /* For now MAX_SIZE should be a power of 2. This assert could be
23462 relaxed, but it'll require a bit more complicated epilogue
23463 expansion. */
23464 gcc_assert ((max_size & (max_size - 1)) == 0);
23465 for (i = max_size; i >= 1; i >>= 1)
23466 {
23467 if (epilogue_size & i)
23468 {
23469 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23470 destmem = emit_memset (destmem, destptr, vec_value, i);
23471 else
23472 destmem = emit_memset (destmem, destptr, value, i);
23473 }
23474 }
23475 return;
23476 }
23477 if (max_size > 32)
23478 {
23479 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23480 return;
23481 }
23482 if (max_size > 16)
23483 {
23484 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23485 if (TARGET_64BIT)
23486 {
23487 dest = change_address (destmem, DImode, destptr);
23488 emit_insn (gen_strset (destptr, dest, value));
23489 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23490 emit_insn (gen_strset (destptr, dest, value));
23491 }
23492 else
23493 {
23494 dest = change_address (destmem, SImode, destptr);
23495 emit_insn (gen_strset (destptr, dest, value));
23496 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23497 emit_insn (gen_strset (destptr, dest, value));
23498 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23499 emit_insn (gen_strset (destptr, dest, value));
23500 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23501 emit_insn (gen_strset (destptr, dest, value));
23502 }
23503 emit_label (label);
23504 LABEL_NUSES (label) = 1;
23505 }
23506 if (max_size > 8)
23507 {
23508 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23509 if (TARGET_64BIT)
23510 {
23511 dest = change_address (destmem, DImode, destptr);
23512 emit_insn (gen_strset (destptr, dest, value));
23513 }
23514 else
23515 {
23516 dest = change_address (destmem, SImode, destptr);
23517 emit_insn (gen_strset (destptr, dest, value));
23518 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23519 emit_insn (gen_strset (destptr, dest, value));
23520 }
23521 emit_label (label);
23522 LABEL_NUSES (label) = 1;
23523 }
23524 if (max_size > 4)
23525 {
23526 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23527 dest = change_address (destmem, SImode, destptr);
23528 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23529 emit_label (label);
23530 LABEL_NUSES (label) = 1;
23531 }
23532 if (max_size > 2)
23533 {
23534 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23535 dest = change_address (destmem, HImode, destptr);
23536 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23537 emit_label (label);
23538 LABEL_NUSES (label) = 1;
23539 }
23540 if (max_size > 1)
23541 {
23542 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23543 dest = change_address (destmem, QImode, destptr);
23544 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23545 emit_label (label);
23546 LABEL_NUSES (label) = 1;
23547 }
23548 }
23549
23550 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23551 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23552 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23553 ignored.
23554 Return value is updated DESTMEM. */
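/* Worked example (assumed values): with ALIGN == 1 and DESIRED_ALIGNMENT == 8
   the loop below emits three conditional blocks, testing bits 1, 2 and 4 of
   DESTPTR; each block copies (or stores) that many bytes and decreases COUNT,
   so that after all three tests DESTPTR is 8-byte aligned.  */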
23555 static rtx
23556 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23557 rtx destptr, rtx srcptr, rtx value,
23558 rtx vec_value, rtx count, int align,
23559 int desired_alignment, bool issetmem)
23560 {
23561 int i;
23562 for (i = 1; i < desired_alignment; i <<= 1)
23563 {
23564 if (align <= i)
23565 {
23566 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23567 if (issetmem)
23568 {
23569 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23570 destmem = emit_memset (destmem, destptr, vec_value, i);
23571 else
23572 destmem = emit_memset (destmem, destptr, value, i);
23573 }
23574 else
23575 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23576 ix86_adjust_counter (count, i);
23577 emit_label (label);
23578 LABEL_NUSES (label) = 1;
23579 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23580 }
23581 }
23582 return destmem;
23583 }
23584
23585 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23586 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23587 and jump to DONE_LABEL. */
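/* Illustration of the overlap trick used below (assumed values): for SIZE == 4
   and a runtime COUNT of 6, the first loop copies bytes 0..3 and the second
   loop copies the last SIZE bytes, i.e. bytes 2..5.  The two ranges overlap,
   which is harmless for memcpy/memset and avoids a byte-at-a-time loop.  */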
23588 static void
23589 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23590 rtx destptr, rtx srcptr,
23591 rtx value, rtx vec_value,
23592 rtx count, int size,
23593 rtx done_label, bool issetmem)
23594 {
23595 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23596 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23597 rtx modesize;
23598 int n;
23599
23600 /* If we do not have a vector value to copy, we must reduce the size. */
23601 if (issetmem)
23602 {
23603 if (!vec_value)
23604 {
23605 if (GET_MODE (value) == VOIDmode && size > 8)
23606 mode = Pmode;
23607 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23608 mode = GET_MODE (value);
23609 }
23610 else
23611 mode = GET_MODE (vec_value), value = vec_value;
23612 }
23613 else
23614 {
23615 /* Choose appropriate vector mode. */
23616 if (size >= 32)
23617 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23618 else if (size >= 16)
23619 mode = TARGET_SSE ? V16QImode : DImode;
23620 srcmem = change_address (srcmem, mode, srcptr);
23621 }
23622 destmem = change_address (destmem, mode, destptr);
23623 modesize = GEN_INT (GET_MODE_SIZE (mode));
23624 gcc_assert (GET_MODE_SIZE (mode) <= size);
23625 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23626 {
23627 if (issetmem)
23628 emit_move_insn (destmem, gen_lowpart (mode, value));
23629 else
23630 {
23631 emit_move_insn (destmem, srcmem);
23632 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23633 }
23634 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23635 }
23636
23637 destmem = offset_address (destmem, count, 1);
23638 destmem = offset_address (destmem, GEN_INT (-2 * size),
23639 GET_MODE_SIZE (mode));
23640 if (!issetmem)
23641 {
23642 srcmem = offset_address (srcmem, count, 1);
23643 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23644 GET_MODE_SIZE (mode));
23645 }
23646 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23647 {
23648 if (issetmem)
23649 emit_move_insn (destmem, gen_lowpart (mode, value));
23650 else
23651 {
23652 emit_move_insn (destmem, srcmem);
23653 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23654 }
23655 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23656 }
23657 emit_jump_insn (gen_jump (done_label));
23658 emit_barrier ();
23659
23660 emit_label (label);
23661 LABEL_NUSES (label) = 1;
23662 }
23663
23664 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23665 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23666 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way that we can
23667 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23668 DONE_LABEL is a label after the whole copying sequence. The label is created
23669 on demand if *DONE_LABEL is NULL.
23670 MIN_SIZE is the minimal size of the copied block. This value gets adjusted
23671 for the new bounds after the initial copies.
23672
23673 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23674 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23675 we will dispatch to a library call for large blocks.
23676
23677 In pseudocode we do:
23678
23679 if (COUNT < SIZE)
23680 {
23681 Assume that SIZE is 4. Bigger sizes are handled analogously
23682 if (COUNT & 4)
23683 {
23684 copy 4 bytes from SRCPTR to DESTPTR
23685 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23686 goto done_label
23687 }
23688 if (!COUNT)
23689 goto done_label;
23690 copy 1 byte from SRCPTR to DESTPTR
23691 if (COUNT & 2)
23692 {
23693 copy 2 bytes from SRCPTR to DESTPTR
23694 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23695 }
23696 }
23697 else
23698 {
23699 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23700 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23701
23702 OLD_DESTPTR = DESTPTR;
23703 Align DESTPTR up to DESIRED_ALIGN
23704 SRCPTR += DESTPTR - OLD_DESTPTR
23705 COUNT -= DESTPTR - OLD_DESTPTR
23706 if (DYNAMIC_CHECK)
23707 Round COUNT down to multiple of SIZE
23708 << optional caller supplied zero size guard is here >>
23709 << optional caller supplied dynamic check is here >>
23710 << caller supplied main copy loop is here >>
23711 }
23712 done_label:
23713 */
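/* Worked example of the alignment step above (assumed values): if DESTPTR is
   0x1003, PROLOG_SIZE is 16 and DESIRED_ALIGN is 16, the new DESTPTR becomes
   (0x1003 + 16) & -16 = 0x1010, so 0x1010 - 0x1003 = 13 bytes of the block
   were already covered by the prologue copies; SRCPTR is then advanced and
   COUNT reduced by those 13 bytes before the main loop starts.  */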
23714 static void
23715 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23716 rtx *destptr, rtx *srcptr,
23717 enum machine_mode mode,
23718 rtx value, rtx vec_value,
23719 rtx *count,
23720 rtx_code_label **done_label,
23721 int size,
23722 int desired_align,
23723 int align,
23724 unsigned HOST_WIDE_INT *min_size,
23725 bool dynamic_check,
23726 bool issetmem)
23727 {
23728 rtx_code_label *loop_label = NULL, *label;
23729 int n;
23730 rtx modesize;
23731 int prolog_size = 0;
23732 rtx mode_value;
23733
23734 /* Choose the proper value to copy. */
23735 if (issetmem && VECTOR_MODE_P (mode))
23736 mode_value = vec_value;
23737 else
23738 mode_value = value;
23739 gcc_assert (GET_MODE_SIZE (mode) <= size);
23740
23741 /* See if block is big or small, handle small blocks. */
23742 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23743 {
23744 int size2 = size;
23745 loop_label = gen_label_rtx ();
23746
23747 if (!*done_label)
23748 *done_label = gen_label_rtx ();
23749
23750 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23751 1, loop_label);
23752 size2 >>= 1;
23753
23754 /* Handle sizes > 3. */
23755 for (;size2 > 2; size2 >>= 1)
23756 expand_small_movmem_or_setmem (destmem, srcmem,
23757 *destptr, *srcptr,
23758 value, vec_value,
23759 *count,
23760 size2, *done_label, issetmem);
23761 /* Nothing to copy? Jump to DONE_LABEL if so. */
23762 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23763 1, *done_label);
23764
23765 /* Do a byte copy. */
23766 destmem = change_address (destmem, QImode, *destptr);
23767 if (issetmem)
23768 emit_move_insn (destmem, gen_lowpart (QImode, value));
23769 else
23770 {
23771 srcmem = change_address (srcmem, QImode, *srcptr);
23772 emit_move_insn (destmem, srcmem);
23773 }
23774
23775 /* Handle sizes 2 and 3. */
23776 label = ix86_expand_aligntest (*count, 2, false);
23777 destmem = change_address (destmem, HImode, *destptr);
23778 destmem = offset_address (destmem, *count, 1);
23779 destmem = offset_address (destmem, GEN_INT (-2), 2);
23780 if (issetmem)
23781 emit_move_insn (destmem, gen_lowpart (HImode, value));
23782 else
23783 {
23784 srcmem = change_address (srcmem, HImode, *srcptr);
23785 srcmem = offset_address (srcmem, *count, 1);
23786 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23787 emit_move_insn (destmem, srcmem);
23788 }
23789
23790 emit_label (label);
23791 LABEL_NUSES (label) = 1;
23792 emit_jump_insn (gen_jump (*done_label));
23793 emit_barrier ();
23794 }
23795 else
23796 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23797 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23798
23799 /* Start the copy/set sequence for COUNT >= SIZE. */
23800 if (loop_label)
23801 {
23802 emit_label (loop_label);
23803 LABEL_NUSES (loop_label) = 1;
23804 }
23805
23806 /* Copy or store the first few bytes (at least DESIRED_ALIGN - ALIGN). */
23807 if (!issetmem)
23808 srcmem = change_address (srcmem, mode, *srcptr);
23809 destmem = change_address (destmem, mode, *destptr);
23810 modesize = GEN_INT (GET_MODE_SIZE (mode));
23811 for (n = 0; prolog_size < desired_align - align; n++)
23812 {
23813 if (issetmem)
23814 emit_move_insn (destmem, mode_value);
23815 else
23816 {
23817 emit_move_insn (destmem, srcmem);
23818 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23819 }
23820 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23821 prolog_size += GET_MODE_SIZE (mode);
23822 }
23823
23824
23825 /* Copy last SIZE bytes. */
23826 destmem = offset_address (destmem, *count, 1);
23827 destmem = offset_address (destmem,
23828 GEN_INT (-size - prolog_size),
23829 1);
23830 if (issetmem)
23831 emit_move_insn (destmem, mode_value);
23832 else
23833 {
23834 srcmem = offset_address (srcmem, *count, 1);
23835 srcmem = offset_address (srcmem,
23836 GEN_INT (-size - prolog_size),
23837 1);
23838 emit_move_insn (destmem, srcmem);
23839 }
23840 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23841 {
23842 destmem = offset_address (destmem, modesize, 1);
23843 if (issetmem)
23844 emit_move_insn (destmem, mode_value);
23845 else
23846 {
23847 srcmem = offset_address (srcmem, modesize, 1);
23848 emit_move_insn (destmem, srcmem);
23849 }
23850 }
23851
23852 /* Align destination. */
23853 if (desired_align > 1 && desired_align > align)
23854 {
23855 rtx saveddest = *destptr;
23856
23857 gcc_assert (desired_align <= size);
23858 /* Align destptr up, placing it in a new register. */
23859 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23860 GEN_INT (prolog_size),
23861 NULL_RTX, 1, OPTAB_DIRECT);
23862 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23863 GEN_INT (-desired_align),
23864 *destptr, 1, OPTAB_DIRECT);
23865 /* See how many bytes we skipped. */
23866 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23867 *destptr,
23868 saveddest, 1, OPTAB_DIRECT);
23869 /* Adjust srcptr and count. */
23870 if (!issetmem)
23871 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23872 *srcptr, 1, OPTAB_DIRECT);
23873 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23874 saveddest, *count, 1, OPTAB_DIRECT);
23875 /* We copied at most size + prolog_size. */
23876 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23877 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23878 else
23879 *min_size = 0;
23880
23881 /* Our loops always round down the block size, but for dispatch to the
23882 library we need the precise value. */
23883 if (dynamic_check)
23884 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23885 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23886 }
23887 else
23888 {
23889 gcc_assert (prolog_size == 0);
23890 /* Decrease count, so we won't end up copying the last word twice. */
23891 if (!CONST_INT_P (*count))
23892 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23893 constm1_rtx, *count, 1, OPTAB_DIRECT);
23894 else
23895 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23896 if (*min_size)
23897 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23898 }
23899 }
23900
23901
23902 /* This function is like the previous one, except here we know how many bytes
23903 need to be copied. That allows us to update alignment not only of DST, which
23904 is returned, but also of SRC, which is passed as a pointer for that
23905 reason. */
23906 static rtx
23907 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23908 rtx srcreg, rtx value, rtx vec_value,
23909 int desired_align, int align_bytes,
23910 bool issetmem)
23911 {
23912 rtx src = NULL;
23913 rtx orig_dst = dst;
23914 rtx orig_src = NULL;
23915 int piece_size = 1;
23916 int copied_bytes = 0;
23917
23918 if (!issetmem)
23919 {
23920 gcc_assert (srcp != NULL);
23921 src = *srcp;
23922 orig_src = src;
23923 }
23924
23925 for (piece_size = 1;
23926 piece_size <= desired_align && copied_bytes < align_bytes;
23927 piece_size <<= 1)
23928 {
23929 if (align_bytes & piece_size)
23930 {
23931 if (issetmem)
23932 {
23933 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23934 dst = emit_memset (dst, destreg, vec_value, piece_size);
23935 else
23936 dst = emit_memset (dst, destreg, value, piece_size);
23937 }
23938 else
23939 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23940 copied_bytes += piece_size;
23941 }
23942 }
23943 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23944 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23945 if (MEM_SIZE_KNOWN_P (orig_dst))
23946 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23947
23948 if (!issetmem)
23949 {
23950 int src_align_bytes = get_mem_align_offset (src, desired_align
23951 * BITS_PER_UNIT);
23952 if (src_align_bytes >= 0)
23953 src_align_bytes = desired_align - src_align_bytes;
23954 if (src_align_bytes >= 0)
23955 {
23956 unsigned int src_align;
23957 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23958 {
23959 if ((src_align_bytes & (src_align - 1))
23960 == (align_bytes & (src_align - 1)))
23961 break;
23962 }
23963 if (src_align > (unsigned int) desired_align)
23964 src_align = desired_align;
23965 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23966 set_mem_align (src, src_align * BITS_PER_UNIT);
23967 }
23968 if (MEM_SIZE_KNOWN_P (orig_src))
23969 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23970 *srcp = src;
23971 }
23972
23973 return dst;
23974 }
23975
23976 /* Return true if ALG can be used in current context.
23977 Assume we expand memset if MEMSET is true. */
23978 static bool
23979 alg_usable_p (enum stringop_alg alg, bool memset)
23980 {
23981 if (alg == no_stringop)
23982 return false;
23983 if (alg == vector_loop)
23984 return TARGET_SSE || TARGET_AVX;
23985 /* Algorithms using the rep prefix want at least edi and ecx;
23986 additionally, memset wants eax and memcpy wants esi. Don't
23987 consider such algorithms if the user has appropriated those
23988 registers for their own purposes. */
23989 if (alg == rep_prefix_1_byte
23990 || alg == rep_prefix_4_byte
23991 || alg == rep_prefix_8_byte)
23992 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23993 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23994 return true;
23995 }
23996
23997 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
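/* Example of the table-driven selection below (hypothetical cost-table
   entries, not one of the real per-CPU tables): with stringop_algs size
   entries {{256, loop, false}, {-1, rep_prefix_4_byte, false}} and an
   expected size of 100, the first entry with max >= expected_size wins and
   the loop algorithm is chosen; for an expected size of 1000 the open-ended
   -1 entry selects rep_prefix_4_byte instead, provided the registers needed
   by the rep prefix are not fixed.  */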
23998 static enum stringop_alg
23999 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24000 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24001 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
24002 {
24003 const struct stringop_algs * algs;
24004 bool optimize_for_speed;
24005 int max = 0;
24006 const struct processor_costs *cost;
24007 int i;
24008 bool any_alg_usable_p = false;
24009
24010 *noalign = false;
24011 *dynamic_check = -1;
24012
24013 /* Even if the string operation call is cold, we still might spend a lot
24014 of time processing large blocks. */
24015 if (optimize_function_for_size_p (cfun)
24016 || (optimize_insn_for_size_p ()
24017 && (max_size < 256
24018 || (expected_size != -1 && expected_size < 256))))
24019 optimize_for_speed = false;
24020 else
24021 optimize_for_speed = true;
24022
24023 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24024 if (memset)
24025 algs = &cost->memset[TARGET_64BIT != 0];
24026 else
24027 algs = &cost->memcpy[TARGET_64BIT != 0];
24028
24029 /* Find the maximal size covered by a non-libcall algorithm. */
24030 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24031 {
24032 enum stringop_alg candidate = algs->size[i].alg;
24033 bool usable = alg_usable_p (candidate, memset);
24034 any_alg_usable_p |= usable;
24035
24036 if (candidate != libcall && candidate && usable)
24037 max = algs->size[i].max;
24038 }
24039
24040 /* If the expected size is not known but the max size is small enough
24041 that the inline version is a win, set the expected size into
24042 the range. */
24043 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24044 && expected_size == -1)
24045 expected_size = min_size / 2 + max_size / 2;
24046
24047 /* If the user specified the algorithm, honor it if possible. */
24048 if (ix86_stringop_alg != no_stringop
24049 && alg_usable_p (ix86_stringop_alg, memset))
24050 return ix86_stringop_alg;
24051 /* rep; movq or rep; movl is the smallest variant. */
24052 else if (!optimize_for_speed)
24053 {
24054 *noalign = true;
24055 if (!count || (count & 3) || (memset && !zero_memset))
24056 return alg_usable_p (rep_prefix_1_byte, memset)
24057 ? rep_prefix_1_byte : loop_1_byte;
24058 else
24059 return alg_usable_p (rep_prefix_4_byte, memset)
24060 ? rep_prefix_4_byte : loop;
24061 }
24062 /* Very tiny blocks are best handled via the loop; REP is expensive to
24063 set up. */
24064 else if (expected_size != -1 && expected_size < 4)
24065 return loop_1_byte;
24066 else if (expected_size != -1)
24067 {
24068 enum stringop_alg alg = libcall;
24069 bool alg_noalign = false;
24070 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24071 {
24072 /* We get here if the algorithms that were not libcall-based
24073 were rep-prefix based and we are unable to use rep prefixes
24074 based on global register usage. Break out of the loop and
24075 use the heuristic below. */
24076 if (algs->size[i].max == 0)
24077 break;
24078 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24079 {
24080 enum stringop_alg candidate = algs->size[i].alg;
24081
24082 if (candidate != libcall && alg_usable_p (candidate, memset))
24083 {
24084 alg = candidate;
24085 alg_noalign = algs->size[i].noalign;
24086 }
24087 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24088 last non-libcall inline algorithm. */
24089 if (TARGET_INLINE_ALL_STRINGOPS)
24090 {
24091 /* When the current size is best to be copied by a libcall,
24092 but we are still forced to inline, run the heuristic below
24093 that will pick code for medium sized blocks. */
24094 if (alg != libcall)
24095 {
24096 *noalign = alg_noalign;
24097 return alg;
24098 }
24099 break;
24100 }
24101 else if (alg_usable_p (candidate, memset))
24102 {
24103 *noalign = algs->size[i].noalign;
24104 return candidate;
24105 }
24106 }
24107 }
24108 }
24109 /* When asked to inline the call anyway, try to pick a meaningful choice.
24110 We look for the maximal size of block that is faster to copy by hand and
24111 take blocks of at most that size, guessing that the average size will
24112 be roughly half of the block.
24113
24114 If this turns out to be bad, we might simply specify the preferred
24115 choice in ix86_costs. */
24116 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24117 && (algs->unknown_size == libcall
24118 || !alg_usable_p (algs->unknown_size, memset)))
24119 {
24120 enum stringop_alg alg;
24121
24122 /* If there aren't any usable algorithms, then recursing on
24123 smaller sizes isn't going to find anything. Just return the
24124 simple byte-at-a-time copy loop. */
24125 if (!any_alg_usable_p)
24126 {
24127 /* Pick something reasonable. */
24128 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24129 *dynamic_check = 128;
24130 return loop_1_byte;
24131 }
24132 if (max <= 0)
24133 max = 4096;
24134 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24135 zero_memset, dynamic_check, noalign);
24136 gcc_assert (*dynamic_check == -1);
24137 gcc_assert (alg != libcall);
24138 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24139 *dynamic_check = max;
24140 return alg;
24141 }
24142 return (alg_usable_p (algs->unknown_size, memset)
24143 ? algs->unknown_size : libcall);
24144 }
24145
24146 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24147 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24148 static int
24149 decide_alignment (int align,
24150 enum stringop_alg alg,
24151 int expected_size,
24152 enum machine_mode move_mode)
24153 {
24154 int desired_align = 0;
24155
24156 gcc_assert (alg != no_stringop);
24157
24158 if (alg == libcall)
24159 return 0;
24160 if (move_mode == VOIDmode)
24161 return 0;
24162
24163 desired_align = GET_MODE_SIZE (move_mode);
24164 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
24165 copying a whole cache line at once. */
24166 if (TARGET_PENTIUMPRO
24167 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24168 desired_align = 8;
24169
24170 if (optimize_size)
24171 desired_align = 1;
24172 if (desired_align < align)
24173 desired_align = align;
24174 if (expected_size != -1 && expected_size < 4)
24175 desired_align = align;
24176
24177 return desired_align;
24178 }
24179
24180
24181 /* Helper function for memset. For the QImode value 0xXY produce
24182 0xXYXYXYXY of the width specified by MODE. This is essentially
24183 a multiplication by 0x01010101, but we can do slightly better than
24184 synth_mult by unwinding the sequence by hand on CPUs with
24185 slow multiply. */
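/* A minimal host-side sketch of the constant case handled below (illustration
   only, not part of the compiler):

     unsigned long long v = 0xAB;     // QImode value 0xXY
     v |= v << 8;                     // 0xABAB
     v |= v << 16;                    // 0xABABABAB
     v |= (v << 16) << 16;            // 0xABABABABABABABAB for DImode
     // v now equals 0xAB * 0x0101010101010101.

   The non-constant path below emits a comparable shift/ior (or insv-based)
   sequence when that is cheaper than the multiply.  */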
24186 static rtx
24187 promote_duplicated_reg (enum machine_mode mode, rtx val)
24188 {
24189 enum machine_mode valmode = GET_MODE (val);
24190 rtx tmp;
24191 int nops = mode == DImode ? 3 : 2;
24192
24193 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24194 if (val == const0_rtx)
24195 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24196 if (CONST_INT_P (val))
24197 {
24198 HOST_WIDE_INT v = INTVAL (val) & 255;
24199
24200 v |= v << 8;
24201 v |= v << 16;
24202 if (mode == DImode)
24203 v |= (v << 16) << 16;
24204 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24205 }
24206
24207 if (valmode == VOIDmode)
24208 valmode = QImode;
24209 if (valmode != QImode)
24210 val = gen_lowpart (QImode, val);
24211 if (mode == QImode)
24212 return val;
24213 if (!TARGET_PARTIAL_REG_STALL)
24214 nops--;
24215 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24216 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24217 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24218 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24219 {
24220 rtx reg = convert_modes (mode, QImode, val, true);
24221 tmp = promote_duplicated_reg (mode, const1_rtx);
24222 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24223 OPTAB_DIRECT);
24224 }
24225 else
24226 {
24227 rtx reg = convert_modes (mode, QImode, val, true);
24228
24229 if (!TARGET_PARTIAL_REG_STALL)
24230 if (mode == SImode)
24231 emit_insn (gen_movsi_insv_1 (reg, reg));
24232 else
24233 emit_insn (gen_movdi_insv_1 (reg, reg));
24234 else
24235 {
24236 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24237 NULL, 1, OPTAB_DIRECT);
24238 reg =
24239 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24240 }
24241 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24242 NULL, 1, OPTAB_DIRECT);
24243 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24244 if (mode == SImode)
24245 return reg;
24246 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24247 NULL, 1, OPTAB_DIRECT);
24248 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24249 return reg;
24250 }
24251 }
24252
24253 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24254 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24255 alignment from ALIGN to DESIRED_ALIGN. */
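/* Example (assumed values): with SIZE_NEEDED == 8 on a 64-bit target the
   value is broadcast to DImode; prologue and epilogue code can then derive
   the narrower 1/2/4-byte pieces from it with gen_lowpart.  */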
24256 static rtx
24257 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24258 int align)
24259 {
24260 rtx promoted_val;
24261
24262 if (TARGET_64BIT
24263 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24264 promoted_val = promote_duplicated_reg (DImode, val);
24265 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24266 promoted_val = promote_duplicated_reg (SImode, val);
24267 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24268 promoted_val = promote_duplicated_reg (HImode, val);
24269 else
24270 promoted_val = val;
24271
24272 return promoted_val;
24273 }
24274
24275 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24276 operations when profitable. The code depends upon architecture, block size
24277 and alignment, but always has one of the following overall structures:
24278
24279 Aligned move sequence:
24280
24281 1) Prologue guard: Conditional that jumps up to epilogues for small
24282 blocks that can be handled by epilogue alone. This is faster
24283 but also needed for correctness, since the prologue assumes the block
24284 is larger than the desired alignment.
24285
24286 Optional dynamic check for size and libcall for large
24287 blocks is emitted here too, with -minline-stringops-dynamically.
24288
24289 2) Prologue: copy first few bytes in order to get destination
24290 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24291 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24292 copied. We emit either a jump tree on power of two sized
24293 blocks, or a byte loop.
24294
24295 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24296 with specified algorithm.
24297
24298 4) Epilogue: code copying tail of the block that is too small to be
24299 handled by main body (or up to size guarded by prologue guard).
24300
24301 Misaligned move sequence
24302
24303 1) misaligned move prologue/epilogue containing:
24304 a) Prologue handling small memory blocks and jumping to done_label
24305 (skipped if blocks are known to be large enough)
24306 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment
24307 is needed, done by a single possibly misaligned move
24308 (skipped if alignment is not needed)
24309 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24310
24311 2) Zero size guard dispatching to done_label, if needed
24312
24313 3) dispatch to library call, if needed,
24314
24315 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24316 with specified algorithm. */
24317 bool
24318 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24319 rtx align_exp, rtx expected_align_exp,
24320 rtx expected_size_exp, rtx min_size_exp,
24321 rtx max_size_exp, rtx probable_max_size_exp,
24322 bool issetmem)
24323 {
24324 rtx destreg;
24325 rtx srcreg = NULL;
24326 rtx_code_label *label = NULL;
24327 rtx tmp;
24328 rtx_code_label *jump_around_label = NULL;
24329 HOST_WIDE_INT align = 1;
24330 unsigned HOST_WIDE_INT count = 0;
24331 HOST_WIDE_INT expected_size = -1;
24332 int size_needed = 0, epilogue_size_needed;
24333 int desired_align = 0, align_bytes = 0;
24334 enum stringop_alg alg;
24335 rtx promoted_val = NULL;
24336 rtx vec_promoted_val = NULL;
24337 bool force_loopy_epilogue = false;
24338 int dynamic_check;
24339 bool need_zero_guard = false;
24340 bool noalign;
24341 enum machine_mode move_mode = VOIDmode;
24342 int unroll_factor = 1;
24343 /* TODO: Once value ranges are available, fill in proper data. */
24344 unsigned HOST_WIDE_INT min_size = 0;
24345 unsigned HOST_WIDE_INT max_size = -1;
24346 unsigned HOST_WIDE_INT probable_max_size = -1;
24347 bool misaligned_prologue_used = false;
24348
24349 if (CONST_INT_P (align_exp))
24350 align = INTVAL (align_exp);
24351 /* i386 can do misaligned access at reasonably increased cost. */
24352 if (CONST_INT_P (expected_align_exp)
24353 && INTVAL (expected_align_exp) > align)
24354 align = INTVAL (expected_align_exp);
24355 /* ALIGN is the minimum of destination and source alignment, but we care here
24356 just about destination alignment. */
24357 else if (!issetmem
24358 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24359 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24360
24361 if (CONST_INT_P (count_exp))
24362 {
24363 min_size = max_size = probable_max_size = count = expected_size
24364 = INTVAL (count_exp);
24365 /* When COUNT is 0, there is nothing to do. */
24366 if (!count)
24367 return true;
24368 }
24369 else
24370 {
24371 if (min_size_exp)
24372 min_size = INTVAL (min_size_exp);
24373 if (max_size_exp)
24374 max_size = INTVAL (max_size_exp);
24375 if (probable_max_size_exp)
24376 probable_max_size = INTVAL (probable_max_size_exp);
24377 if (CONST_INT_P (expected_size_exp))
24378 expected_size = INTVAL (expected_size_exp);
24379 }
24380
24381 /* Make sure we don't need to care about overflow later on. */
24382 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24383 return false;
24384
24385 /* Step 0: Decide on preferred algorithm, desired alignment and
24386 size of chunks to be copied by main loop. */
24387 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24388 issetmem,
24389 issetmem && val_exp == const0_rtx,
24390 &dynamic_check, &noalign);
24391 if (alg == libcall)
24392 return false;
24393 gcc_assert (alg != no_stringop);
24394
24395 /* For now the vector version of memset is generated only for memory zeroing,
24396 as creating a promoted vector value is very cheap in this case. */
24397 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24398 alg = unrolled_loop;
24399
24400 if (!count)
24401 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24402 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24403 if (!issetmem)
24404 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24405
24406 unroll_factor = 1;
24407 move_mode = word_mode;
24408 switch (alg)
24409 {
24410 case libcall:
24411 case no_stringop:
24412 case last_alg:
24413 gcc_unreachable ();
24414 case loop_1_byte:
24415 need_zero_guard = true;
24416 move_mode = QImode;
24417 break;
24418 case loop:
24419 need_zero_guard = true;
24420 break;
24421 case unrolled_loop:
24422 need_zero_guard = true;
24423 unroll_factor = (TARGET_64BIT ? 4 : 2);
24424 break;
24425 case vector_loop:
24426 need_zero_guard = true;
24427 unroll_factor = 4;
24428 /* Find the widest supported mode. */
24429 move_mode = word_mode;
24430 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24431 != CODE_FOR_nothing)
24432 move_mode = GET_MODE_WIDER_MODE (move_mode);
24433
24434 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24435 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24436 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24437 {
24438 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24439 move_mode = mode_for_vector (word_mode, nunits);
24440 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24441 move_mode = word_mode;
24442 }
24443 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24444 break;
24445 case rep_prefix_8_byte:
24446 move_mode = DImode;
24447 break;
24448 case rep_prefix_4_byte:
24449 move_mode = SImode;
24450 break;
24451 case rep_prefix_1_byte:
24452 move_mode = QImode;
24453 break;
24454 }
24455 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24456 epilogue_size_needed = size_needed;
24457
24458 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24459 if (!TARGET_ALIGN_STRINGOPS || noalign)
24460 align = desired_align;
24461
24462 /* Step 1: Prologue guard. */
24463
24464 /* Alignment code needs count to be in register. */
24465 if (CONST_INT_P (count_exp) && desired_align > align)
24466 {
24467 if (INTVAL (count_exp) > desired_align
24468 && INTVAL (count_exp) > size_needed)
24469 {
24470 align_bytes
24471 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24472 if (align_bytes <= 0)
24473 align_bytes = 0;
24474 else
24475 align_bytes = desired_align - align_bytes;
24476 }
24477 if (align_bytes == 0)
24478 count_exp = force_reg (counter_mode (count_exp), count_exp);
24479 }
24480 gcc_assert (desired_align >= 1 && align >= 1);
24481
24482 /* Misaligned move sequences handle both prologue and epilogue at once.
24483 Default code generation results in smaller code for large alignments
24484 and also avoids redundant work when sizes are known precisely. */
24485 misaligned_prologue_used
24486 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24487 && MAX (desired_align, epilogue_size_needed) <= 32
24488 && desired_align <= epilogue_size_needed
24489 && ((desired_align > align && !align_bytes)
24490 || (!count && epilogue_size_needed > 1)));
24491
24492 /* Do the cheap promotion to allow better CSE across the
24493 main loop and epilogue (i.e. one load of the big constant in
24494 front of all the code).
24495 For now the misaligned move sequences do not have a fast path
24496 without broadcasting. */
24497 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24498 {
24499 if (alg == vector_loop)
24500 {
24501 gcc_assert (val_exp == const0_rtx);
24502 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24503 promoted_val = promote_duplicated_reg_to_size (val_exp,
24504 GET_MODE_SIZE (word_mode),
24505 desired_align, align);
24506 }
24507 else
24508 {
24509 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24510 desired_align, align);
24511 }
24512 }
24513 /* Misaligned move sequences handle both prologues and epilogues at once.
24514 Default code generation results in smaller code for large alignments and
24515 also avoids redundant work when sizes are known precisely. */
24516 if (misaligned_prologue_used)
24517 {
24518 /* The misaligned move prologue handles small blocks by itself. */
24519 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24520 (dst, src, &destreg, &srcreg,
24521 move_mode, promoted_val, vec_promoted_val,
24522 &count_exp,
24523 &jump_around_label,
24524 desired_align < align
24525 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24526 desired_align, align, &min_size, dynamic_check, issetmem);
24527 if (!issetmem)
24528 src = change_address (src, BLKmode, srcreg);
24529 dst = change_address (dst, BLKmode, destreg);
24530 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24531 epilogue_size_needed = 0;
24532 if (need_zero_guard && !min_size)
24533 {
24534 /* It is possible that we copied enough so the main loop will not
24535 execute. */
24536 gcc_assert (size_needed > 1);
24537 if (jump_around_label == NULL_RTX)
24538 jump_around_label = gen_label_rtx ();
24539 emit_cmp_and_jump_insns (count_exp,
24540 GEN_INT (size_needed),
24541 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24542 if (expected_size == -1
24543 || expected_size < (desired_align - align) / 2 + size_needed)
24544 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24545 else
24546 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24547 }
24548 }
24549 /* Ensure that alignment prologue won't copy past end of block. */
24550 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24551 {
24552 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24553 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24554 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
24555 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24556
24557 /* To improve performance of small blocks, we jump around the VAL
24558 promoting code. This means that if the promoted VAL is not constant,
24559 we might not use it in the epilogue and have to use the byte
24560 loop variant. */
24561 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24562 force_loopy_epilogue = true;
24563 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24564 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24565 {
24566 /* If main algorithm works on QImode, no epilogue is needed.
24567 For small sizes just don't align anything. */
24568 if (size_needed == 1)
24569 desired_align = align;
24570 else
24571 goto epilogue;
24572 }
24573 else if (!count
24574 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24575 {
24576 label = gen_label_rtx ();
24577 emit_cmp_and_jump_insns (count_exp,
24578 GEN_INT (epilogue_size_needed),
24579 LTU, 0, counter_mode (count_exp), 1, label);
24580 if (expected_size == -1 || expected_size < epilogue_size_needed)
24581 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24582 else
24583 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24584 }
24585 }
24586
24587 /* Emit code to decide at runtime whether a library call or inline code
24588 should be used. */
24589 if (dynamic_check != -1)
24590 {
24591 if (!issetmem && CONST_INT_P (count_exp))
24592 {
24593 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24594 {
24595 emit_block_move_via_libcall (dst, src, count_exp, false);
24596 count_exp = const0_rtx;
24597 goto epilogue;
24598 }
24599 }
24600 else
24601 {
24602 rtx_code_label *hot_label = gen_label_rtx ();
24603 if (jump_around_label == NULL_RTX)
24604 jump_around_label = gen_label_rtx ();
24605 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24606 LEU, 0, counter_mode (count_exp),
24607 1, hot_label);
24608 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24609 if (issetmem)
24610 set_storage_via_libcall (dst, count_exp, val_exp, false);
24611 else
24612 emit_block_move_via_libcall (dst, src, count_exp, false);
24613 emit_jump (jump_around_label);
24614 emit_label (hot_label);
24615 }
24616 }
24617
24618 /* Step 2: Alignment prologue. */
24619 /* Do the expensive promotion once we branched off the small blocks. */
24620 if (issetmem && !promoted_val)
24621 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24622 desired_align, align);
24623
24624 if (desired_align > align && !misaligned_prologue_used)
24625 {
24626 if (align_bytes == 0)
24627 {
24628 /* Except for the first move in the prologue, we no longer know
24629 the constant offset in aliasing info. It doesn't seem worth
24630 the pain to maintain it for the first move, so throw away
24631 the info early. */
24632 dst = change_address (dst, BLKmode, destreg);
24633 if (!issetmem)
24634 src = change_address (src, BLKmode, srcreg);
24635 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24636 promoted_val, vec_promoted_val,
24637 count_exp, align, desired_align,
24638 issetmem);
24639 /* At most desired_align - align bytes are copied. */
24640 if (min_size < (unsigned)(desired_align - align))
24641 min_size = 0;
24642 else
24643 min_size -= desired_align - align;
24644 }
24645 else
24646 {
24647 /* If we know how many bytes need to be stored before dst is
24648 sufficiently aligned, maintain aliasing info accurately. */
24649 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24650 srcreg,
24651 promoted_val,
24652 vec_promoted_val,
24653 desired_align,
24654 align_bytes,
24655 issetmem);
24656
24657 count_exp = plus_constant (counter_mode (count_exp),
24658 count_exp, -align_bytes);
24659 count -= align_bytes;
24660 min_size -= align_bytes;
24661 max_size -= align_bytes;
24662 }
24663 if (need_zero_guard
24664 && !min_size
24665 && (count < (unsigned HOST_WIDE_INT) size_needed
24666 || (align_bytes == 0
24667 && count < ((unsigned HOST_WIDE_INT) size_needed
24668 + desired_align - align))))
24669 {
24670 /* It is possible that we copied enough so the main loop will not
24671 execute. */
24672 gcc_assert (size_needed > 1);
24673 if (label == NULL_RTX)
24674 label = gen_label_rtx ();
24675 emit_cmp_and_jump_insns (count_exp,
24676 GEN_INT (size_needed),
24677 LTU, 0, counter_mode (count_exp), 1, label);
24678 if (expected_size == -1
24679 || expected_size < (desired_align - align) / 2 + size_needed)
24680 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24681 else
24682 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24683 }
24684 }
24685 if (label && size_needed == 1)
24686 {
24687 emit_label (label);
24688 LABEL_NUSES (label) = 1;
24689 label = NULL;
24690 epilogue_size_needed = 1;
24691 if (issetmem)
24692 promoted_val = val_exp;
24693 }
24694 else if (label == NULL_RTX && !misaligned_prologue_used)
24695 epilogue_size_needed = size_needed;
24696
24697 /* Step 3: Main loop. */
24698
24699 switch (alg)
24700 {
24701 case libcall:
24702 case no_stringop:
24703 case last_alg:
24704 gcc_unreachable ();
24705 case loop_1_byte:
24706 case loop:
24707 case unrolled_loop:
24708 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24709 count_exp, move_mode, unroll_factor,
24710 expected_size, issetmem);
24711 break;
24712 case vector_loop:
24713 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24714 vec_promoted_val, count_exp, move_mode,
24715 unroll_factor, expected_size, issetmem);
24716 break;
24717 case rep_prefix_8_byte:
24718 case rep_prefix_4_byte:
24719 case rep_prefix_1_byte:
24720 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24721 val_exp, count_exp, move_mode, issetmem);
24722 break;
24723 }
24724 /* Adjust properly the offset of src and dest memory for aliasing. */
24725 if (CONST_INT_P (count_exp))
24726 {
24727 if (!issetmem)
24728 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24729 (count / size_needed) * size_needed);
24730 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24731 (count / size_needed) * size_needed);
24732 }
24733 else
24734 {
24735 if (!issetmem)
24736 src = change_address (src, BLKmode, srcreg);
24737 dst = change_address (dst, BLKmode, destreg);
24738 }
24739
24740 /* Step 4: Epilogue to copy the remaining bytes. */
24741 epilogue:
24742 if (label)
24743 {
24744 /* When the main loop is done, COUNT_EXP might hold original count,
24745 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24746 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24747 bytes. Compensate if needed. */
24748
24749 if (size_needed < epilogue_size_needed)
24750 {
24751 tmp =
24752 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24753 GEN_INT (size_needed - 1), count_exp, 1,
24754 OPTAB_DIRECT);
24755 if (tmp != count_exp)
24756 emit_move_insn (count_exp, tmp);
24757 }
24758 emit_label (label);
24759 LABEL_NUSES (label) = 1;
24760 }
24761
24762 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24763 {
24764 if (force_loopy_epilogue)
24765 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24766 epilogue_size_needed);
24767 else
24768 {
24769 if (issetmem)
24770 expand_setmem_epilogue (dst, destreg, promoted_val,
24771 vec_promoted_val, count_exp,
24772 epilogue_size_needed);
24773 else
24774 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24775 epilogue_size_needed);
24776 }
24777 }
24778 if (jump_around_label)
24779 emit_label (jump_around_label);
24780 return true;
24781 }
24782
24783
24784 /* Expand the appropriate insns for doing strlen if not just doing
24785 repnz; scasb
24786
24787 out = result, initialized with the start address
24788 align_rtx = alignment of the address.
24789 scratch = scratch register, initialized with the start address when
24790 not aligned, otherwise undefined
24791
24792 This is just the body. It needs the initializations mentioned above and
24793 some address computing at the end. These things are done in i386.md. */
24794
24795 static void
24796 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24797 {
24798 int align;
24799 rtx tmp;
24800 rtx_code_label *align_2_label = NULL;
24801 rtx_code_label *align_3_label = NULL;
24802 rtx_code_label *align_4_label = gen_label_rtx ();
24803 rtx_code_label *end_0_label = gen_label_rtx ();
24804 rtx mem;
24805 rtx tmpreg = gen_reg_rtx (SImode);
24806 rtx scratch = gen_reg_rtx (SImode);
24807 rtx cmp;
24808
24809 align = 0;
24810 if (CONST_INT_P (align_rtx))
24811 align = INTVAL (align_rtx);
24812
24813 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24814
24815 /* Is there a known alignment and is it less than 4? */
24816 if (align < 4)
24817 {
24818 rtx scratch1 = gen_reg_rtx (Pmode);
24819 emit_move_insn (scratch1, out);
24820 /* Is there a known alignment and is it not 2? */
24821 if (align != 2)
24822 {
24823 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24824 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24825
24826 /* Leave just the 3 lower bits. */
24827 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24828 NULL_RTX, 0, OPTAB_WIDEN);
24829
24830 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24831 Pmode, 1, align_4_label);
24832 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24833 Pmode, 1, align_2_label);
24834 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24835 Pmode, 1, align_3_label);
24836 }
24837 else
24838 {
24839 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24840 check whether it is aligned to 4 bytes. */
24841
24842 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24843 NULL_RTX, 0, OPTAB_WIDEN);
24844
24845 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24846 Pmode, 1, align_4_label);
24847 }
24848
24849 mem = change_address (src, QImode, out);
24850
24851 /* Now compare the bytes. */
24852
24853 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24854 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24855 QImode, 1, end_0_label);
24856
24857 /* Increment the address. */
24858 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24859
24860 /* Not needed with an alignment of 2 */
24861 if (align != 2)
24862 {
24863 emit_label (align_2_label);
24864
24865 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24866 end_0_label);
24867
24868 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24869
24870 emit_label (align_3_label);
24871 }
24872
24873 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24874 end_0_label);
24875
24876 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24877 }
24878
24879 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24880 align this loop: it only makes the program bigger and does not help to
24881 speed it up. */
24882 emit_label (align_4_label);
24883
24884 mem = change_address (src, SImode, out);
24885 emit_move_insn (scratch, mem);
24886 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24887
24888 /* This formula yields a nonzero result iff one of the bytes is zero.
24889 This saves three branches inside the loop and many cycles. */
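/* Worked example (illustrative values): for scratch = 0x12003456 the sequence
   below computes (0x12003456 + 0xfefefeff) & ~0x12003456 & 0x80808080
   = 0x10ff3355 & 0xedffcba9 & 0x80808080 = 0x00800000, which is nonzero
   because the third byte is zero; for scratch = 0x11223344, with no zero
   byte, the same computation yields 0.  */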
24890
24891 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24892 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24893 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24894 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24895 gen_int_mode (0x80808080, SImode)));
24896 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24897 align_4_label);
24898
24899 if (TARGET_CMOVE)
24900 {
24901 rtx reg = gen_reg_rtx (SImode);
24902 rtx reg2 = gen_reg_rtx (Pmode);
24903 emit_move_insn (reg, tmpreg);
24904 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24905
24906 /* If zero is not in the first two bytes, move two bytes forward. */
24907 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24908 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24909 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24910 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24911 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24912 reg,
24913 tmpreg)));
24914 /* Emit lea manually to avoid clobbering of flags. */
24915 emit_insn (gen_rtx_SET (SImode, reg2,
24916 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24917
24918 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24919 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24920 emit_insn (gen_rtx_SET (VOIDmode, out,
24921 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24922 reg2,
24923 out)));
24924 }
24925 else
24926 {
24927 rtx_code_label *end_2_label = gen_label_rtx ();
24928 /* Is zero in the first two bytes? */
24929
24930 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24931 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24932 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24933 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24934 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24935 pc_rtx);
24936 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24937 JUMP_LABEL (tmp) = end_2_label;
24938
24939 /* Not in the first two. Move two bytes forward. */
24940 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24941 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24942
24943 emit_label (end_2_label);
24944
24945 }
24946
24947 /* Avoid branch in fixing the byte. */
24948 tmpreg = gen_lowpart (QImode, tmpreg);
24949 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24950 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24951 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24952 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24953
24954 emit_label (end_0_label);
24955 }
24956
24957 /* Expand strlen. */
24958
24959 bool
24960 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24961 {
24962 rtx addr, scratch1, scratch2, scratch3, scratch4;
24963
24964 /* The generic case of the strlen expander is long. Avoid expanding it
24965 unless TARGET_INLINE_ALL_STRINGOPS. */
24966
24967 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24968 && !TARGET_INLINE_ALL_STRINGOPS
24969 && !optimize_insn_for_size_p ()
24970 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24971 return false;
24972
24973 addr = force_reg (Pmode, XEXP (src, 0));
24974 scratch1 = gen_reg_rtx (Pmode);
24975
24976 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24977 && !optimize_insn_for_size_p ())
24978 {
24979 /* It seems that some optimizers do not combine a call like
24980 foo(strlen(bar), strlen(bar));
24981 when the move and the subtraction are done here; the length is
24982 calculated just once when these instructions are emitted inside
24983 output_strlen_unroll(). But since &bar[strlen(bar)] is often
24984 used, and doing it here keeps one fewer register live for the
24985 lifetime of output_strlen_unroll(), this is the better choice. */
24986
24987 emit_move_insn (out, addr);
24988
24989 ix86_expand_strlensi_unroll_1 (out, src, align);
24990
24991 /* strlensi_unroll_1 returns the address of the zero at the end of
24992 the string, like memchr(), so compute the length by subtracting
24993 the start address. */
24994 emit_insn (ix86_gen_sub3 (out, out, addr));
24995 }
24996 else
24997 {
24998 rtx unspec;
24999
25000 /* Can't use this if the user has appropriated eax, ecx, or edi. */
25001 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25002 return false;
25003
25004 scratch2 = gen_reg_rtx (Pmode);
25005 scratch3 = gen_reg_rtx (Pmode);
25006 scratch4 = force_reg (Pmode, constm1_rtx);
25007
25008 emit_move_insn (scratch3, addr);
25009 eoschar = force_reg (QImode, eoschar);
25010
25011 src = replace_equiv_address_nv (src, scratch3);
25012
25013 /* If .md starts supporting :P, this can be done in .md. */
25014 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25015 scratch4), UNSPEC_SCAS);
25016 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25017 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25018 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25019 }
25020 return true;
25021 }
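/* For reference, the scratch-register path above corresponds roughly to the
   classic repnz/scasb idiom (the exact registers and patterns are dictated
   by the strlenqi_1 expander in the .md file; the mnemonics below are only
   an illustrative sketch):

       mov    $-1, %ecx          ; scratch4
       mov    eoschar, %al
       repnz  scasb              ; UNSPEC_SCAS, scans the string at %edi
       not    %ecx               ; ix86_gen_one_cmpl2
       lea    -1(%ecx), out      ; ix86_gen_add3 with constm1_rtx

   i.e. length = ~count - 1, because the count register ends up holding
   -(length + 2) after scanning the terminator.  */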
25022
25023 /* For a given symbol (function), construct code to compute the address of
25024 its PLT entry in the large x86-64 PIC model. */
25025 static rtx
25026 construct_plt_address (rtx symbol)
25027 {
25028 rtx tmp, unspec;
25029
25030 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25031 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25032 gcc_assert (Pmode == DImode);
25033
25034 tmp = gen_reg_rtx (Pmode);
25035 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25036
25037 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25038 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25039 return tmp;
25040 }
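/* Illustratively (the real insn patterns come from the .md file), for a
   callee FOO this emits something like

       movabs $foo@PLTOFF, %tmp
       add    %reg, %tmp          ; %reg holds the GOT base (pic_offset_table)

   so that TMP ends up with the address of FOO's PLT entry, which is how
   calls to non-local functions are made under -mcmodel=large -fPIC.  */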
25041
25042 rtx
25043 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25044 rtx callarg2,
25045 rtx pop, bool sibcall)
25046 {
25047 rtx vec[3];
25048 rtx use = NULL, call;
25049 unsigned int vec_len = 0;
25050
25051 if (pop == const0_rtx)
25052 pop = NULL;
25053 gcc_assert (!TARGET_64BIT || !pop);
25054
25055 if (TARGET_MACHO && !TARGET_64BIT)
25056 {
25057 #if TARGET_MACHO
25058 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25059 fnaddr = machopic_indirect_call_target (fnaddr);
25060 #endif
25061 }
25062 else
25063 {
25064 /* Static functions and indirect calls don't need the pic register. */
25065 if (flag_pic
25066 && (!TARGET_64BIT
25067 || (ix86_cmodel == CM_LARGE_PIC
25068 && DEFAULT_ABI != MS_ABI))
25069 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25070 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25071 {
25072 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25073 if (ix86_use_pseudo_pic_reg ())
25074 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25075 pic_offset_table_rtx);
25076 }
25077 }
25078
25079 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
25080 {
25081 rtx al = gen_rtx_REG (QImode, AX_REG);
25082 emit_move_insn (al, callarg2);
25083 use_reg (&use, al);
25084 }
25085
25086 if (ix86_cmodel == CM_LARGE_PIC
25087 && !TARGET_PECOFF
25088 && MEM_P (fnaddr)
25089 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25090 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25091 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25092 else if (sibcall
25093 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25094 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25095 {
25096 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25097 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25098 }
25099
25100 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25101 if (retval)
25102 call = gen_rtx_SET (VOIDmode, retval, call);
25103 vec[vec_len++] = call;
25104
25105 if (pop)
25106 {
25107 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25108 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25109 vec[vec_len++] = pop;
25110 }
25111
25112 if (TARGET_64BIT_MS_ABI
25113 && (!callarg2 || INTVAL (callarg2) != -2))
25114 {
25115 int const cregs_size
25116 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25117 int i;
25118
25119 for (i = 0; i < cregs_size; i++)
25120 {
25121 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25122 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25123
25124 clobber_reg (&use, gen_rtx_REG (mode, regno));
25125 }
25126 }
25127
25128 if (vec_len > 1)
25129 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25130 call = emit_call_insn (call);
25131 if (use)
25132 CALL_INSN_FUNCTION_USAGE (call) = use;
25133
25134 return call;
25135 }
25136
25137 /* Output the assembly for a call instruction. */
25138
25139 const char *
25140 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25141 {
25142 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25143 bool seh_nop_p = false;
25144 const char *xasm;
25145
25146 if (SIBLING_CALL_P (insn))
25147 {
25148 if (direct_p)
25149 xasm = "jmp\t%P0";
25150 /* SEH epilogue detection requires the indirect branch case
25151 to include REX.W. */
25152 else if (TARGET_SEH)
25153 xasm = "rex.W jmp %A0";
25154 else
25155 xasm = "jmp\t%A0";
25156
25157 output_asm_insn (xasm, &call_op);
25158 return "";
25159 }
25160
25161 /* SEH unwinding can require an extra nop to be emitted in several
25162 circumstances. Determine if we have one of those. */
25163 if (TARGET_SEH)
25164 {
25165 rtx_insn *i;
25166
25167 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25168 {
25169 /* If we get to another real insn, we don't need the nop. */
25170 if (INSN_P (i))
25171 break;
25172
25173 /* If we get to the epilogue note, prevent a catch region from
25174 being adjacent to the standard epilogue sequence. With non-call
25175 exceptions, we'll have done this during epilogue emission. */
25176 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25177 && !flag_non_call_exceptions
25178 && !can_throw_internal (insn))
25179 {
25180 seh_nop_p = true;
25181 break;
25182 }
25183 }
25184
25185 /* If we didn't find a real insn following the call, prevent the
25186 unwinder from looking into the next function. */
25187 if (i == NULL)
25188 seh_nop_p = true;
25189 }
25190
25191 if (direct_p)
25192 xasm = "call\t%P0";
25193 else
25194 xasm = "call\t%A0";
25195
25196 output_asm_insn (xasm, &call_op);
25197
25198 if (seh_nop_p)
25199 return "nop";
25200
25201 return "";
25202 }
25203 \f
25204 /* Clear stack slot assignments remembered from previous functions.
25205 This is called from INIT_EXPANDERS once before RTL is emitted for each
25206 function. */
25207
25208 static struct machine_function *
25209 ix86_init_machine_status (void)
25210 {
25211 struct machine_function *f;
25212
25213 f = ggc_cleared_alloc<machine_function> ();
25214 f->use_fast_prologue_epilogue_nregs = -1;
25215 f->call_abi = ix86_abi;
25216
25217 return f;
25218 }
25219
25220 /* Return a MEM corresponding to a stack slot with mode MODE.
25221 Allocate a new slot if necessary.
25222
25223 The RTL for a function can have several slots available: N is
25224 which slot to use. */
25225
25226 rtx
25227 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25228 {
25229 struct stack_local_entry *s;
25230
25231 gcc_assert (n < MAX_386_STACK_LOCALS);
25232
25233 for (s = ix86_stack_locals; s; s = s->next)
25234 if (s->mode == mode && s->n == n)
25235 return validize_mem (copy_rtx (s->rtl));
25236
25237 s = ggc_alloc<stack_local_entry> ();
25238 s->n = n;
25239 s->mode = mode;
25240 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25241
25242 s->next = ix86_stack_locals;
25243 ix86_stack_locals = s;
25244 return validize_mem (copy_rtx (s->rtl));
25245 }
25246
25247 static void
25248 ix86_instantiate_decls (void)
25249 {
25250 struct stack_local_entry *s;
25251
25252 for (s = ix86_stack_locals; s; s = s->next)
25253 if (s->rtl != NULL_RTX)
25254 instantiate_decl_rtl (s->rtl);
25255 }
25256 \f
25257 /* Check whether x86 address PARTS is a pc-relative address. */
25258
25259 static bool
25260 rip_relative_addr_p (struct ix86_address *parts)
25261 {
25262 rtx base, index, disp;
25263
25264 base = parts->base;
25265 index = parts->index;
25266 disp = parts->disp;
25267
25268 if (disp && !base && !index)
25269 {
25270 if (TARGET_64BIT)
25271 {
25272 rtx symbol = disp;
25273
25274 if (GET_CODE (disp) == CONST)
25275 symbol = XEXP (disp, 0);
25276 if (GET_CODE (symbol) == PLUS
25277 && CONST_INT_P (XEXP (symbol, 1)))
25278 symbol = XEXP (symbol, 0);
25279
25280 if (GET_CODE (symbol) == LABEL_REF
25281 || (GET_CODE (symbol) == SYMBOL_REF
25282 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25283 || (GET_CODE (symbol) == UNSPEC
25284 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25285 || XINT (symbol, 1) == UNSPEC_PCREL
25286 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25287 return true;
25288 }
25289 }
25290 return false;
25291 }
25292
25293 /* Calculate the length of the memory address in the instruction encoding.
25294 Includes the addr32 prefix, but not the one-byte modrm, opcode,
25295 or other prefixes. We never generate an addr32 prefix for an LEA insn. */
25296
25297 int
25298 memory_address_length (rtx addr, bool lea)
25299 {
25300 struct ix86_address parts;
25301 rtx base, index, disp;
25302 int len;
25303 int ok;
25304
25305 if (GET_CODE (addr) == PRE_DEC
25306 || GET_CODE (addr) == POST_INC
25307 || GET_CODE (addr) == PRE_MODIFY
25308 || GET_CODE (addr) == POST_MODIFY)
25309 return 0;
25310
25311 ok = ix86_decompose_address (addr, &parts);
25312 gcc_assert (ok);
25313
25314 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25315
25316 /* If this is not an LEA instruction, add the length of the addr32 prefix. */
25317 if (TARGET_64BIT && !lea
25318 && (SImode_address_operand (addr, VOIDmode)
25319 || (parts.base && GET_MODE (parts.base) == SImode)
25320 || (parts.index && GET_MODE (parts.index) == SImode)))
25321 len++;
25322
25323 base = parts.base;
25324 index = parts.index;
25325 disp = parts.disp;
25326
25327 if (base && GET_CODE (base) == SUBREG)
25328 base = SUBREG_REG (base);
25329 if (index && GET_CODE (index) == SUBREG)
25330 index = SUBREG_REG (index);
25331
25332 gcc_assert (base == NULL_RTX || REG_P (base));
25333 gcc_assert (index == NULL_RTX || REG_P (index));
25334
25335 /* Rule of thumb:
25336 - esp as the base always wants an index,
25337 - ebp as the base always wants a displacement,
25338 - r12 as the base always wants an index,
25339 - r13 as the base always wants a displacement. */
25340
25341 /* Register Indirect. */
25342 if (base && !index && !disp)
25343 {
25344 /* esp (for its index) and ebp (for its displacement) need
25345 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25346 code. */
25347 if (base == arg_pointer_rtx
25348 || base == frame_pointer_rtx
25349 || REGNO (base) == SP_REG
25350 || REGNO (base) == BP_REG
25351 || REGNO (base) == R12_REG
25352 || REGNO (base) == R13_REG)
25353 len++;
25354 }
25355
25356 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25357 is not disp32, but disp32(%rip), so for disp32
25358 SIB byte is needed, unless print_operand_address
25359 optimizes it into disp32(%rip) or (%rip) is implied
25360 by UNSPEC. */
25361 else if (disp && !base && !index)
25362 {
25363 len += 4;
25364 if (TARGET_64BIT && !rip_relative_addr_p (&parts))
25365 len++;
25366 }
25367 else
25368 {
25369 /* Find the length of the displacement constant. */
25370 if (disp)
25371 {
25372 if (base && satisfies_constraint_K (disp))
25373 len += 1;
25374 else
25375 len += 4;
25376 }
25377 /* ebp always wants a displacement. Similarly r13. */
25378 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25379 len++;
25380
25381 /* An index requires the two-byte modrm form.... */
25382 if (index
25383 /* ...like esp (or r12), which always wants an index. */
25384 || base == arg_pointer_rtx
25385 || base == frame_pointer_rtx
25386 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25387 len++;
25388 }
25389
25390 return len;
25391 }
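/* A few illustrative results of the computation above (segment-less,
   non-LEA addresses; the one-byte modrm itself is never counted):
     (%eax)            -> 0   plain register indirect
     (%esp)            -> 1   needs a SIB byte
     8(%ebp)           -> 1   disp8 only
     16(%eax,%ebx,4)   -> 2   disp8 + SIB byte
     1024(%ecx)        -> 4   disp32
   These are estimates used for insn length attributes, not exact
   encodings of every possible form.  */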
25392
25393 /* Compute the default value for the "length_immediate" attribute. When
25394 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
25395 int
25396 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25397 {
25398 int len = 0;
25399 int i;
25400 extract_insn_cached (insn);
25401 for (i = recog_data.n_operands - 1; i >= 0; --i)
25402 if (CONSTANT_P (recog_data.operand[i]))
25403 {
25404 enum attr_mode mode = get_attr_mode (insn);
25405
25406 gcc_assert (!len);
25407 if (shortform && CONST_INT_P (recog_data.operand[i]))
25408 {
25409 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25410 switch (mode)
25411 {
25412 case MODE_QI:
25413 len = 1;
25414 continue;
25415 case MODE_HI:
25416 ival = trunc_int_for_mode (ival, HImode);
25417 break;
25418 case MODE_SI:
25419 ival = trunc_int_for_mode (ival, SImode);
25420 break;
25421 default:
25422 break;
25423 }
25424 if (IN_RANGE (ival, -128, 127))
25425 {
25426 len = 1;
25427 continue;
25428 }
25429 }
25430 switch (mode)
25431 {
25432 case MODE_QI:
25433 len = 1;
25434 break;
25435 case MODE_HI:
25436 len = 2;
25437 break;
25438 case MODE_SI:
25439 len = 4;
25440 break;
25441 /* Immediates for DImode instructions are encoded
25442 as 32-bit sign-extended values. */
25443 case MODE_DI:
25444 len = 4;
25445 break;
25446 default:
25447 fatal_insn ("unknown insn mode", insn);
25448 }
25449 }
25450 return len;
25451 }
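/* For example, with SHORTFORM set an "add $3, %eax" style operand counts as
   a single immediate byte (the value fits in -128..127 after truncation to
   the insn's mode), whereas "add $1000, %eax" falls back to the full 4-byte
   MODE_SI immediate.  DImode immediates likewise count as 4 bytes since
   they are encoded as sign-extended 32-bit values.  */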
25452
25453 /* Compute default value for "length_address" attribute. */
25454 int
25455 ix86_attr_length_address_default (rtx_insn *insn)
25456 {
25457 int i;
25458
25459 if (get_attr_type (insn) == TYPE_LEA)
25460 {
25461 rtx set = PATTERN (insn), addr;
25462
25463 if (GET_CODE (set) == PARALLEL)
25464 set = XVECEXP (set, 0, 0);
25465
25466 gcc_assert (GET_CODE (set) == SET);
25467
25468 addr = SET_SRC (set);
25469
25470 return memory_address_length (addr, true);
25471 }
25472
25473 extract_insn_cached (insn);
25474 for (i = recog_data.n_operands - 1; i >= 0; --i)
25475 if (MEM_P (recog_data.operand[i]))
25476 {
25477 constrain_operands_cached (insn, reload_completed);
25478 if (which_alternative != -1)
25479 {
25480 const char *constraints = recog_data.constraints[i];
25481 int alt = which_alternative;
25482
25483 while (*constraints == '=' || *constraints == '+')
25484 constraints++;
25485 while (alt-- > 0)
25486 while (*constraints++ != ',')
25487 ;
25488 /* Skip ignored operands. */
25489 if (*constraints == 'X')
25490 continue;
25491 }
25492 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25493 }
25494 return 0;
25495 }
25496
25497 /* Compute the default value for the "length_vex" attribute. It includes
25498 the 2- or 3-byte VEX prefix and 1 opcode byte. */
25499
25500 int
25501 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25502 bool has_vex_w)
25503 {
25504 int i;
25505
25506 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit requires
25507 the 3-byte VEX prefix. */
25508 if (!has_0f_opcode || has_vex_w)
25509 return 3 + 1;
25510
25511 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
25512 if (!TARGET_64BIT)
25513 return 2 + 1;
25514
25515 extract_insn_cached (insn);
25516
25517 for (i = recog_data.n_operands - 1; i >= 0; --i)
25518 if (REG_P (recog_data.operand[i]))
25519 {
25520 /* REX.W bit uses 3 byte VEX prefix. */
25521 if (GET_MODE (recog_data.operand[i]) == DImode
25522 && GENERAL_REG_P (recog_data.operand[i]))
25523 return 3 + 1;
25524 }
25525 else
25526 {
25527 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25528 if (MEM_P (recog_data.operand[i])
25529 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25530 return 3 + 1;
25531 }
25532
25533 return 2 + 1;
25534 }
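/* For example, an AVX insn whose operands are all legacy registers in
   64-bit code is counted with the 2-byte VEX prefix as 2 + 1; an insn
   that needs VEX.W, uses a DImode general register, or addresses memory
   through %r8-%r15 (REX.X/REX.B) is counted with the 3-byte prefix as
   3 + 1.  */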
25535 \f
25536 /* Return the maximum number of instructions a cpu can issue. */
25537
25538 static int
25539 ix86_issue_rate (void)
25540 {
25541 switch (ix86_tune)
25542 {
25543 case PROCESSOR_PENTIUM:
25544 case PROCESSOR_BONNELL:
25545 case PROCESSOR_SILVERMONT:
25546 case PROCESSOR_INTEL:
25547 case PROCESSOR_K6:
25548 case PROCESSOR_BTVER2:
25549 case PROCESSOR_PENTIUM4:
25550 case PROCESSOR_NOCONA:
25551 return 2;
25552
25553 case PROCESSOR_PENTIUMPRO:
25554 case PROCESSOR_ATHLON:
25555 case PROCESSOR_K8:
25556 case PROCESSOR_AMDFAM10:
25557 case PROCESSOR_GENERIC:
25558 case PROCESSOR_BTVER1:
25559 return 3;
25560
25561 case PROCESSOR_BDVER1:
25562 case PROCESSOR_BDVER2:
25563 case PROCESSOR_BDVER3:
25564 case PROCESSOR_BDVER4:
25565 case PROCESSOR_CORE2:
25566 case PROCESSOR_NEHALEM:
25567 case PROCESSOR_SANDYBRIDGE:
25568 case PROCESSOR_HASWELL:
25569 return 4;
25570
25571 default:
25572 return 1;
25573 }
25574 }
25575
25576 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
25577 set by DEP_INSN and nothing else that DEP_INSN sets. */
25578
25579 static bool
25580 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25581 {
25582 rtx set, set2;
25583
25584 /* Simplify the test for uninteresting insns. */
25585 if (insn_type != TYPE_SETCC
25586 && insn_type != TYPE_ICMOV
25587 && insn_type != TYPE_FCMOV
25588 && insn_type != TYPE_IBR)
25589 return false;
25590
25591 if ((set = single_set (dep_insn)) != 0)
25592 {
25593 set = SET_DEST (set);
25594 set2 = NULL_RTX;
25595 }
25596 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25597 && XVECLEN (PATTERN (dep_insn), 0) == 2
25598 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25599 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25600 {
25601 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25602 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25603 }
25604 else
25605 return false;
25606
25607 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25608 return false;
25609
25610 /* This test is true if the dependent insn reads the flags but
25611 not any other potentially set register. */
25612 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25613 return false;
25614
25615 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25616 return false;
25617
25618 return true;
25619 }
25620
25621 /* Return true iff USE_INSN has a memory address with operands set by
25622 SET_INSN. */
25623
25624 bool
25625 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25626 {
25627 int i;
25628 extract_insn_cached (use_insn);
25629 for (i = recog_data.n_operands - 1; i >= 0; --i)
25630 if (MEM_P (recog_data.operand[i]))
25631 {
25632 rtx addr = XEXP (recog_data.operand[i], 0);
25633 return modified_in_p (addr, set_insn) != 0;
25634 }
25635 return false;
25636 }
25637
25638 /* Helper function for exact_store_load_dependency.
25639 Return true if addr is found in insn. */
25640 static bool
25641 exact_dependency_1 (rtx addr, rtx insn)
25642 {
25643 enum rtx_code code;
25644 const char *format_ptr;
25645 int i, j;
25646
25647 code = GET_CODE (insn);
25648 switch (code)
25649 {
25650 case MEM:
25651 if (rtx_equal_p (addr, insn))
25652 return true;
25653 break;
25654 case REG:
25655 CASE_CONST_ANY:
25656 case SYMBOL_REF:
25657 case CODE_LABEL:
25658 case PC:
25659 case CC0:
25660 case EXPR_LIST:
25661 return false;
25662 default:
25663 break;
25664 }
25665
25666 format_ptr = GET_RTX_FORMAT (code);
25667 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25668 {
25669 switch (*format_ptr++)
25670 {
25671 case 'e':
25672 if (exact_dependency_1 (addr, XEXP (insn, i)))
25673 return true;
25674 break;
25675 case 'E':
25676 for (j = 0; j < XVECLEN (insn, i); j++)
25677 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25678 return true;
25679 break;
25680 }
25681 }
25682 return false;
25683 }
25684
25685 /* Return true if there exists exact dependency for store & load, i.e.
25686 the same memory address is used in them. */
25687 static bool
25688 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25689 {
25690 rtx set1, set2;
25691
25692 set1 = single_set (store);
25693 if (!set1)
25694 return false;
25695 if (!MEM_P (SET_DEST (set1)))
25696 return false;
25697 set2 = single_set (load);
25698 if (!set2)
25699 return false;
25700 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25701 return true;
25702 return false;
25703 }
25704
25705 static int
25706 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25707 {
25708 enum attr_type insn_type, dep_insn_type;
25709 enum attr_memory memory;
25710 rtx set, set2;
25711 int dep_insn_code_number;
25712
25713 /* Anti and output dependencies have zero cost on all CPUs. */
25714 if (REG_NOTE_KIND (link) != 0)
25715 return 0;
25716
25717 dep_insn_code_number = recog_memoized (dep_insn);
25718
25719 /* If we can't recognize the insns, we can't really do anything. */
25720 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25721 return cost;
25722
25723 insn_type = get_attr_type (insn);
25724 dep_insn_type = get_attr_type (dep_insn);
25725
25726 switch (ix86_tune)
25727 {
25728 case PROCESSOR_PENTIUM:
25729 /* Address Generation Interlock adds a cycle of latency. */
25730 if (insn_type == TYPE_LEA)
25731 {
25732 rtx addr = PATTERN (insn);
25733
25734 if (GET_CODE (addr) == PARALLEL)
25735 addr = XVECEXP (addr, 0, 0);
25736
25737 gcc_assert (GET_CODE (addr) == SET);
25738
25739 addr = SET_SRC (addr);
25740 if (modified_in_p (addr, dep_insn))
25741 cost += 1;
25742 }
25743 else if (ix86_agi_dependent (dep_insn, insn))
25744 cost += 1;
25745
25746 /* ??? Compares pair with jump/setcc. */
25747 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25748 cost = 0;
25749
25750 /* Floating point stores require the value to be ready one cycle earlier. */
25751 if (insn_type == TYPE_FMOV
25752 && get_attr_memory (insn) == MEMORY_STORE
25753 && !ix86_agi_dependent (dep_insn, insn))
25754 cost += 1;
25755 break;
25756
25757 case PROCESSOR_PENTIUMPRO:
25758 /* INT->FP conversion is expensive. */
25759 if (get_attr_fp_int_src (dep_insn))
25760 cost += 5;
25761
25762 /* There is one cycle extra latency between an FP op and a store. */
25763 if (insn_type == TYPE_FMOV
25764 && (set = single_set (dep_insn)) != NULL_RTX
25765 && (set2 = single_set (insn)) != NULL_RTX
25766 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25767 && MEM_P (SET_DEST (set2)))
25768 cost += 1;
25769
25770 memory = get_attr_memory (insn);
25771
25772 /* The reorder buffer can hide the latency of a load by executing it in
25773 parallel with the previous instruction, provided the previous
25774 instruction is not needed to compute the address. */
25775 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25776 && !ix86_agi_dependent (dep_insn, insn))
25777 {
25778 /* Claim moves take one cycle, as the core can issue one load at a
25779 time and the next load can start a cycle later. */
25780 if (dep_insn_type == TYPE_IMOV
25781 || dep_insn_type == TYPE_FMOV)
25782 cost = 1;
25783 else if (cost > 1)
25784 cost--;
25785 }
25786 break;
25787
25788 case PROCESSOR_K6:
25789 /* The esp dependency is resolved before
25790 the instruction is really finished. */
25791 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25792 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25793 return 1;
25794
25795 /* INT->FP conversion is expensive. */
25796 if (get_attr_fp_int_src (dep_insn))
25797 cost += 5;
25798
25799 memory = get_attr_memory (insn);
25800
25801 /* The reorder buffer can hide the latency of a load by executing it in
25802 parallel with the previous instruction, provided the previous
25803 instruction is not needed to compute the address. */
25804 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25805 && !ix86_agi_dependent (dep_insn, insn))
25806 {
25807 /* Claim moves take one cycle, as the core can issue one load at a
25808 time and the next load can start a cycle later. */
25809 if (dep_insn_type == TYPE_IMOV
25810 || dep_insn_type == TYPE_FMOV)
25811 cost = 1;
25812 else if (cost > 2)
25813 cost -= 2;
25814 else
25815 cost = 1;
25816 }
25817 break;
25818
25819 case PROCESSOR_AMDFAM10:
25820 case PROCESSOR_BDVER1:
25821 case PROCESSOR_BDVER2:
25822 case PROCESSOR_BDVER3:
25823 case PROCESSOR_BDVER4:
25824 case PROCESSOR_BTVER1:
25825 case PROCESSOR_BTVER2:
25826 case PROCESSOR_GENERIC:
25827 /* The stack engine allows push and pop instructions to execute in parallel. */
25828 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25829 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25830 return 0;
25831 /* FALLTHRU */
25832
25833 case PROCESSOR_ATHLON:
25834 case PROCESSOR_K8:
25835 memory = get_attr_memory (insn);
25836
25837 /* The reorder buffer can hide the latency of a load by executing it in
25838 parallel with the previous instruction, provided the previous
25839 instruction is not needed to compute the address. */
25840 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25841 && !ix86_agi_dependent (dep_insn, insn))
25842 {
25843 enum attr_unit unit = get_attr_unit (insn);
25844 int loadcost = 3;
25845
25846 /* Because of the difference between the length of integer and
25847 floating unit pipeline preparation stages, the memory operands
25848 for floating point are cheaper.
25849
25850 ??? For Athlon the difference is most probably 2. */
25851 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25852 loadcost = 3;
25853 else
25854 loadcost = TARGET_ATHLON ? 2 : 0;
25855
25856 if (cost >= loadcost)
25857 cost -= loadcost;
25858 else
25859 cost = 0;
25860 }
25861 break;
25862
25863 case PROCESSOR_CORE2:
25864 case PROCESSOR_NEHALEM:
25865 case PROCESSOR_SANDYBRIDGE:
25866 case PROCESSOR_HASWELL:
25867 /* The stack engine allows push and pop instructions to execute in parallel. */
25868 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25869 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25870 return 0;
25871
25872 memory = get_attr_memory (insn);
25873
25874 /* The reorder buffer can hide the latency of a load by executing it in
25875 parallel with the previous instruction, provided the previous
25876 instruction is not needed to compute the address. */
25877 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25878 && !ix86_agi_dependent (dep_insn, insn))
25879 {
25880 if (cost >= 4)
25881 cost -= 4;
25882 else
25883 cost = 0;
25884 }
25885 break;
25886
25887 case PROCESSOR_SILVERMONT:
25888 case PROCESSOR_INTEL:
25889 if (!reload_completed)
25890 return cost;
25891
25892 /* Increase cost of integer loads. */
25893 memory = get_attr_memory (dep_insn);
25894 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25895 {
25896 enum attr_unit unit = get_attr_unit (dep_insn);
25897 if (unit == UNIT_INTEGER && cost == 1)
25898 {
25899 if (memory == MEMORY_LOAD)
25900 cost = 3;
25901 else
25902 {
25903 /* Increase the cost of ld/st for short int types only
25904 because of the store-forwarding issue. */
25905 rtx set = single_set (dep_insn);
25906 if (set && (GET_MODE (SET_DEST (set)) == QImode
25907 || GET_MODE (SET_DEST (set)) == HImode))
25908 {
25909 /* Increase cost of store/load insn if exact
25910 dependence exists and it is load insn. */
25911 enum attr_memory insn_memory = get_attr_memory (insn);
25912 if (insn_memory == MEMORY_LOAD
25913 && exact_store_load_dependency (dep_insn, insn))
25914 cost = 3;
25915 }
25916 }
25917 }
25918 }
25919
25920 default:
25921 break;
25922 }
25923
25924 return cost;
25925 }
25926
25927 /* How many alternative schedules to try. This should be as wide as the
25928 scheduling freedom in the DFA, but no wider. Making this value too
25929 large results in extra work for the scheduler. */
25930
25931 static int
25932 ia32_multipass_dfa_lookahead (void)
25933 {
25934 switch (ix86_tune)
25935 {
25936 case PROCESSOR_PENTIUM:
25937 return 2;
25938
25939 case PROCESSOR_PENTIUMPRO:
25940 case PROCESSOR_K6:
25941 return 1;
25942
25943 case PROCESSOR_BDVER1:
25944 case PROCESSOR_BDVER2:
25945 case PROCESSOR_BDVER3:
25946 case PROCESSOR_BDVER4:
25947 /* We use lookahead value 4 for BD both before and after reload
25948 schedules. The plan is to have value 8 included for -O3. */
25949 return 4;
25950
25951 case PROCESSOR_CORE2:
25952 case PROCESSOR_NEHALEM:
25953 case PROCESSOR_SANDYBRIDGE:
25954 case PROCESSOR_HASWELL:
25955 case PROCESSOR_BONNELL:
25956 case PROCESSOR_SILVERMONT:
25957 case PROCESSOR_INTEL:
25958 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25959 the number of instructions that can be executed in a cycle, i.e.,
25960 issue_rate. I wonder why tuning for many CPUs does not do this. */
25961 if (reload_completed)
25962 return ix86_issue_rate ();
25963 /* Don't use lookahead for pre-reload schedule to save compile time. */
25964 return 0;
25965
25966 default:
25967 return 0;
25968 }
25969 }
25970
25971 /* Return true if target platform supports macro-fusion. */
25972
25973 static bool
25974 ix86_macro_fusion_p ()
25975 {
25976 return TARGET_FUSE_CMP_AND_BRANCH;
25977 }
25978
25979 /* Check whether the current microarchitecture supports macro fusion
25980 for the insn pair "CONDGEN + CONDJMP". Refer to the
25981 "Intel Architectures Optimization Reference Manual". */
25982
25983 static bool
25984 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25985 {
25986 rtx src, dest;
25987 enum rtx_code ccode;
25988 rtx compare_set = NULL_RTX, test_if, cond;
25989 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25990
25991 if (!any_condjump_p (condjmp))
25992 return false;
25993
25994 if (get_attr_type (condgen) != TYPE_TEST
25995 && get_attr_type (condgen) != TYPE_ICMP
25996 && get_attr_type (condgen) != TYPE_INCDEC
25997 && get_attr_type (condgen) != TYPE_ALU)
25998 return false;
25999
26000 compare_set = single_set (condgen);
26001 if (compare_set == NULL_RTX
26002 && !TARGET_FUSE_ALU_AND_BRANCH)
26003 return false;
26004
26005 if (compare_set == NULL_RTX)
26006 {
26007 int i;
26008 rtx pat = PATTERN (condgen);
26009 for (i = 0; i < XVECLEN (pat, 0); i++)
26010 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26011 {
26012 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26013 if (GET_CODE (set_src) == COMPARE)
26014 compare_set = XVECEXP (pat, 0, i);
26015 else
26016 alu_set = XVECEXP (pat, 0, i);
26017 }
26018 }
26019 if (compare_set == NULL_RTX)
26020 return false;
26021 src = SET_SRC (compare_set);
26022 if (GET_CODE (src) != COMPARE)
26023 return false;
26024
26025 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26026 supported. */
26027 if ((MEM_P (XEXP (src, 0))
26028 && CONST_INT_P (XEXP (src, 1)))
26029 || (MEM_P (XEXP (src, 1))
26030 && CONST_INT_P (XEXP (src, 0))))
26031 return false;
26032
26033 /* No fusion for RIP-relative address. */
26034 if (MEM_P (XEXP (src, 0)))
26035 addr = XEXP (XEXP (src, 0), 0);
26036 else if (MEM_P (XEXP (src, 1)))
26037 addr = XEXP (XEXP (src, 1), 0);
26038
26039 if (addr) {
26040 ix86_address parts;
26041 int ok = ix86_decompose_address (addr, &parts);
26042 gcc_assert (ok);
26043
26044 if (rip_relative_addr_p (&parts))
26045 return false;
26046 }
26047
26048 test_if = SET_SRC (pc_set (condjmp));
26049 cond = XEXP (test_if, 0);
26050 ccode = GET_CODE (cond);
26051 /* Check whether the conditional jump uses the Sign or Overflow flags. */
26052 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26053 && (ccode == GE
26054 || ccode == GT
26055 || ccode == LE
26056 || ccode == LT))
26057 return false;
26058
26059 /* Return true for TYPE_TEST and TYPE_ICMP. */
26060 if (get_attr_type (condgen) == TYPE_TEST
26061 || get_attr_type (condgen) == TYPE_ICMP)
26062 return true;
26063
26064 /* The following handles the macro-fusion case of alu + jmp. */
26065 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26066 return false;
26067
26068 /* No fusion for alu op with memory destination operand. */
26069 dest = SET_DEST (alu_set);
26070 if (MEM_P (dest))
26071 return false;
26072
26073 /* Macro-fusion for inc/dec + unsigned conditional jump is not
26074 supported. */
26075 if (get_attr_type (condgen) == TYPE_INCDEC
26076 && (ccode == GEU
26077 || ccode == GTU
26078 || ccode == LEU
26079 || ccode == LTU))
26080 return false;
26081
26082 return true;
26083 }
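/* By the rules above, a pair such as "cmp %eax, %ebx" + "jne .L1" can fuse,
   while e.g. a cmp/test of an immediate against memory, a compare against a
   RIP-relative operand, "dec %eax" + "jb .L1" (inc/dec with an unsigned
   condition), or an ALU op with a memory destination cannot.  Whether the
   SF/OF-based conditions fuse depends on TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS. */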
26084
26085 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
26086 execution. It is applied if
26087 (1) an IMUL instruction is at the top of the list;
26088 (2) the ready list contains exactly one producer of an independent
26089 IMUL instruction.
26090 Return the index of the IMUL producer if it was found, and -1 otherwise. */
26091 static int
26092 do_reorder_for_imul (rtx_insn **ready, int n_ready)
26093 {
26094 rtx_insn *insn;
26095 rtx set, insn1, insn2;
26096 sd_iterator_def sd_it;
26097 dep_t dep;
26098 int index = -1;
26099 int i;
26100
26101 if (!TARGET_BONNELL)
26102 return index;
26103
26104 /* Check that IMUL instruction is on the top of ready list. */
26105 insn = ready[n_ready - 1];
26106 set = single_set (insn);
26107 if (!set)
26108 return index;
26109 if (!(GET_CODE (SET_SRC (set)) == MULT
26110 && GET_MODE (SET_SRC (set)) == SImode))
26111 return index;
26112
26113 /* Search for producer of independent IMUL instruction. */
26114 for (i = n_ready - 2; i >= 0; i--)
26115 {
26116 insn = ready[i];
26117 if (!NONDEBUG_INSN_P (insn))
26118 continue;
26119 /* Skip IMUL instruction. */
26120 insn2 = PATTERN (insn);
26121 if (GET_CODE (insn2) == PARALLEL)
26122 insn2 = XVECEXP (insn2, 0, 0);
26123 if (GET_CODE (insn2) == SET
26124 && GET_CODE (SET_SRC (insn2)) == MULT
26125 && GET_MODE (SET_SRC (insn2)) == SImode)
26126 continue;
26127
26128 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26129 {
26130 rtx con;
26131 con = DEP_CON (dep);
26132 if (!NONDEBUG_INSN_P (con))
26133 continue;
26134 insn1 = PATTERN (con);
26135 if (GET_CODE (insn1) == PARALLEL)
26136 insn1 = XVECEXP (insn1, 0, 0);
26137
26138 if (GET_CODE (insn1) == SET
26139 && GET_CODE (SET_SRC (insn1)) == MULT
26140 && GET_MODE (SET_SRC (insn1)) == SImode)
26141 {
26142 sd_iterator_def sd_it1;
26143 dep_t dep1;
26144 /* Check if there is no other dependee for IMUL. */
26145 index = i;
26146 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26147 {
26148 rtx pro;
26149 pro = DEP_PRO (dep1);
26150 if (!NONDEBUG_INSN_P (pro))
26151 continue;
26152 if (pro != insn)
26153 index = -1;
26154 }
26155 if (index >= 0)
26156 break;
26157 }
26158 }
26159 if (index >= 0)
26160 break;
26161 }
26162 return index;
26163 }
26164
26165 /* Try to find the best candidate at the top of the ready list when two
26166 insns have the same priority - the best candidate is the one whose
26167 dependees were scheduled earlier. Applied for Silvermont only.
26168 Return true if the top 2 insns must be interchanged. */
26169 static bool
26170 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26171 {
26172 rtx_insn *top = ready[n_ready - 1];
26173 rtx_insn *next = ready[n_ready - 2];
26174 rtx set;
26175 sd_iterator_def sd_it;
26176 dep_t dep;
26177 int clock1 = -1;
26178 int clock2 = -1;
26179 #define INSN_TICK(INSN) (HID (INSN)->tick)
26180
26181 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26182 return false;
26183
26184 if (!NONDEBUG_INSN_P (top))
26185 return false;
26186 if (!NONJUMP_INSN_P (top))
26187 return false;
26188 if (!NONDEBUG_INSN_P (next))
26189 return false;
26190 if (!NONJUMP_INSN_P (next))
26191 return false;
26192 set = single_set (top);
26193 if (!set)
26194 return false;
26195 set = single_set (next);
26196 if (!set)
26197 return false;
26198
26199 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26200 {
26201 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26202 return false;
26203 /* Determine the winner more precisely. */
26204 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26205 {
26206 rtx pro;
26207 pro = DEP_PRO (dep);
26208 if (!NONDEBUG_INSN_P (pro))
26209 continue;
26210 if (INSN_TICK (pro) > clock1)
26211 clock1 = INSN_TICK (pro);
26212 }
26213 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26214 {
26215 rtx pro;
26216 pro = DEP_PRO (dep);
26217 if (!NONDEBUG_INSN_P (pro))
26218 continue;
26219 if (INSN_TICK (pro) > clock2)
26220 clock2 = INSN_TICK (pro);
26221 }
26222
26223 if (clock1 == clock2)
26224 {
26225 /* Determine the winner - a load must win. */
26226 enum attr_memory memory1, memory2;
26227 memory1 = get_attr_memory (top);
26228 memory2 = get_attr_memory (next);
26229 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26230 return true;
26231 }
26232 return (bool) (clock2 < clock1);
26233 }
26234 return false;
26235 #undef INSN_TICK
26236 }
26237
26238 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26239 Return issue rate. */
26240 static int
26241 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26242 int *pn_ready, int clock_var)
26243 {
26244 int issue_rate = -1;
26245 int n_ready = *pn_ready;
26246 int i;
26247 rtx_insn *insn;
26248 int index = -1;
26249
26250 /* Set up issue rate. */
26251 issue_rate = ix86_issue_rate ();
26252
26253 /* Do reordering for BONNELL/SILVERMONT only. */
26254 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26255 return issue_rate;
26256
26257 /* Nothing to do if ready list contains only 1 instruction. */
26258 if (n_ready <= 1)
26259 return issue_rate;
26260
26261 /* Do reordering for the post-reload scheduler only. */
26262 if (!reload_completed)
26263 return issue_rate;
26264
26265 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26266 {
26267 if (sched_verbose > 1)
26268 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26269 INSN_UID (ready[index]));
26270
26271 /* Put IMUL producer (ready[index]) at the top of ready list. */
26272 insn = ready[index];
26273 for (i = index; i < n_ready - 1; i++)
26274 ready[i] = ready[i + 1];
26275 ready[n_ready - 1] = insn;
26276 return issue_rate;
26277 }
26278 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26279 {
26280 if (sched_verbose > 1)
26281 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26282 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26283 /* Swap 2 top elements of ready list. */
26284 insn = ready[n_ready - 1];
26285 ready[n_ready - 1] = ready[n_ready - 2];
26286 ready[n_ready - 2] = insn;
26287 }
26288 return issue_rate;
26289 }
26290
26291 static bool
26292 ix86_class_likely_spilled_p (reg_class_t);
26293
26294 /* Return true if the lhs of INSN is a HW function argument register, and
26295 set *IS_SPILLED to true if it is a likely-spilled HW register. */
26296 static bool
26297 insn_is_function_arg (rtx insn, bool* is_spilled)
26298 {
26299 rtx dst;
26300
26301 if (!NONDEBUG_INSN_P (insn))
26302 return false;
26303 /* Call instructions are not movable; ignore them. */
26304 if (CALL_P (insn))
26305 return false;
26306 insn = PATTERN (insn);
26307 if (GET_CODE (insn) == PARALLEL)
26308 insn = XVECEXP (insn, 0, 0);
26309 if (GET_CODE (insn) != SET)
26310 return false;
26311 dst = SET_DEST (insn);
26312 if (REG_P (dst) && HARD_REGISTER_P (dst)
26313 && ix86_function_arg_regno_p (REGNO (dst)))
26314 {
26315 /* Is it likely spilled HW register? */
26316 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26317 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26318 *is_spilled = true;
26319 return true;
26320 }
26321 return false;
26322 }
26323
26324 /* Add output dependencies for a chain of adjacent function arguments, but
26325 only if there is a move to a likely-spilled HW register. Return the first
26326 argument if at least one dependence was added, or NULL otherwise. */
26327 static rtx_insn *
26328 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26329 {
26330 rtx_insn *insn;
26331 rtx_insn *last = call;
26332 rtx_insn *first_arg = NULL;
26333 bool is_spilled = false;
26334
26335 head = PREV_INSN (head);
26336
26337 /* Find the argument-passing instruction nearest to the call. */
26338 while (true)
26339 {
26340 last = PREV_INSN (last);
26341 if (last == head)
26342 return NULL;
26343 if (!NONDEBUG_INSN_P (last))
26344 continue;
26345 if (insn_is_function_arg (last, &is_spilled))
26346 break;
26347 return NULL;
26348 }
26349
26350 first_arg = last;
26351 while (true)
26352 {
26353 insn = PREV_INSN (last);
26354 if (!INSN_P (insn))
26355 break;
26356 if (insn == head)
26357 break;
26358 if (!NONDEBUG_INSN_P (insn))
26359 {
26360 last = insn;
26361 continue;
26362 }
26363 if (insn_is_function_arg (insn, &is_spilled))
26364 {
26365 /* Add an output dependence between two function arguments if the chain
26366 of output arguments contains likely-spilled HW registers. */
26367 if (is_spilled)
26368 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26369 first_arg = last = insn;
26370 }
26371 else
26372 break;
26373 }
26374 if (!is_spilled)
26375 return NULL;
26376 return first_arg;
26377 }
26378
26379 /* Add output or anti dependency from insn to first_arg to restrict its code
26380 motion. */
26381 static void
26382 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26383 {
26384 rtx set;
26385 rtx tmp;
26386
26387 set = single_set (insn);
26388 if (!set)
26389 return;
26390 tmp = SET_DEST (set);
26391 if (REG_P (tmp))
26392 {
26393 /* Add output dependency to the first function argument. */
26394 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26395 return;
26396 }
26397 /* Add anti dependency. */
26398 add_dependence (first_arg, insn, REG_DEP_ANTI);
26399 }
26400
26401 /* Avoid cross-block motion of a function argument by adding a dependency
26402 from the first non-jump instruction in BB. */
26403 static void
26404 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26405 {
26406 rtx_insn *insn = BB_END (bb);
26407
26408 while (insn)
26409 {
26410 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26411 {
26412 rtx set = single_set (insn);
26413 if (set)
26414 {
26415 avoid_func_arg_motion (arg, insn);
26416 return;
26417 }
26418 }
26419 if (insn == BB_HEAD (bb))
26420 return;
26421 insn = PREV_INSN (insn);
26422 }
26423 }
26424
26425 /* Hook for pre-reload schedule - avoid motion of function arguments
26426 passed in likely spilled HW registers. */
26427 static void
26428 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26429 {
26430 rtx_insn *insn;
26431 rtx_insn *first_arg = NULL;
26432 if (reload_completed)
26433 return;
26434 while (head != tail && DEBUG_INSN_P (head))
26435 head = NEXT_INSN (head);
26436 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26437 if (INSN_P (insn) && CALL_P (insn))
26438 {
26439 first_arg = add_parameter_dependencies (insn, head);
26440 if (first_arg)
26441 {
26442 /* Add a dependee for the first argument to predecessors, but only if the
26443 region contains more than one block. */
26444 basic_block bb = BLOCK_FOR_INSN (insn);
26445 int rgn = CONTAINING_RGN (bb->index);
26446 int nr_blks = RGN_NR_BLOCKS (rgn);
26447 /* Skip trivial regions and region head blocks that can have
26448 predecessors outside of region. */
26449 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26450 {
26451 edge e;
26452 edge_iterator ei;
26453
26454 /* Regions are SCCs with the exception of selective
26455 scheduling with pipelining of outer blocks enabled.
26456 So also check that immediate predecessors of a non-head
26457 block are in the same region. */
26458 FOR_EACH_EDGE (e, ei, bb->preds)
26459 {
26460 /* Avoid creating loop-carried dependencies by using the topological
26461 ordering of the region. */
26462 if (rgn == CONTAINING_RGN (e->src->index)
26463 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26464 add_dependee_for_func_arg (first_arg, e->src);
26465 }
26466 }
26467 insn = first_arg;
26468 if (insn == head)
26469 break;
26470 }
26471 }
26472 else if (first_arg)
26473 avoid_func_arg_motion (first_arg, insn);
26474 }
26475
26476 /* Hook for pre-reload schedule - set the priority of moves from likely
26477 spilled HW registers to maximum, to schedule them as soon as possible.
26478 These are moves from function argument registers at the top of the
26479 function entry and moves from function return value registers after a call. */
26480 static int
26481 ix86_adjust_priority (rtx_insn *insn, int priority)
26482 {
26483 rtx set;
26484
26485 if (reload_completed)
26486 return priority;
26487
26488 if (!NONDEBUG_INSN_P (insn))
26489 return priority;
26490
26491 set = single_set (insn);
26492 if (set)
26493 {
26494 rtx tmp = SET_SRC (set);
26495 if (REG_P (tmp)
26496 && HARD_REGISTER_P (tmp)
26497 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26498 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26499 return current_sched_info->sched_max_insns_priority;
26500 }
26501
26502 return priority;
26503 }
26504
26505 /* Model decoder of Core 2/i7.
26506 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26507 track the instruction fetch block boundaries and make sure that long
26508 (9+ bytes) instructions are assigned to D0. */
26509
26510 /* Maximum length of an insn that can be handled by
26511 a secondary decoder unit. '8' for Core 2/i7. */
26512 static int core2i7_secondary_decoder_max_insn_size;
26513
26514 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26515 '16' for Core 2/i7. */
26516 static int core2i7_ifetch_block_size;
26517
26518 /* Maximum number of instructions decoder can handle per cycle.
26519 '6' for Core 2/i7. */
26520 static int core2i7_ifetch_block_max_insns;
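/* With the values set below for Core 2/i7 (8, 16 and 6 respectively), an
   insn longer than 8 bytes can only be issued as the first insn of a
   decode group, and a group is closed once 16 bytes or 6 insns have been
   consumed - which is what the filter_ready_try hook below enforces.  */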
26521
26522 typedef struct ix86_first_cycle_multipass_data_ *
26523 ix86_first_cycle_multipass_data_t;
26524 typedef const struct ix86_first_cycle_multipass_data_ *
26525 const_ix86_first_cycle_multipass_data_t;
26526
26527 /* A variable to store target state across calls to max_issue within
26528 one cycle. */
26529 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26530 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26531
26532 /* Initialize DATA. */
26533 static void
26534 core2i7_first_cycle_multipass_init (void *_data)
26535 {
26536 ix86_first_cycle_multipass_data_t data
26537 = (ix86_first_cycle_multipass_data_t) _data;
26538
26539 data->ifetch_block_len = 0;
26540 data->ifetch_block_n_insns = 0;
26541 data->ready_try_change = NULL;
26542 data->ready_try_change_size = 0;
26543 }
26544
26545 /* Advancing the cycle; reset ifetch block counts. */
26546 static void
26547 core2i7_dfa_post_advance_cycle (void)
26548 {
26549 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26550
26551 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26552
26553 data->ifetch_block_len = 0;
26554 data->ifetch_block_n_insns = 0;
26555 }
26556
26557 static int min_insn_size (rtx_insn *);
26558
26559 /* Filter out insns from ready_try that the core will not be able to issue
26560 on current cycle due to decoder. */
26561 static void
26562 core2i7_first_cycle_multipass_filter_ready_try
26563 (const_ix86_first_cycle_multipass_data_t data,
26564 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26565 {
26566 while (n_ready--)
26567 {
26568 rtx_insn *insn;
26569 int insn_size;
26570
26571 if (ready_try[n_ready])
26572 continue;
26573
26574 insn = get_ready_element (n_ready);
26575 insn_size = min_insn_size (insn);
26576
26577 if (/* If this is too long an insn for a secondary decoder ... */
26578 (!first_cycle_insn_p
26579 && insn_size > core2i7_secondary_decoder_max_insn_size)
26580 /* ... or it would not fit into the ifetch block ... */
26581 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26582 /* ... or the decoder is full already ... */
26583 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26584 /* ... mask the insn out. */
26585 {
26586 ready_try[n_ready] = 1;
26587
26588 if (data->ready_try_change)
26589 bitmap_set_bit (data->ready_try_change, n_ready);
26590 }
26591 }
26592 }
26593
26594 /* Prepare for a new round of multipass lookahead scheduling. */
26595 static void
26596 core2i7_first_cycle_multipass_begin (void *_data,
26597 signed char *ready_try, int n_ready,
26598 bool first_cycle_insn_p)
26599 {
26600 ix86_first_cycle_multipass_data_t data
26601 = (ix86_first_cycle_multipass_data_t) _data;
26602 const_ix86_first_cycle_multipass_data_t prev_data
26603 = ix86_first_cycle_multipass_data;
26604
26605 /* Restore the state from the end of the previous round. */
26606 data->ifetch_block_len = prev_data->ifetch_block_len;
26607 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26608
26609 /* Filter instructions that cannot be issued on current cycle due to
26610 decoder restrictions. */
26611 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26612 first_cycle_insn_p);
26613 }
26614
26615 /* INSN is being issued in current solution. Account for its impact on
26616 the decoder model. */
26617 static void
26618 core2i7_first_cycle_multipass_issue (void *_data,
26619 signed char *ready_try, int n_ready,
26620 rtx_insn *insn, const void *_prev_data)
26621 {
26622 ix86_first_cycle_multipass_data_t data
26623 = (ix86_first_cycle_multipass_data_t) _data;
26624 const_ix86_first_cycle_multipass_data_t prev_data
26625 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26626
26627 int insn_size = min_insn_size (insn);
26628
26629 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26630 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26631 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26632 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26633
26634 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26635 if (!data->ready_try_change)
26636 {
26637 data->ready_try_change = sbitmap_alloc (n_ready);
26638 data->ready_try_change_size = n_ready;
26639 }
26640 else if (data->ready_try_change_size < n_ready)
26641 {
26642 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26643 n_ready, 0);
26644 data->ready_try_change_size = n_ready;
26645 }
26646 bitmap_clear (data->ready_try_change);
26647
26648 /* Filter out insns from ready_try that the core will not be able to issue
26649 on current cycle due to decoder. */
26650 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26651 false);
26652 }
26653
26654 /* Revert the effect on ready_try. */
26655 static void
26656 core2i7_first_cycle_multipass_backtrack (const void *_data,
26657 signed char *ready_try,
26658 int n_ready ATTRIBUTE_UNUSED)
26659 {
26660 const_ix86_first_cycle_multipass_data_t data
26661 = (const_ix86_first_cycle_multipass_data_t) _data;
26662 unsigned int i = 0;
26663 sbitmap_iterator sbi;
26664
26665 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26666 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26667 {
26668 ready_try[i] = 0;
26669 }
26670 }
26671
26672 /* Save the result of multipass lookahead scheduling for the next round. */
26673 static void
26674 core2i7_first_cycle_multipass_end (const void *_data)
26675 {
26676 const_ix86_first_cycle_multipass_data_t data
26677 = (const_ix86_first_cycle_multipass_data_t) _data;
26678 ix86_first_cycle_multipass_data_t next_data
26679 = ix86_first_cycle_multipass_data;
26680
26681 if (data != NULL)
26682 {
26683 next_data->ifetch_block_len = data->ifetch_block_len;
26684 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26685 }
26686 }
26687
26688 /* Deallocate target data. */
26689 static void
26690 core2i7_first_cycle_multipass_fini (void *_data)
26691 {
26692 ix86_first_cycle_multipass_data_t data
26693 = (ix86_first_cycle_multipass_data_t) _data;
26694
26695 if (data->ready_try_change)
26696 {
26697 sbitmap_free (data->ready_try_change);
26698 data->ready_try_change = NULL;
26699 data->ready_try_change_size = 0;
26700 }
26701 }
26702
26703 /* Prepare for scheduling pass. */
26704 static void
26705 ix86_sched_init_global (FILE *, int, int)
26706 {
26707 /* Install scheduling hooks for current CPU. Some of these hooks are used
26708 in time-critical parts of the scheduler, so we only set them up when
26709 they are actually used. */
26710 switch (ix86_tune)
26711 {
26712 case PROCESSOR_CORE2:
26713 case PROCESSOR_NEHALEM:
26714 case PROCESSOR_SANDYBRIDGE:
26715 case PROCESSOR_HASWELL:
26716 /* Do not perform multipass scheduling for pre-reload schedule
26717 to save compile time. */
26718 if (reload_completed)
26719 {
26720 targetm.sched.dfa_post_advance_cycle
26721 = core2i7_dfa_post_advance_cycle;
26722 targetm.sched.first_cycle_multipass_init
26723 = core2i7_first_cycle_multipass_init;
26724 targetm.sched.first_cycle_multipass_begin
26725 = core2i7_first_cycle_multipass_begin;
26726 targetm.sched.first_cycle_multipass_issue
26727 = core2i7_first_cycle_multipass_issue;
26728 targetm.sched.first_cycle_multipass_backtrack
26729 = core2i7_first_cycle_multipass_backtrack;
26730 targetm.sched.first_cycle_multipass_end
26731 = core2i7_first_cycle_multipass_end;
26732 targetm.sched.first_cycle_multipass_fini
26733 = core2i7_first_cycle_multipass_fini;
26734
26735 /* Set decoder parameters. */
26736 core2i7_secondary_decoder_max_insn_size = 8;
26737 core2i7_ifetch_block_size = 16;
26738 core2i7_ifetch_block_max_insns = 6;
26739 break;
26740 }
26741 /* ... Fall through ... */
26742 default:
26743 targetm.sched.dfa_post_advance_cycle = NULL;
26744 targetm.sched.first_cycle_multipass_init = NULL;
26745 targetm.sched.first_cycle_multipass_begin = NULL;
26746 targetm.sched.first_cycle_multipass_issue = NULL;
26747 targetm.sched.first_cycle_multipass_backtrack = NULL;
26748 targetm.sched.first_cycle_multipass_end = NULL;
26749 targetm.sched.first_cycle_multipass_fini = NULL;
26750 break;
26751 }
26752 }
26753
26754 \f
26755 /* Compute the alignment given to a constant that is being placed in memory.
26756 EXP is the constant and ALIGN is the alignment that the object would
26757 ordinarily have.
26758 The value of this function is used instead of that alignment to align
26759 the object. */
26760
26761 int
26762 ix86_constant_alignment (tree exp, int align)
26763 {
26764 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26765 || TREE_CODE (exp) == INTEGER_CST)
26766 {
26767 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26768 return 64;
26769 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26770 return 128;
26771 }
26772 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26773 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26774 return BITS_PER_WORD;
26775
26776 return align;
26777 }
26778
26779 /* Compute the alignment for a static variable.
26780 TYPE is the data type, and ALIGN is the alignment that
26781 the object would ordinarily have. The value of this function is used
26782 instead of that alignment to align the object. */
26783
26784 int
26785 ix86_data_alignment (tree type, int align, bool opt)
26786 {
26787 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26788 for symbols from other compilation units or symbols that don't need
26789 to bind locally. In order to preserve some ABI compatibility with
26790 those compilers, ensure we don't decrease alignment from what we
26791 used to assume. */
26792
26793 int max_align_compat
26794 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26795
26796 /* A data structure equal to or greater than the size of a cache line
26797 (64 bytes in the Pentium 4 and other recent Intel processors, including
26798 processors based on the Intel Core microarchitecture) should be aligned
26799 so that its base address is a multiple of the cache line size. */
26800
26801 int max_align
26802 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26803
26804 if (max_align < BITS_PER_WORD)
26805 max_align = BITS_PER_WORD;
26806
26807 if (opt
26808 && AGGREGATE_TYPE_P (type)
26809 && TYPE_SIZE (type)
26810 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26811 {
26812 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26813 && align < max_align_compat)
26814 align = max_align_compat;
26815 if (wi::geu_p (TYPE_SIZE (type), max_align)
26816 && align < max_align)
26817 align = max_align;
26818 }
26819
26820 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26821 to a 16-byte boundary. */
26822 if (TARGET_64BIT)
26823 {
26824 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26825 && TYPE_SIZE (type)
26826 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26827 && wi::geu_p (TYPE_SIZE (type), 128)
26828 && align < 128)
26829 return 128;
26830 }
26831
26832 if (!opt)
26833 return align;
26834
26835 if (TREE_CODE (type) == ARRAY_TYPE)
26836 {
26837 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26838 return 64;
26839 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26840 return 128;
26841 }
26842 else if (TREE_CODE (type) == COMPLEX_TYPE)
26843 {
26844
26845 if (TYPE_MODE (type) == DCmode && align < 64)
26846 return 64;
26847 if ((TYPE_MODE (type) == XCmode
26848 || TYPE_MODE (type) == TCmode) && align < 128)
26849 return 128;
26850 }
26851 else if ((TREE_CODE (type) == RECORD_TYPE
26852 || TREE_CODE (type) == UNION_TYPE
26853 || TREE_CODE (type) == QUAL_UNION_TYPE)
26854 && TYPE_FIELDS (type))
26855 {
26856 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26857 return 64;
26858 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26859 return 128;
26860 }
26861 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26862 || TREE_CODE (type) == INTEGER_TYPE)
26863 {
26864 if (TYPE_MODE (type) == DFmode && align < 64)
26865 return 64;
26866 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26867 return 128;
26868 }
26869
26870 return align;
26871 }
26872
26873 /* Compute the alignment for a local variable or a stack slot. EXP is
26874 the data type or decl itself, MODE is the widest mode available and
26875 ALIGN is the alignment that the object would ordinarily have. The
26876 value of this macro is used instead of that alignment to align the
26877 object. */
26878
26879 unsigned int
26880 ix86_local_alignment (tree exp, enum machine_mode mode,
26881 unsigned int align)
26882 {
26883 tree type, decl;
26884
26885 if (exp && DECL_P (exp))
26886 {
26887 type = TREE_TYPE (exp);
26888 decl = exp;
26889 }
26890 else
26891 {
26892 type = exp;
26893 decl = NULL;
26894 }
26895
26896 /* Don't do dynamic stack realignment for long long objects with
26897 -mpreferred-stack-boundary=2. */
26898 if (!TARGET_64BIT
26899 && align == 64
26900 && ix86_preferred_stack_boundary < 64
26901 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26902 && (!type || !TYPE_USER_ALIGN (type))
26903 && (!decl || !DECL_USER_ALIGN (decl)))
26904 align = 32;
26905
26906 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26907 register in MODE. Make sure an XFmode slot gets at least the
26908 DFmode alignment. */
26909 if (!type)
26910 {
26911 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26912 align = GET_MODE_ALIGNMENT (DFmode);
26913 return align;
26914 }
26915
26916 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
26917 to a 16-byte boundary. The exact wording is:
26918
26919 An array uses the same alignment as its elements, except that a local or
26920 global array variable of length at least 16 bytes or
26921 a C99 variable-length array variable always has alignment of at least 16 bytes.
26922
26923 This was added to allow the use of aligned SSE instructions on arrays. The
26924 rule is meant for static storage (where the compiler cannot do the analysis
26925 by itself). We follow it for automatic variables only when convenient:
26926 we fully control everything in the function being compiled, and functions
26927 from other units cannot rely on the alignment.
26928
26929 Exclude the va_list type. It is the common case of a local array where
26930 we cannot benefit from the alignment.
26931
26932 TODO: We should probably optimize for size only when the variable does not escape. */
26933 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26934 && TARGET_SSE)
26935 {
26936 if (AGGREGATE_TYPE_P (type)
26937 && (va_list_type_node == NULL_TREE
26938 || (TYPE_MAIN_VARIANT (type)
26939 != TYPE_MAIN_VARIANT (va_list_type_node)))
26940 && TYPE_SIZE (type)
26941 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26942 && wi::geu_p (TYPE_SIZE (type), 16)
26943 && align < 128)
26944 return 128;
26945 }
26946 if (TREE_CODE (type) == ARRAY_TYPE)
26947 {
26948 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26949 return 64;
26950 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26951 return 128;
26952 }
26953 else if (TREE_CODE (type) == COMPLEX_TYPE)
26954 {
26955 if (TYPE_MODE (type) == DCmode && align < 64)
26956 return 64;
26957 if ((TYPE_MODE (type) == XCmode
26958 || TYPE_MODE (type) == TCmode) && align < 128)
26959 return 128;
26960 }
26961 else if ((TREE_CODE (type) == RECORD_TYPE
26962 || TREE_CODE (type) == UNION_TYPE
26963 || TREE_CODE (type) == QUAL_UNION_TYPE)
26964 && TYPE_FIELDS (type))
26965 {
26966 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26967 return 64;
26968 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26969 return 128;
26970 }
26971 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26972 || TREE_CODE (type) == INTEGER_TYPE)
26973 {
26974
26975 if (TYPE_MODE (type) == DFmode && align < 64)
26976 return 64;
26977 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26978 return 128;
26979 }
26980 return align;
26981 }
26982
26983 /* Compute the minimum required alignment for dynamic stack realignment
26984 purposes for a local variable, parameter or a stack slot. EXP is
26985 the data type or decl itself, MODE is its mode and ALIGN is the
26986 alignment that the object would ordinarily have. */
26987
26988 unsigned int
26989 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26990 unsigned int align)
26991 {
26992 tree type, decl;
26993
26994 if (exp && DECL_P (exp))
26995 {
26996 type = TREE_TYPE (exp);
26997 decl = exp;
26998 }
26999 else
27000 {
27001 type = exp;
27002 decl = NULL;
27003 }
27004
27005 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27006 return align;
27007
27008 /* Don't do dynamic stack realignment for long long objects with
27009 -mpreferred-stack-boundary=2. */
27010 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27011 && (!type || !TYPE_USER_ALIGN (type))
27012 && (!decl || !DECL_USER_ALIGN (decl)))
27013 return 32;
27014
27015 return align;
27016 }
27017 \f
27018 /* Find a location for the static chain incoming to a nested function.
27019 This is a register, unless all free registers are used by arguments. */
27020
27021 static rtx
27022 ix86_static_chain (const_tree fndecl, bool incoming_p)
27023 {
27024 unsigned regno;
27025
27026 if (!DECL_STATIC_CHAIN (fndecl))
27027 return NULL;
27028
27029 if (TARGET_64BIT)
27030 {
27031 /* We always use R10 in 64-bit mode. */
27032 regno = R10_REG;
27033 }
27034 else
27035 {
27036 tree fntype;
27037 unsigned int ccvt;
27038
27039 /* By default in 32-bit mode we use ECX to pass the static chain. */
27040 regno = CX_REG;
27041
27042 fntype = TREE_TYPE (fndecl);
27043 ccvt = ix86_get_callcvt (fntype);
27044 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27045 {
27046 /* Fastcall functions use ecx/edx for arguments, which leaves
27047 us with EAX for the static chain.
27048 Thiscall functions use ecx for arguments, which also
27049 leaves us with EAX for the static chain. */
27050 regno = AX_REG;
27051 }
27052 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27053 {
27054 /* Thiscall functions use ecx for arguments, which leaves
27055 us with EAX and EDX for the static chain.
27056 For ABI compatibility we use EAX. */
27057 regno = AX_REG;
27058 }
27059 else if (ix86_function_regparm (fntype, fndecl) == 3)
27060 {
27061 /* For regparm 3, we have no free call-clobbered registers in
27062 which to store the static chain. In order to implement this,
27063 we have the trampoline push the static chain to the stack.
27064 However, we can't push a value below the return address when
27065 we call the nested function directly, so we have to use an
27066 alternate entry point. For this we use ESI, and have the
27067 alternate entry point push ESI, so that things appear the
27068 same once we're executing the nested function. */
27069 if (incoming_p)
27070 {
27071 if (fndecl == current_function_decl)
27072 ix86_static_chain_on_stack = true;
27073 return gen_frame_mem (SImode,
27074 plus_constant (Pmode,
27075 arg_pointer_rtx, -8));
27076 }
27077 regno = SI_REG;
27078 }
27079 }
27080
27081 return gen_rtx_REG (Pmode, regno);
27082 }
27083
27084 /* Emit RTL insns to initialize the variable parts of a trampoline.
27085 FNDECL is the decl of the target address; M_TRAMP is a MEM for
27086 the trampoline, and CHAIN_VALUE is an RTX for the static chain
27087 to be passed to the target function. */
27088
27089 static void
27090 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27091 {
27092 rtx mem, fnaddr;
27093 int opcode;
27094 int offset = 0;
27095
27096 fnaddr = XEXP (DECL_RTL (fndecl), 0);
27097
27098 if (TARGET_64BIT)
27099 {
27100 int size;
27101
27102 /* Load the function address into r11. Try to load the address using
27103 the shorter movl instead of movabs. We may want to support
27104 movq for kernel mode, but the kernel does not use trampolines at
27105 the moment. FNADDR is a 32-bit address and may not be in
27106 DImode when ptr_mode == SImode. Always use movl in this
27107 case. */
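/* As a rough sketch, the movabs path below emits the byte sequence
     49 bb <imm64>   movabs $fnaddr, %r11
     49 ba <imm64>   movabs $chain,  %r10
     49 ff e3        jmp    *%r11
     90              nop (pads the final write out to 4 bytes)
   while the movl path uses the shorter 41 bb / 41 ba encodings for the
   two loads. */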
27108 if (ptr_mode == SImode
27109 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27110 {
27111 fnaddr = copy_addr_to_reg (fnaddr);
27112
27113 mem = adjust_address (m_tramp, HImode, offset);
27114 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27115
27116 mem = adjust_address (m_tramp, SImode, offset + 2);
27117 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27118 offset += 6;
27119 }
27120 else
27121 {
27122 mem = adjust_address (m_tramp, HImode, offset);
27123 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27124
27125 mem = adjust_address (m_tramp, DImode, offset + 2);
27126 emit_move_insn (mem, fnaddr);
27127 offset += 10;
27128 }
27129
27130 /* Load the static chain into r10 using movabs. Use the shorter movl
27131 instead of movabs when ptr_mode == SImode. */
27132 if (ptr_mode == SImode)
27133 {
27134 opcode = 0xba41;
27135 size = 6;
27136 }
27137 else
27138 {
27139 opcode = 0xba49;
27140 size = 10;
27141 }
27142
27143 mem = adjust_address (m_tramp, HImode, offset);
27144 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27145
27146 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27147 emit_move_insn (mem, chain_value);
27148 offset += size;
27149
27150 /* Jump to r11; the last (unused) byte is a nop, only there to
27151 pad the write out to a single 32-bit store. */
27152 mem = adjust_address (m_tramp, SImode, offset);
27153 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27154 offset += 4;
27155 }
27156 else
27157 {
27158 rtx disp, chain;
27159
27160 /* Depending on the static chain location, either load a register
27161 with a constant, or push the constant to the stack. All of the
27162 instructions are the same size. */
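/* Roughly, the 32-bit trampoline emitted below is either
     b8/b9 <imm32>   movl  $chain, %eax / %ecx
     e9 <rel32>      jmp   fnaddr
   or, when the static chain is passed on the stack,
     68 <imm32>      pushl $chain
     e9 <rel32>      jmp   fnaddr+1   (skipping the 1-byte push at the
                                       nested function's entry)  */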
27163 chain = ix86_static_chain (fndecl, true);
27164 if (REG_P (chain))
27165 {
27166 switch (REGNO (chain))
27167 {
27168 case AX_REG:
27169 opcode = 0xb8; break;
27170 case CX_REG:
27171 opcode = 0xb9; break;
27172 default:
27173 gcc_unreachable ();
27174 }
27175 }
27176 else
27177 opcode = 0x68;
27178
27179 mem = adjust_address (m_tramp, QImode, offset);
27180 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27181
27182 mem = adjust_address (m_tramp, SImode, offset + 1);
27183 emit_move_insn (mem, chain_value);
27184 offset += 5;
27185
27186 mem = adjust_address (m_tramp, QImode, offset);
27187 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27188
27189 mem = adjust_address (m_tramp, SImode, offset + 1);
27190
27191 /* Compute the offset from the end of the jmp to the target function.
27192 In the case in which the trampoline stores the static chain on
27193 the stack, we need to skip the first insn of the target, which
27194 pushes the (call-saved) static chain register; this push is 1 byte. */
27195 offset += 5;
27196 disp = expand_binop (SImode, sub_optab, fnaddr,
27197 plus_constant (Pmode, XEXP (m_tramp, 0),
27198 offset - (MEM_P (chain) ? 1 : 0)),
27199 NULL_RTX, 1, OPTAB_DIRECT);
27200 emit_move_insn (mem, disp);
27201 }
27202
27203 gcc_assert (offset <= TRAMPOLINE_SIZE);
27204
27205 #ifdef HAVE_ENABLE_EXECUTE_STACK
27206 #ifdef CHECK_EXECUTE_STACK_ENABLED
27207 if (CHECK_EXECUTE_STACK_ENABLED)
27208 #endif
27209 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27210 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27211 #endif
27212 }
27213 \f
27214 /* The following file contains several enumerations and data structures
27215 built from the definitions in i386-builtin-types.def. */
27216
27217 #include "i386-builtin-types.inc"
27218
27219 /* Table for the ix86 builtin non-function types. */
27220 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27221
27222 /* Retrieve an element from the above table, building some of
27223 the types lazily. */
27224
27225 static tree
27226 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27227 {
27228 unsigned int index;
27229 tree type, itype;
27230
27231 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27232
27233 type = ix86_builtin_type_tab[(int) tcode];
27234 if (type != NULL)
27235 return type;
27236
27237 gcc_assert (tcode > IX86_BT_LAST_PRIM);
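/* The type codes are grouped in ranges: primitive types first (never built
   lazily here, hence the assert above), then vector types up to
   IX86_BT_LAST_VECT, then plain pointer types up to IX86_BT_LAST_PTR, and
   finally const-qualified pointer types up to IX86_BT_LAST_CPTR. */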
27238 if (tcode <= IX86_BT_LAST_VECT)
27239 {
27240 enum machine_mode mode;
27241
27242 index = tcode - IX86_BT_LAST_PRIM - 1;
27243 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27244 mode = ix86_builtin_type_vect_mode[index];
27245
27246 type = build_vector_type_for_mode (itype, mode);
27247 }
27248 else
27249 {
27250 int quals;
27251
27252 index = tcode - IX86_BT_LAST_VECT - 1;
27253 if (tcode <= IX86_BT_LAST_PTR)
27254 quals = TYPE_UNQUALIFIED;
27255 else
27256 quals = TYPE_QUAL_CONST;
27257
27258 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27259 if (quals != TYPE_UNQUALIFIED)
27260 itype = build_qualified_type (itype, quals);
27261
27262 type = build_pointer_type (itype);
27263 }
27264
27265 ix86_builtin_type_tab[(int) tcode] = type;
27266 return type;
27267 }
27268
27269 /* Table for the ix86 builtin function types. */
27270 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27271
27272 /* Retrieve an element from the above table, building some of
27273 the types lazily. */
27274
27275 static tree
27276 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27277 {
27278 tree type;
27279
27280 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27281
27282 type = ix86_builtin_func_type_tab[(int) tcode];
27283 if (type != NULL)
27284 return type;
27285
27286 if (tcode <= IX86_BT_LAST_FUNC)
27287 {
27288 unsigned start = ix86_builtin_func_start[(int) tcode];
27289 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27290 tree rtype, atype, args = void_list_node;
27291 unsigned i;
27292
27293 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
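/* Entries START+1 .. AFTER-1 are the argument types. Walk them in
   reverse so that the tree_cons chain ends up in declaration order,
   terminated by void_list_node. */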
27294 for (i = after - 1; i > start; --i)
27295 {
27296 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27297 args = tree_cons (NULL, atype, args);
27298 }
27299
27300 type = build_function_type (rtype, args);
27301 }
27302 else
27303 {
27304 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27305 enum ix86_builtin_func_type icode;
27306
27307 icode = ix86_builtin_func_alias_base[index];
27308 type = ix86_get_builtin_func_type (icode);
27309 }
27310
27311 ix86_builtin_func_type_tab[(int) tcode] = type;
27312 return type;
27313 }
27314
27315
27316 /* Codes for all the SSE/MMX builtins. */
27317 enum ix86_builtins
27318 {
27319 IX86_BUILTIN_ADDPS,
27320 IX86_BUILTIN_ADDSS,
27321 IX86_BUILTIN_DIVPS,
27322 IX86_BUILTIN_DIVSS,
27323 IX86_BUILTIN_MULPS,
27324 IX86_BUILTIN_MULSS,
27325 IX86_BUILTIN_SUBPS,
27326 IX86_BUILTIN_SUBSS,
27327
27328 IX86_BUILTIN_CMPEQPS,
27329 IX86_BUILTIN_CMPLTPS,
27330 IX86_BUILTIN_CMPLEPS,
27331 IX86_BUILTIN_CMPGTPS,
27332 IX86_BUILTIN_CMPGEPS,
27333 IX86_BUILTIN_CMPNEQPS,
27334 IX86_BUILTIN_CMPNLTPS,
27335 IX86_BUILTIN_CMPNLEPS,
27336 IX86_BUILTIN_CMPNGTPS,
27337 IX86_BUILTIN_CMPNGEPS,
27338 IX86_BUILTIN_CMPORDPS,
27339 IX86_BUILTIN_CMPUNORDPS,
27340 IX86_BUILTIN_CMPEQSS,
27341 IX86_BUILTIN_CMPLTSS,
27342 IX86_BUILTIN_CMPLESS,
27343 IX86_BUILTIN_CMPNEQSS,
27344 IX86_BUILTIN_CMPNLTSS,
27345 IX86_BUILTIN_CMPNLESS,
27346 IX86_BUILTIN_CMPORDSS,
27347 IX86_BUILTIN_CMPUNORDSS,
27348
27349 IX86_BUILTIN_COMIEQSS,
27350 IX86_BUILTIN_COMILTSS,
27351 IX86_BUILTIN_COMILESS,
27352 IX86_BUILTIN_COMIGTSS,
27353 IX86_BUILTIN_COMIGESS,
27354 IX86_BUILTIN_COMINEQSS,
27355 IX86_BUILTIN_UCOMIEQSS,
27356 IX86_BUILTIN_UCOMILTSS,
27357 IX86_BUILTIN_UCOMILESS,
27358 IX86_BUILTIN_UCOMIGTSS,
27359 IX86_BUILTIN_UCOMIGESS,
27360 IX86_BUILTIN_UCOMINEQSS,
27361
27362 IX86_BUILTIN_CVTPI2PS,
27363 IX86_BUILTIN_CVTPS2PI,
27364 IX86_BUILTIN_CVTSI2SS,
27365 IX86_BUILTIN_CVTSI642SS,
27366 IX86_BUILTIN_CVTSS2SI,
27367 IX86_BUILTIN_CVTSS2SI64,
27368 IX86_BUILTIN_CVTTPS2PI,
27369 IX86_BUILTIN_CVTTSS2SI,
27370 IX86_BUILTIN_CVTTSS2SI64,
27371
27372 IX86_BUILTIN_MAXPS,
27373 IX86_BUILTIN_MAXSS,
27374 IX86_BUILTIN_MINPS,
27375 IX86_BUILTIN_MINSS,
27376
27377 IX86_BUILTIN_LOADUPS,
27378 IX86_BUILTIN_STOREUPS,
27379 IX86_BUILTIN_MOVSS,
27380
27381 IX86_BUILTIN_MOVHLPS,
27382 IX86_BUILTIN_MOVLHPS,
27383 IX86_BUILTIN_LOADHPS,
27384 IX86_BUILTIN_LOADLPS,
27385 IX86_BUILTIN_STOREHPS,
27386 IX86_BUILTIN_STORELPS,
27387
27388 IX86_BUILTIN_MASKMOVQ,
27389 IX86_BUILTIN_MOVMSKPS,
27390 IX86_BUILTIN_PMOVMSKB,
27391
27392 IX86_BUILTIN_MOVNTPS,
27393 IX86_BUILTIN_MOVNTQ,
27394
27395 IX86_BUILTIN_LOADDQU,
27396 IX86_BUILTIN_STOREDQU,
27397
27398 IX86_BUILTIN_PACKSSWB,
27399 IX86_BUILTIN_PACKSSDW,
27400 IX86_BUILTIN_PACKUSWB,
27401
27402 IX86_BUILTIN_PADDB,
27403 IX86_BUILTIN_PADDW,
27404 IX86_BUILTIN_PADDD,
27405 IX86_BUILTIN_PADDQ,
27406 IX86_BUILTIN_PADDSB,
27407 IX86_BUILTIN_PADDSW,
27408 IX86_BUILTIN_PADDUSB,
27409 IX86_BUILTIN_PADDUSW,
27410 IX86_BUILTIN_PSUBB,
27411 IX86_BUILTIN_PSUBW,
27412 IX86_BUILTIN_PSUBD,
27413 IX86_BUILTIN_PSUBQ,
27414 IX86_BUILTIN_PSUBSB,
27415 IX86_BUILTIN_PSUBSW,
27416 IX86_BUILTIN_PSUBUSB,
27417 IX86_BUILTIN_PSUBUSW,
27418
27419 IX86_BUILTIN_PAND,
27420 IX86_BUILTIN_PANDN,
27421 IX86_BUILTIN_POR,
27422 IX86_BUILTIN_PXOR,
27423
27424 IX86_BUILTIN_PAVGB,
27425 IX86_BUILTIN_PAVGW,
27426
27427 IX86_BUILTIN_PCMPEQB,
27428 IX86_BUILTIN_PCMPEQW,
27429 IX86_BUILTIN_PCMPEQD,
27430 IX86_BUILTIN_PCMPGTB,
27431 IX86_BUILTIN_PCMPGTW,
27432 IX86_BUILTIN_PCMPGTD,
27433
27434 IX86_BUILTIN_PMADDWD,
27435
27436 IX86_BUILTIN_PMAXSW,
27437 IX86_BUILTIN_PMAXUB,
27438 IX86_BUILTIN_PMINSW,
27439 IX86_BUILTIN_PMINUB,
27440
27441 IX86_BUILTIN_PMULHUW,
27442 IX86_BUILTIN_PMULHW,
27443 IX86_BUILTIN_PMULLW,
27444
27445 IX86_BUILTIN_PSADBW,
27446 IX86_BUILTIN_PSHUFW,
27447
27448 IX86_BUILTIN_PSLLW,
27449 IX86_BUILTIN_PSLLD,
27450 IX86_BUILTIN_PSLLQ,
27451 IX86_BUILTIN_PSRAW,
27452 IX86_BUILTIN_PSRAD,
27453 IX86_BUILTIN_PSRLW,
27454 IX86_BUILTIN_PSRLD,
27455 IX86_BUILTIN_PSRLQ,
27456 IX86_BUILTIN_PSLLWI,
27457 IX86_BUILTIN_PSLLDI,
27458 IX86_BUILTIN_PSLLQI,
27459 IX86_BUILTIN_PSRAWI,
27460 IX86_BUILTIN_PSRADI,
27461 IX86_BUILTIN_PSRLWI,
27462 IX86_BUILTIN_PSRLDI,
27463 IX86_BUILTIN_PSRLQI,
27464
27465 IX86_BUILTIN_PUNPCKHBW,
27466 IX86_BUILTIN_PUNPCKHWD,
27467 IX86_BUILTIN_PUNPCKHDQ,
27468 IX86_BUILTIN_PUNPCKLBW,
27469 IX86_BUILTIN_PUNPCKLWD,
27470 IX86_BUILTIN_PUNPCKLDQ,
27471
27472 IX86_BUILTIN_SHUFPS,
27473
27474 IX86_BUILTIN_RCPPS,
27475 IX86_BUILTIN_RCPSS,
27476 IX86_BUILTIN_RSQRTPS,
27477 IX86_BUILTIN_RSQRTPS_NR,
27478 IX86_BUILTIN_RSQRTSS,
27479 IX86_BUILTIN_RSQRTF,
27480 IX86_BUILTIN_SQRTPS,
27481 IX86_BUILTIN_SQRTPS_NR,
27482 IX86_BUILTIN_SQRTSS,
27483
27484 IX86_BUILTIN_UNPCKHPS,
27485 IX86_BUILTIN_UNPCKLPS,
27486
27487 IX86_BUILTIN_ANDPS,
27488 IX86_BUILTIN_ANDNPS,
27489 IX86_BUILTIN_ORPS,
27490 IX86_BUILTIN_XORPS,
27491
27492 IX86_BUILTIN_EMMS,
27493 IX86_BUILTIN_LDMXCSR,
27494 IX86_BUILTIN_STMXCSR,
27495 IX86_BUILTIN_SFENCE,
27496
27497 IX86_BUILTIN_FXSAVE,
27498 IX86_BUILTIN_FXRSTOR,
27499 IX86_BUILTIN_FXSAVE64,
27500 IX86_BUILTIN_FXRSTOR64,
27501
27502 IX86_BUILTIN_XSAVE,
27503 IX86_BUILTIN_XRSTOR,
27504 IX86_BUILTIN_XSAVE64,
27505 IX86_BUILTIN_XRSTOR64,
27506
27507 IX86_BUILTIN_XSAVEOPT,
27508 IX86_BUILTIN_XSAVEOPT64,
27509
27510 IX86_BUILTIN_XSAVEC,
27511 IX86_BUILTIN_XSAVEC64,
27512
27513 IX86_BUILTIN_XSAVES,
27514 IX86_BUILTIN_XRSTORS,
27515 IX86_BUILTIN_XSAVES64,
27516 IX86_BUILTIN_XRSTORS64,
27517
27518 /* 3DNow! Original */
27519 IX86_BUILTIN_FEMMS,
27520 IX86_BUILTIN_PAVGUSB,
27521 IX86_BUILTIN_PF2ID,
27522 IX86_BUILTIN_PFACC,
27523 IX86_BUILTIN_PFADD,
27524 IX86_BUILTIN_PFCMPEQ,
27525 IX86_BUILTIN_PFCMPGE,
27526 IX86_BUILTIN_PFCMPGT,
27527 IX86_BUILTIN_PFMAX,
27528 IX86_BUILTIN_PFMIN,
27529 IX86_BUILTIN_PFMUL,
27530 IX86_BUILTIN_PFRCP,
27531 IX86_BUILTIN_PFRCPIT1,
27532 IX86_BUILTIN_PFRCPIT2,
27533 IX86_BUILTIN_PFRSQIT1,
27534 IX86_BUILTIN_PFRSQRT,
27535 IX86_BUILTIN_PFSUB,
27536 IX86_BUILTIN_PFSUBR,
27537 IX86_BUILTIN_PI2FD,
27538 IX86_BUILTIN_PMULHRW,
27539
27540 /* 3DNow! Athlon Extensions */
27541 IX86_BUILTIN_PF2IW,
27542 IX86_BUILTIN_PFNACC,
27543 IX86_BUILTIN_PFPNACC,
27544 IX86_BUILTIN_PI2FW,
27545 IX86_BUILTIN_PSWAPDSI,
27546 IX86_BUILTIN_PSWAPDSF,
27547
27548 /* SSE2 */
27549 IX86_BUILTIN_ADDPD,
27550 IX86_BUILTIN_ADDSD,
27551 IX86_BUILTIN_DIVPD,
27552 IX86_BUILTIN_DIVSD,
27553 IX86_BUILTIN_MULPD,
27554 IX86_BUILTIN_MULSD,
27555 IX86_BUILTIN_SUBPD,
27556 IX86_BUILTIN_SUBSD,
27557
27558 IX86_BUILTIN_CMPEQPD,
27559 IX86_BUILTIN_CMPLTPD,
27560 IX86_BUILTIN_CMPLEPD,
27561 IX86_BUILTIN_CMPGTPD,
27562 IX86_BUILTIN_CMPGEPD,
27563 IX86_BUILTIN_CMPNEQPD,
27564 IX86_BUILTIN_CMPNLTPD,
27565 IX86_BUILTIN_CMPNLEPD,
27566 IX86_BUILTIN_CMPNGTPD,
27567 IX86_BUILTIN_CMPNGEPD,
27568 IX86_BUILTIN_CMPORDPD,
27569 IX86_BUILTIN_CMPUNORDPD,
27570 IX86_BUILTIN_CMPEQSD,
27571 IX86_BUILTIN_CMPLTSD,
27572 IX86_BUILTIN_CMPLESD,
27573 IX86_BUILTIN_CMPNEQSD,
27574 IX86_BUILTIN_CMPNLTSD,
27575 IX86_BUILTIN_CMPNLESD,
27576 IX86_BUILTIN_CMPORDSD,
27577 IX86_BUILTIN_CMPUNORDSD,
27578
27579 IX86_BUILTIN_COMIEQSD,
27580 IX86_BUILTIN_COMILTSD,
27581 IX86_BUILTIN_COMILESD,
27582 IX86_BUILTIN_COMIGTSD,
27583 IX86_BUILTIN_COMIGESD,
27584 IX86_BUILTIN_COMINEQSD,
27585 IX86_BUILTIN_UCOMIEQSD,
27586 IX86_BUILTIN_UCOMILTSD,
27587 IX86_BUILTIN_UCOMILESD,
27588 IX86_BUILTIN_UCOMIGTSD,
27589 IX86_BUILTIN_UCOMIGESD,
27590 IX86_BUILTIN_UCOMINEQSD,
27591
27592 IX86_BUILTIN_MAXPD,
27593 IX86_BUILTIN_MAXSD,
27594 IX86_BUILTIN_MINPD,
27595 IX86_BUILTIN_MINSD,
27596
27597 IX86_BUILTIN_ANDPD,
27598 IX86_BUILTIN_ANDNPD,
27599 IX86_BUILTIN_ORPD,
27600 IX86_BUILTIN_XORPD,
27601
27602 IX86_BUILTIN_SQRTPD,
27603 IX86_BUILTIN_SQRTSD,
27604
27605 IX86_BUILTIN_UNPCKHPD,
27606 IX86_BUILTIN_UNPCKLPD,
27607
27608 IX86_BUILTIN_SHUFPD,
27609
27610 IX86_BUILTIN_LOADUPD,
27611 IX86_BUILTIN_STOREUPD,
27612 IX86_BUILTIN_MOVSD,
27613
27614 IX86_BUILTIN_LOADHPD,
27615 IX86_BUILTIN_LOADLPD,
27616
27617 IX86_BUILTIN_CVTDQ2PD,
27618 IX86_BUILTIN_CVTDQ2PS,
27619
27620 IX86_BUILTIN_CVTPD2DQ,
27621 IX86_BUILTIN_CVTPD2PI,
27622 IX86_BUILTIN_CVTPD2PS,
27623 IX86_BUILTIN_CVTTPD2DQ,
27624 IX86_BUILTIN_CVTTPD2PI,
27625
27626 IX86_BUILTIN_CVTPI2PD,
27627 IX86_BUILTIN_CVTSI2SD,
27628 IX86_BUILTIN_CVTSI642SD,
27629
27630 IX86_BUILTIN_CVTSD2SI,
27631 IX86_BUILTIN_CVTSD2SI64,
27632 IX86_BUILTIN_CVTSD2SS,
27633 IX86_BUILTIN_CVTSS2SD,
27634 IX86_BUILTIN_CVTTSD2SI,
27635 IX86_BUILTIN_CVTTSD2SI64,
27636
27637 IX86_BUILTIN_CVTPS2DQ,
27638 IX86_BUILTIN_CVTPS2PD,
27639 IX86_BUILTIN_CVTTPS2DQ,
27640
27641 IX86_BUILTIN_MOVNTI,
27642 IX86_BUILTIN_MOVNTI64,
27643 IX86_BUILTIN_MOVNTPD,
27644 IX86_BUILTIN_MOVNTDQ,
27645
27646 IX86_BUILTIN_MOVQ128,
27647
27648 /* SSE2 MMX */
27649 IX86_BUILTIN_MASKMOVDQU,
27650 IX86_BUILTIN_MOVMSKPD,
27651 IX86_BUILTIN_PMOVMSKB128,
27652
27653 IX86_BUILTIN_PACKSSWB128,
27654 IX86_BUILTIN_PACKSSDW128,
27655 IX86_BUILTIN_PACKUSWB128,
27656
27657 IX86_BUILTIN_PADDB128,
27658 IX86_BUILTIN_PADDW128,
27659 IX86_BUILTIN_PADDD128,
27660 IX86_BUILTIN_PADDQ128,
27661 IX86_BUILTIN_PADDSB128,
27662 IX86_BUILTIN_PADDSW128,
27663 IX86_BUILTIN_PADDUSB128,
27664 IX86_BUILTIN_PADDUSW128,
27665 IX86_BUILTIN_PSUBB128,
27666 IX86_BUILTIN_PSUBW128,
27667 IX86_BUILTIN_PSUBD128,
27668 IX86_BUILTIN_PSUBQ128,
27669 IX86_BUILTIN_PSUBSB128,
27670 IX86_BUILTIN_PSUBSW128,
27671 IX86_BUILTIN_PSUBUSB128,
27672 IX86_BUILTIN_PSUBUSW128,
27673
27674 IX86_BUILTIN_PAND128,
27675 IX86_BUILTIN_PANDN128,
27676 IX86_BUILTIN_POR128,
27677 IX86_BUILTIN_PXOR128,
27678
27679 IX86_BUILTIN_PAVGB128,
27680 IX86_BUILTIN_PAVGW128,
27681
27682 IX86_BUILTIN_PCMPEQB128,
27683 IX86_BUILTIN_PCMPEQW128,
27684 IX86_BUILTIN_PCMPEQD128,
27685 IX86_BUILTIN_PCMPGTB128,
27686 IX86_BUILTIN_PCMPGTW128,
27687 IX86_BUILTIN_PCMPGTD128,
27688
27689 IX86_BUILTIN_PMADDWD128,
27690
27691 IX86_BUILTIN_PMAXSW128,
27692 IX86_BUILTIN_PMAXUB128,
27693 IX86_BUILTIN_PMINSW128,
27694 IX86_BUILTIN_PMINUB128,
27695
27696 IX86_BUILTIN_PMULUDQ,
27697 IX86_BUILTIN_PMULUDQ128,
27698 IX86_BUILTIN_PMULHUW128,
27699 IX86_BUILTIN_PMULHW128,
27700 IX86_BUILTIN_PMULLW128,
27701
27702 IX86_BUILTIN_PSADBW128,
27703 IX86_BUILTIN_PSHUFHW,
27704 IX86_BUILTIN_PSHUFLW,
27705 IX86_BUILTIN_PSHUFD,
27706
27707 IX86_BUILTIN_PSLLDQI128,
27708 IX86_BUILTIN_PSLLWI128,
27709 IX86_BUILTIN_PSLLDI128,
27710 IX86_BUILTIN_PSLLQI128,
27711 IX86_BUILTIN_PSRAWI128,
27712 IX86_BUILTIN_PSRADI128,
27713 IX86_BUILTIN_PSRLDQI128,
27714 IX86_BUILTIN_PSRLWI128,
27715 IX86_BUILTIN_PSRLDI128,
27716 IX86_BUILTIN_PSRLQI128,
27717
27718 IX86_BUILTIN_PSLLDQ128,
27719 IX86_BUILTIN_PSLLW128,
27720 IX86_BUILTIN_PSLLD128,
27721 IX86_BUILTIN_PSLLQ128,
27722 IX86_BUILTIN_PSRAW128,
27723 IX86_BUILTIN_PSRAD128,
27724 IX86_BUILTIN_PSRLW128,
27725 IX86_BUILTIN_PSRLD128,
27726 IX86_BUILTIN_PSRLQ128,
27727
27728 IX86_BUILTIN_PUNPCKHBW128,
27729 IX86_BUILTIN_PUNPCKHWD128,
27730 IX86_BUILTIN_PUNPCKHDQ128,
27731 IX86_BUILTIN_PUNPCKHQDQ128,
27732 IX86_BUILTIN_PUNPCKLBW128,
27733 IX86_BUILTIN_PUNPCKLWD128,
27734 IX86_BUILTIN_PUNPCKLDQ128,
27735 IX86_BUILTIN_PUNPCKLQDQ128,
27736
27737 IX86_BUILTIN_CLFLUSH,
27738 IX86_BUILTIN_MFENCE,
27739 IX86_BUILTIN_LFENCE,
27740 IX86_BUILTIN_PAUSE,
27741
27742 IX86_BUILTIN_FNSTENV,
27743 IX86_BUILTIN_FLDENV,
27744 IX86_BUILTIN_FNSTSW,
27745 IX86_BUILTIN_FNCLEX,
27746
27747 IX86_BUILTIN_BSRSI,
27748 IX86_BUILTIN_BSRDI,
27749 IX86_BUILTIN_RDPMC,
27750 IX86_BUILTIN_RDTSC,
27751 IX86_BUILTIN_RDTSCP,
27752 IX86_BUILTIN_ROLQI,
27753 IX86_BUILTIN_ROLHI,
27754 IX86_BUILTIN_RORQI,
27755 IX86_BUILTIN_RORHI,
27756
27757 /* SSE3. */
27758 IX86_BUILTIN_ADDSUBPS,
27759 IX86_BUILTIN_HADDPS,
27760 IX86_BUILTIN_HSUBPS,
27761 IX86_BUILTIN_MOVSHDUP,
27762 IX86_BUILTIN_MOVSLDUP,
27763 IX86_BUILTIN_ADDSUBPD,
27764 IX86_BUILTIN_HADDPD,
27765 IX86_BUILTIN_HSUBPD,
27766 IX86_BUILTIN_LDDQU,
27767
27768 IX86_BUILTIN_MONITOR,
27769 IX86_BUILTIN_MWAIT,
27770
27771 /* SSSE3. */
27772 IX86_BUILTIN_PHADDW,
27773 IX86_BUILTIN_PHADDD,
27774 IX86_BUILTIN_PHADDSW,
27775 IX86_BUILTIN_PHSUBW,
27776 IX86_BUILTIN_PHSUBD,
27777 IX86_BUILTIN_PHSUBSW,
27778 IX86_BUILTIN_PMADDUBSW,
27779 IX86_BUILTIN_PMULHRSW,
27780 IX86_BUILTIN_PSHUFB,
27781 IX86_BUILTIN_PSIGNB,
27782 IX86_BUILTIN_PSIGNW,
27783 IX86_BUILTIN_PSIGND,
27784 IX86_BUILTIN_PALIGNR,
27785 IX86_BUILTIN_PABSB,
27786 IX86_BUILTIN_PABSW,
27787 IX86_BUILTIN_PABSD,
27788
27789 IX86_BUILTIN_PHADDW128,
27790 IX86_BUILTIN_PHADDD128,
27791 IX86_BUILTIN_PHADDSW128,
27792 IX86_BUILTIN_PHSUBW128,
27793 IX86_BUILTIN_PHSUBD128,
27794 IX86_BUILTIN_PHSUBSW128,
27795 IX86_BUILTIN_PMADDUBSW128,
27796 IX86_BUILTIN_PMULHRSW128,
27797 IX86_BUILTIN_PSHUFB128,
27798 IX86_BUILTIN_PSIGNB128,
27799 IX86_BUILTIN_PSIGNW128,
27800 IX86_BUILTIN_PSIGND128,
27801 IX86_BUILTIN_PALIGNR128,
27802 IX86_BUILTIN_PABSB128,
27803 IX86_BUILTIN_PABSW128,
27804 IX86_BUILTIN_PABSD128,
27805
27806 /* AMDFAM10 - SSE4A New Instructions. */
27807 IX86_BUILTIN_MOVNTSD,
27808 IX86_BUILTIN_MOVNTSS,
27809 IX86_BUILTIN_EXTRQI,
27810 IX86_BUILTIN_EXTRQ,
27811 IX86_BUILTIN_INSERTQI,
27812 IX86_BUILTIN_INSERTQ,
27813
27814 /* SSE4.1. */
27815 IX86_BUILTIN_BLENDPD,
27816 IX86_BUILTIN_BLENDPS,
27817 IX86_BUILTIN_BLENDVPD,
27818 IX86_BUILTIN_BLENDVPS,
27819 IX86_BUILTIN_PBLENDVB128,
27820 IX86_BUILTIN_PBLENDW128,
27821
27822 IX86_BUILTIN_DPPD,
27823 IX86_BUILTIN_DPPS,
27824
27825 IX86_BUILTIN_INSERTPS128,
27826
27827 IX86_BUILTIN_MOVNTDQA,
27828 IX86_BUILTIN_MPSADBW128,
27829 IX86_BUILTIN_PACKUSDW128,
27830 IX86_BUILTIN_PCMPEQQ,
27831 IX86_BUILTIN_PHMINPOSUW128,
27832
27833 IX86_BUILTIN_PMAXSB128,
27834 IX86_BUILTIN_PMAXSD128,
27835 IX86_BUILTIN_PMAXUD128,
27836 IX86_BUILTIN_PMAXUW128,
27837
27838 IX86_BUILTIN_PMINSB128,
27839 IX86_BUILTIN_PMINSD128,
27840 IX86_BUILTIN_PMINUD128,
27841 IX86_BUILTIN_PMINUW128,
27842
27843 IX86_BUILTIN_PMOVSXBW128,
27844 IX86_BUILTIN_PMOVSXBD128,
27845 IX86_BUILTIN_PMOVSXBQ128,
27846 IX86_BUILTIN_PMOVSXWD128,
27847 IX86_BUILTIN_PMOVSXWQ128,
27848 IX86_BUILTIN_PMOVSXDQ128,
27849
27850 IX86_BUILTIN_PMOVZXBW128,
27851 IX86_BUILTIN_PMOVZXBD128,
27852 IX86_BUILTIN_PMOVZXBQ128,
27853 IX86_BUILTIN_PMOVZXWD128,
27854 IX86_BUILTIN_PMOVZXWQ128,
27855 IX86_BUILTIN_PMOVZXDQ128,
27856
27857 IX86_BUILTIN_PMULDQ128,
27858 IX86_BUILTIN_PMULLD128,
27859
27860 IX86_BUILTIN_ROUNDSD,
27861 IX86_BUILTIN_ROUNDSS,
27862
27863 IX86_BUILTIN_ROUNDPD,
27864 IX86_BUILTIN_ROUNDPS,
27865
27866 IX86_BUILTIN_FLOORPD,
27867 IX86_BUILTIN_CEILPD,
27868 IX86_BUILTIN_TRUNCPD,
27869 IX86_BUILTIN_RINTPD,
27870 IX86_BUILTIN_ROUNDPD_AZ,
27871
27872 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27873 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27874 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27875
27876 IX86_BUILTIN_FLOORPS,
27877 IX86_BUILTIN_CEILPS,
27878 IX86_BUILTIN_TRUNCPS,
27879 IX86_BUILTIN_RINTPS,
27880 IX86_BUILTIN_ROUNDPS_AZ,
27881
27882 IX86_BUILTIN_FLOORPS_SFIX,
27883 IX86_BUILTIN_CEILPS_SFIX,
27884 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27885
27886 IX86_BUILTIN_PTESTZ,
27887 IX86_BUILTIN_PTESTC,
27888 IX86_BUILTIN_PTESTNZC,
27889
27890 IX86_BUILTIN_VEC_INIT_V2SI,
27891 IX86_BUILTIN_VEC_INIT_V4HI,
27892 IX86_BUILTIN_VEC_INIT_V8QI,
27893 IX86_BUILTIN_VEC_EXT_V2DF,
27894 IX86_BUILTIN_VEC_EXT_V2DI,
27895 IX86_BUILTIN_VEC_EXT_V4SF,
27896 IX86_BUILTIN_VEC_EXT_V4SI,
27897 IX86_BUILTIN_VEC_EXT_V8HI,
27898 IX86_BUILTIN_VEC_EXT_V2SI,
27899 IX86_BUILTIN_VEC_EXT_V4HI,
27900 IX86_BUILTIN_VEC_EXT_V16QI,
27901 IX86_BUILTIN_VEC_SET_V2DI,
27902 IX86_BUILTIN_VEC_SET_V4SF,
27903 IX86_BUILTIN_VEC_SET_V4SI,
27904 IX86_BUILTIN_VEC_SET_V8HI,
27905 IX86_BUILTIN_VEC_SET_V4HI,
27906 IX86_BUILTIN_VEC_SET_V16QI,
27907
27908 IX86_BUILTIN_VEC_PACK_SFIX,
27909 IX86_BUILTIN_VEC_PACK_SFIX256,
27910
27911 /* SSE4.2. */
27912 IX86_BUILTIN_CRC32QI,
27913 IX86_BUILTIN_CRC32HI,
27914 IX86_BUILTIN_CRC32SI,
27915 IX86_BUILTIN_CRC32DI,
27916
27917 IX86_BUILTIN_PCMPESTRI128,
27918 IX86_BUILTIN_PCMPESTRM128,
27919 IX86_BUILTIN_PCMPESTRA128,
27920 IX86_BUILTIN_PCMPESTRC128,
27921 IX86_BUILTIN_PCMPESTRO128,
27922 IX86_BUILTIN_PCMPESTRS128,
27923 IX86_BUILTIN_PCMPESTRZ128,
27924 IX86_BUILTIN_PCMPISTRI128,
27925 IX86_BUILTIN_PCMPISTRM128,
27926 IX86_BUILTIN_PCMPISTRA128,
27927 IX86_BUILTIN_PCMPISTRC128,
27928 IX86_BUILTIN_PCMPISTRO128,
27929 IX86_BUILTIN_PCMPISTRS128,
27930 IX86_BUILTIN_PCMPISTRZ128,
27931
27932 IX86_BUILTIN_PCMPGTQ,
27933
27934 /* AES instructions */
27935 IX86_BUILTIN_AESENC128,
27936 IX86_BUILTIN_AESENCLAST128,
27937 IX86_BUILTIN_AESDEC128,
27938 IX86_BUILTIN_AESDECLAST128,
27939 IX86_BUILTIN_AESIMC128,
27940 IX86_BUILTIN_AESKEYGENASSIST128,
27941
27942 /* PCLMUL instruction */
27943 IX86_BUILTIN_PCLMULQDQ128,
27944
27945 /* AVX */
27946 IX86_BUILTIN_ADDPD256,
27947 IX86_BUILTIN_ADDPS256,
27948 IX86_BUILTIN_ADDSUBPD256,
27949 IX86_BUILTIN_ADDSUBPS256,
27950 IX86_BUILTIN_ANDPD256,
27951 IX86_BUILTIN_ANDPS256,
27952 IX86_BUILTIN_ANDNPD256,
27953 IX86_BUILTIN_ANDNPS256,
27954 IX86_BUILTIN_BLENDPD256,
27955 IX86_BUILTIN_BLENDPS256,
27956 IX86_BUILTIN_BLENDVPD256,
27957 IX86_BUILTIN_BLENDVPS256,
27958 IX86_BUILTIN_DIVPD256,
27959 IX86_BUILTIN_DIVPS256,
27960 IX86_BUILTIN_DPPS256,
27961 IX86_BUILTIN_HADDPD256,
27962 IX86_BUILTIN_HADDPS256,
27963 IX86_BUILTIN_HSUBPD256,
27964 IX86_BUILTIN_HSUBPS256,
27965 IX86_BUILTIN_MAXPD256,
27966 IX86_BUILTIN_MAXPS256,
27967 IX86_BUILTIN_MINPD256,
27968 IX86_BUILTIN_MINPS256,
27969 IX86_BUILTIN_MULPD256,
27970 IX86_BUILTIN_MULPS256,
27971 IX86_BUILTIN_ORPD256,
27972 IX86_BUILTIN_ORPS256,
27973 IX86_BUILTIN_SHUFPD256,
27974 IX86_BUILTIN_SHUFPS256,
27975 IX86_BUILTIN_SUBPD256,
27976 IX86_BUILTIN_SUBPS256,
27977 IX86_BUILTIN_XORPD256,
27978 IX86_BUILTIN_XORPS256,
27979 IX86_BUILTIN_CMPSD,
27980 IX86_BUILTIN_CMPSS,
27981 IX86_BUILTIN_CMPPD,
27982 IX86_BUILTIN_CMPPS,
27983 IX86_BUILTIN_CMPPD256,
27984 IX86_BUILTIN_CMPPS256,
27985 IX86_BUILTIN_CVTDQ2PD256,
27986 IX86_BUILTIN_CVTDQ2PS256,
27987 IX86_BUILTIN_CVTPD2PS256,
27988 IX86_BUILTIN_CVTPS2DQ256,
27989 IX86_BUILTIN_CVTPS2PD256,
27990 IX86_BUILTIN_CVTTPD2DQ256,
27991 IX86_BUILTIN_CVTPD2DQ256,
27992 IX86_BUILTIN_CVTTPS2DQ256,
27993 IX86_BUILTIN_EXTRACTF128PD256,
27994 IX86_BUILTIN_EXTRACTF128PS256,
27995 IX86_BUILTIN_EXTRACTF128SI256,
27996 IX86_BUILTIN_VZEROALL,
27997 IX86_BUILTIN_VZEROUPPER,
27998 IX86_BUILTIN_VPERMILVARPD,
27999 IX86_BUILTIN_VPERMILVARPS,
28000 IX86_BUILTIN_VPERMILVARPD256,
28001 IX86_BUILTIN_VPERMILVARPS256,
28002 IX86_BUILTIN_VPERMILPD,
28003 IX86_BUILTIN_VPERMILPS,
28004 IX86_BUILTIN_VPERMILPD256,
28005 IX86_BUILTIN_VPERMILPS256,
28006 IX86_BUILTIN_VPERMIL2PD,
28007 IX86_BUILTIN_VPERMIL2PS,
28008 IX86_BUILTIN_VPERMIL2PD256,
28009 IX86_BUILTIN_VPERMIL2PS256,
28010 IX86_BUILTIN_VPERM2F128PD256,
28011 IX86_BUILTIN_VPERM2F128PS256,
28012 IX86_BUILTIN_VPERM2F128SI256,
28013 IX86_BUILTIN_VBROADCASTSS,
28014 IX86_BUILTIN_VBROADCASTSD256,
28015 IX86_BUILTIN_VBROADCASTSS256,
28016 IX86_BUILTIN_VBROADCASTPD256,
28017 IX86_BUILTIN_VBROADCASTPS256,
28018 IX86_BUILTIN_VINSERTF128PD256,
28019 IX86_BUILTIN_VINSERTF128PS256,
28020 IX86_BUILTIN_VINSERTF128SI256,
28021 IX86_BUILTIN_LOADUPD256,
28022 IX86_BUILTIN_LOADUPS256,
28023 IX86_BUILTIN_STOREUPD256,
28024 IX86_BUILTIN_STOREUPS256,
28025 IX86_BUILTIN_LDDQU256,
28026 IX86_BUILTIN_MOVNTDQ256,
28027 IX86_BUILTIN_MOVNTPD256,
28028 IX86_BUILTIN_MOVNTPS256,
28029 IX86_BUILTIN_LOADDQU256,
28030 IX86_BUILTIN_STOREDQU256,
28031 IX86_BUILTIN_MASKLOADPD,
28032 IX86_BUILTIN_MASKLOADPS,
28033 IX86_BUILTIN_MASKSTOREPD,
28034 IX86_BUILTIN_MASKSTOREPS,
28035 IX86_BUILTIN_MASKLOADPD256,
28036 IX86_BUILTIN_MASKLOADPS256,
28037 IX86_BUILTIN_MASKSTOREPD256,
28038 IX86_BUILTIN_MASKSTOREPS256,
28039 IX86_BUILTIN_MOVSHDUP256,
28040 IX86_BUILTIN_MOVSLDUP256,
28041 IX86_BUILTIN_MOVDDUP256,
28042
28043 IX86_BUILTIN_SQRTPD256,
28044 IX86_BUILTIN_SQRTPS256,
28045 IX86_BUILTIN_SQRTPS_NR256,
28046 IX86_BUILTIN_RSQRTPS256,
28047 IX86_BUILTIN_RSQRTPS_NR256,
28048
28049 IX86_BUILTIN_RCPPS256,
28050
28051 IX86_BUILTIN_ROUNDPD256,
28052 IX86_BUILTIN_ROUNDPS256,
28053
28054 IX86_BUILTIN_FLOORPD256,
28055 IX86_BUILTIN_CEILPD256,
28056 IX86_BUILTIN_TRUNCPD256,
28057 IX86_BUILTIN_RINTPD256,
28058 IX86_BUILTIN_ROUNDPD_AZ256,
28059
28060 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28061 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28062 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28063
28064 IX86_BUILTIN_FLOORPS256,
28065 IX86_BUILTIN_CEILPS256,
28066 IX86_BUILTIN_TRUNCPS256,
28067 IX86_BUILTIN_RINTPS256,
28068 IX86_BUILTIN_ROUNDPS_AZ256,
28069
28070 IX86_BUILTIN_FLOORPS_SFIX256,
28071 IX86_BUILTIN_CEILPS_SFIX256,
28072 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28073
28074 IX86_BUILTIN_UNPCKHPD256,
28075 IX86_BUILTIN_UNPCKLPD256,
28076 IX86_BUILTIN_UNPCKHPS256,
28077 IX86_BUILTIN_UNPCKLPS256,
28078
28079 IX86_BUILTIN_SI256_SI,
28080 IX86_BUILTIN_PS256_PS,
28081 IX86_BUILTIN_PD256_PD,
28082 IX86_BUILTIN_SI_SI256,
28083 IX86_BUILTIN_PS_PS256,
28084 IX86_BUILTIN_PD_PD256,
28085
28086 IX86_BUILTIN_VTESTZPD,
28087 IX86_BUILTIN_VTESTCPD,
28088 IX86_BUILTIN_VTESTNZCPD,
28089 IX86_BUILTIN_VTESTZPS,
28090 IX86_BUILTIN_VTESTCPS,
28091 IX86_BUILTIN_VTESTNZCPS,
28092 IX86_BUILTIN_VTESTZPD256,
28093 IX86_BUILTIN_VTESTCPD256,
28094 IX86_BUILTIN_VTESTNZCPD256,
28095 IX86_BUILTIN_VTESTZPS256,
28096 IX86_BUILTIN_VTESTCPS256,
28097 IX86_BUILTIN_VTESTNZCPS256,
28098 IX86_BUILTIN_PTESTZ256,
28099 IX86_BUILTIN_PTESTC256,
28100 IX86_BUILTIN_PTESTNZC256,
28101
28102 IX86_BUILTIN_MOVMSKPD256,
28103 IX86_BUILTIN_MOVMSKPS256,
28104
28105 /* AVX2 */
28106 IX86_BUILTIN_MPSADBW256,
28107 IX86_BUILTIN_PABSB256,
28108 IX86_BUILTIN_PABSW256,
28109 IX86_BUILTIN_PABSD256,
28110 IX86_BUILTIN_PACKSSDW256,
28111 IX86_BUILTIN_PACKSSWB256,
28112 IX86_BUILTIN_PACKUSDW256,
28113 IX86_BUILTIN_PACKUSWB256,
28114 IX86_BUILTIN_PADDB256,
28115 IX86_BUILTIN_PADDW256,
28116 IX86_BUILTIN_PADDD256,
28117 IX86_BUILTIN_PADDQ256,
28118 IX86_BUILTIN_PADDSB256,
28119 IX86_BUILTIN_PADDSW256,
28120 IX86_BUILTIN_PADDUSB256,
28121 IX86_BUILTIN_PADDUSW256,
28122 IX86_BUILTIN_PALIGNR256,
28123 IX86_BUILTIN_AND256I,
28124 IX86_BUILTIN_ANDNOT256I,
28125 IX86_BUILTIN_PAVGB256,
28126 IX86_BUILTIN_PAVGW256,
28127 IX86_BUILTIN_PBLENDVB256,
28128 IX86_BUILTIN_PBLENDVW256,
28129 IX86_BUILTIN_PCMPEQB256,
28130 IX86_BUILTIN_PCMPEQW256,
28131 IX86_BUILTIN_PCMPEQD256,
28132 IX86_BUILTIN_PCMPEQQ256,
28133 IX86_BUILTIN_PCMPGTB256,
28134 IX86_BUILTIN_PCMPGTW256,
28135 IX86_BUILTIN_PCMPGTD256,
28136 IX86_BUILTIN_PCMPGTQ256,
28137 IX86_BUILTIN_PHADDW256,
28138 IX86_BUILTIN_PHADDD256,
28139 IX86_BUILTIN_PHADDSW256,
28140 IX86_BUILTIN_PHSUBW256,
28141 IX86_BUILTIN_PHSUBD256,
28142 IX86_BUILTIN_PHSUBSW256,
28143 IX86_BUILTIN_PMADDUBSW256,
28144 IX86_BUILTIN_PMADDWD256,
28145 IX86_BUILTIN_PMAXSB256,
28146 IX86_BUILTIN_PMAXSW256,
28147 IX86_BUILTIN_PMAXSD256,
28148 IX86_BUILTIN_PMAXUB256,
28149 IX86_BUILTIN_PMAXUW256,
28150 IX86_BUILTIN_PMAXUD256,
28151 IX86_BUILTIN_PMINSB256,
28152 IX86_BUILTIN_PMINSW256,
28153 IX86_BUILTIN_PMINSD256,
28154 IX86_BUILTIN_PMINUB256,
28155 IX86_BUILTIN_PMINUW256,
28156 IX86_BUILTIN_PMINUD256,
28157 IX86_BUILTIN_PMOVMSKB256,
28158 IX86_BUILTIN_PMOVSXBW256,
28159 IX86_BUILTIN_PMOVSXBD256,
28160 IX86_BUILTIN_PMOVSXBQ256,
28161 IX86_BUILTIN_PMOVSXWD256,
28162 IX86_BUILTIN_PMOVSXWQ256,
28163 IX86_BUILTIN_PMOVSXDQ256,
28164 IX86_BUILTIN_PMOVZXBW256,
28165 IX86_BUILTIN_PMOVZXBD256,
28166 IX86_BUILTIN_PMOVZXBQ256,
28167 IX86_BUILTIN_PMOVZXWD256,
28168 IX86_BUILTIN_PMOVZXWQ256,
28169 IX86_BUILTIN_PMOVZXDQ256,
28170 IX86_BUILTIN_PMULDQ256,
28171 IX86_BUILTIN_PMULHRSW256,
28172 IX86_BUILTIN_PMULHUW256,
28173 IX86_BUILTIN_PMULHW256,
28174 IX86_BUILTIN_PMULLW256,
28175 IX86_BUILTIN_PMULLD256,
28176 IX86_BUILTIN_PMULUDQ256,
28177 IX86_BUILTIN_POR256,
28178 IX86_BUILTIN_PSADBW256,
28179 IX86_BUILTIN_PSHUFB256,
28180 IX86_BUILTIN_PSHUFD256,
28181 IX86_BUILTIN_PSHUFHW256,
28182 IX86_BUILTIN_PSHUFLW256,
28183 IX86_BUILTIN_PSIGNB256,
28184 IX86_BUILTIN_PSIGNW256,
28185 IX86_BUILTIN_PSIGND256,
28186 IX86_BUILTIN_PSLLDQI256,
28187 IX86_BUILTIN_PSLLWI256,
28188 IX86_BUILTIN_PSLLW256,
28189 IX86_BUILTIN_PSLLDI256,
28190 IX86_BUILTIN_PSLLD256,
28191 IX86_BUILTIN_PSLLQI256,
28192 IX86_BUILTIN_PSLLQ256,
28193 IX86_BUILTIN_PSRAWI256,
28194 IX86_BUILTIN_PSRAW256,
28195 IX86_BUILTIN_PSRADI256,
28196 IX86_BUILTIN_PSRAD256,
28197 IX86_BUILTIN_PSRLDQI256,
28198 IX86_BUILTIN_PSRLWI256,
28199 IX86_BUILTIN_PSRLW256,
28200 IX86_BUILTIN_PSRLDI256,
28201 IX86_BUILTIN_PSRLD256,
28202 IX86_BUILTIN_PSRLQI256,
28203 IX86_BUILTIN_PSRLQ256,
28204 IX86_BUILTIN_PSUBB256,
28205 IX86_BUILTIN_PSUBW256,
28206 IX86_BUILTIN_PSUBD256,
28207 IX86_BUILTIN_PSUBQ256,
28208 IX86_BUILTIN_PSUBSB256,
28209 IX86_BUILTIN_PSUBSW256,
28210 IX86_BUILTIN_PSUBUSB256,
28211 IX86_BUILTIN_PSUBUSW256,
28212 IX86_BUILTIN_PUNPCKHBW256,
28213 IX86_BUILTIN_PUNPCKHWD256,
28214 IX86_BUILTIN_PUNPCKHDQ256,
28215 IX86_BUILTIN_PUNPCKHQDQ256,
28216 IX86_BUILTIN_PUNPCKLBW256,
28217 IX86_BUILTIN_PUNPCKLWD256,
28218 IX86_BUILTIN_PUNPCKLDQ256,
28219 IX86_BUILTIN_PUNPCKLQDQ256,
28220 IX86_BUILTIN_PXOR256,
28221 IX86_BUILTIN_MOVNTDQA256,
28222 IX86_BUILTIN_VBROADCASTSS_PS,
28223 IX86_BUILTIN_VBROADCASTSS_PS256,
28224 IX86_BUILTIN_VBROADCASTSD_PD256,
28225 IX86_BUILTIN_VBROADCASTSI256,
28226 IX86_BUILTIN_PBLENDD256,
28227 IX86_BUILTIN_PBLENDD128,
28228 IX86_BUILTIN_PBROADCASTB256,
28229 IX86_BUILTIN_PBROADCASTW256,
28230 IX86_BUILTIN_PBROADCASTD256,
28231 IX86_BUILTIN_PBROADCASTQ256,
28232 IX86_BUILTIN_PBROADCASTB128,
28233 IX86_BUILTIN_PBROADCASTW128,
28234 IX86_BUILTIN_PBROADCASTD128,
28235 IX86_BUILTIN_PBROADCASTQ128,
28236 IX86_BUILTIN_VPERMVARSI256,
28237 IX86_BUILTIN_VPERMDF256,
28238 IX86_BUILTIN_VPERMVARSF256,
28239 IX86_BUILTIN_VPERMDI256,
28240 IX86_BUILTIN_VPERMTI256,
28241 IX86_BUILTIN_VEXTRACT128I256,
28242 IX86_BUILTIN_VINSERT128I256,
28243 IX86_BUILTIN_MASKLOADD,
28244 IX86_BUILTIN_MASKLOADQ,
28245 IX86_BUILTIN_MASKLOADD256,
28246 IX86_BUILTIN_MASKLOADQ256,
28247 IX86_BUILTIN_MASKSTORED,
28248 IX86_BUILTIN_MASKSTOREQ,
28249 IX86_BUILTIN_MASKSTORED256,
28250 IX86_BUILTIN_MASKSTOREQ256,
28251 IX86_BUILTIN_PSLLVV4DI,
28252 IX86_BUILTIN_PSLLVV2DI,
28253 IX86_BUILTIN_PSLLVV8SI,
28254 IX86_BUILTIN_PSLLVV4SI,
28255 IX86_BUILTIN_PSRAVV8SI,
28256 IX86_BUILTIN_PSRAVV4SI,
28257 IX86_BUILTIN_PSRLVV4DI,
28258 IX86_BUILTIN_PSRLVV2DI,
28259 IX86_BUILTIN_PSRLVV8SI,
28260 IX86_BUILTIN_PSRLVV4SI,
28261
28262 IX86_BUILTIN_GATHERSIV2DF,
28263 IX86_BUILTIN_GATHERSIV4DF,
28264 IX86_BUILTIN_GATHERDIV2DF,
28265 IX86_BUILTIN_GATHERDIV4DF,
28266 IX86_BUILTIN_GATHERSIV4SF,
28267 IX86_BUILTIN_GATHERSIV8SF,
28268 IX86_BUILTIN_GATHERDIV4SF,
28269 IX86_BUILTIN_GATHERDIV8SF,
28270 IX86_BUILTIN_GATHERSIV2DI,
28271 IX86_BUILTIN_GATHERSIV4DI,
28272 IX86_BUILTIN_GATHERDIV2DI,
28273 IX86_BUILTIN_GATHERDIV4DI,
28274 IX86_BUILTIN_GATHERSIV4SI,
28275 IX86_BUILTIN_GATHERSIV8SI,
28276 IX86_BUILTIN_GATHERDIV4SI,
28277 IX86_BUILTIN_GATHERDIV8SI,
28278
28279 /* AVX512F */
28280 IX86_BUILTIN_SI512_SI256,
28281 IX86_BUILTIN_PD512_PD256,
28282 IX86_BUILTIN_PS512_PS256,
28283 IX86_BUILTIN_SI512_SI,
28284 IX86_BUILTIN_PD512_PD,
28285 IX86_BUILTIN_PS512_PS,
28286 IX86_BUILTIN_ADDPD512,
28287 IX86_BUILTIN_ADDPS512,
28288 IX86_BUILTIN_ADDSD_ROUND,
28289 IX86_BUILTIN_ADDSS_ROUND,
28290 IX86_BUILTIN_ALIGND512,
28291 IX86_BUILTIN_ALIGNQ512,
28292 IX86_BUILTIN_BLENDMD512,
28293 IX86_BUILTIN_BLENDMPD512,
28294 IX86_BUILTIN_BLENDMPS512,
28295 IX86_BUILTIN_BLENDMQ512,
28296 IX86_BUILTIN_BROADCASTF32X4_512,
28297 IX86_BUILTIN_BROADCASTF64X4_512,
28298 IX86_BUILTIN_BROADCASTI32X4_512,
28299 IX86_BUILTIN_BROADCASTI64X4_512,
28300 IX86_BUILTIN_BROADCASTSD512,
28301 IX86_BUILTIN_BROADCASTSS512,
28302 IX86_BUILTIN_CMPD512,
28303 IX86_BUILTIN_CMPPD512,
28304 IX86_BUILTIN_CMPPS512,
28305 IX86_BUILTIN_CMPQ512,
28306 IX86_BUILTIN_CMPSD_MASK,
28307 IX86_BUILTIN_CMPSS_MASK,
28308 IX86_BUILTIN_COMIDF,
28309 IX86_BUILTIN_COMISF,
28310 IX86_BUILTIN_COMPRESSPD512,
28311 IX86_BUILTIN_COMPRESSPDSTORE512,
28312 IX86_BUILTIN_COMPRESSPS512,
28313 IX86_BUILTIN_COMPRESSPSSTORE512,
28314 IX86_BUILTIN_CVTDQ2PD512,
28315 IX86_BUILTIN_CVTDQ2PS512,
28316 IX86_BUILTIN_CVTPD2DQ512,
28317 IX86_BUILTIN_CVTPD2PS512,
28318 IX86_BUILTIN_CVTPD2UDQ512,
28319 IX86_BUILTIN_CVTPH2PS512,
28320 IX86_BUILTIN_CVTPS2DQ512,
28321 IX86_BUILTIN_CVTPS2PD512,
28322 IX86_BUILTIN_CVTPS2PH512,
28323 IX86_BUILTIN_CVTPS2UDQ512,
28324 IX86_BUILTIN_CVTSD2SS_ROUND,
28325 IX86_BUILTIN_CVTSI2SD64,
28326 IX86_BUILTIN_CVTSI2SS32,
28327 IX86_BUILTIN_CVTSI2SS64,
28328 IX86_BUILTIN_CVTSS2SD_ROUND,
28329 IX86_BUILTIN_CVTTPD2DQ512,
28330 IX86_BUILTIN_CVTTPD2UDQ512,
28331 IX86_BUILTIN_CVTTPS2DQ512,
28332 IX86_BUILTIN_CVTTPS2UDQ512,
28333 IX86_BUILTIN_CVTUDQ2PD512,
28334 IX86_BUILTIN_CVTUDQ2PS512,
28335 IX86_BUILTIN_CVTUSI2SD32,
28336 IX86_BUILTIN_CVTUSI2SD64,
28337 IX86_BUILTIN_CVTUSI2SS32,
28338 IX86_BUILTIN_CVTUSI2SS64,
28339 IX86_BUILTIN_DIVPD512,
28340 IX86_BUILTIN_DIVPS512,
28341 IX86_BUILTIN_DIVSD_ROUND,
28342 IX86_BUILTIN_DIVSS_ROUND,
28343 IX86_BUILTIN_EXPANDPD512,
28344 IX86_BUILTIN_EXPANDPD512Z,
28345 IX86_BUILTIN_EXPANDPDLOAD512,
28346 IX86_BUILTIN_EXPANDPDLOAD512Z,
28347 IX86_BUILTIN_EXPANDPS512,
28348 IX86_BUILTIN_EXPANDPS512Z,
28349 IX86_BUILTIN_EXPANDPSLOAD512,
28350 IX86_BUILTIN_EXPANDPSLOAD512Z,
28351 IX86_BUILTIN_EXTRACTF32X4,
28352 IX86_BUILTIN_EXTRACTF64X4,
28353 IX86_BUILTIN_EXTRACTI32X4,
28354 IX86_BUILTIN_EXTRACTI64X4,
28355 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28356 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28357 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28358 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28359 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28360 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28361 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28362 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28363 IX86_BUILTIN_GETEXPPD512,
28364 IX86_BUILTIN_GETEXPPS512,
28365 IX86_BUILTIN_GETEXPSD128,
28366 IX86_BUILTIN_GETEXPSS128,
28367 IX86_BUILTIN_GETMANTPD512,
28368 IX86_BUILTIN_GETMANTPS512,
28369 IX86_BUILTIN_GETMANTSD128,
28370 IX86_BUILTIN_GETMANTSS128,
28371 IX86_BUILTIN_INSERTF32X4,
28372 IX86_BUILTIN_INSERTF64X4,
28373 IX86_BUILTIN_INSERTI32X4,
28374 IX86_BUILTIN_INSERTI64X4,
28375 IX86_BUILTIN_LOADAPD512,
28376 IX86_BUILTIN_LOADAPS512,
28377 IX86_BUILTIN_LOADDQUDI512,
28378 IX86_BUILTIN_LOADDQUSI512,
28379 IX86_BUILTIN_LOADUPD512,
28380 IX86_BUILTIN_LOADUPS512,
28381 IX86_BUILTIN_MAXPD512,
28382 IX86_BUILTIN_MAXPS512,
28383 IX86_BUILTIN_MAXSD_ROUND,
28384 IX86_BUILTIN_MAXSS_ROUND,
28385 IX86_BUILTIN_MINPD512,
28386 IX86_BUILTIN_MINPS512,
28387 IX86_BUILTIN_MINSD_ROUND,
28388 IX86_BUILTIN_MINSS_ROUND,
28389 IX86_BUILTIN_MOVAPD512,
28390 IX86_BUILTIN_MOVAPS512,
28391 IX86_BUILTIN_MOVDDUP512,
28392 IX86_BUILTIN_MOVDQA32LOAD512,
28393 IX86_BUILTIN_MOVDQA32STORE512,
28394 IX86_BUILTIN_MOVDQA32_512,
28395 IX86_BUILTIN_MOVDQA64LOAD512,
28396 IX86_BUILTIN_MOVDQA64STORE512,
28397 IX86_BUILTIN_MOVDQA64_512,
28398 IX86_BUILTIN_MOVNTDQ512,
28399 IX86_BUILTIN_MOVNTDQA512,
28400 IX86_BUILTIN_MOVNTPD512,
28401 IX86_BUILTIN_MOVNTPS512,
28402 IX86_BUILTIN_MOVSHDUP512,
28403 IX86_BUILTIN_MOVSLDUP512,
28404 IX86_BUILTIN_MULPD512,
28405 IX86_BUILTIN_MULPS512,
28406 IX86_BUILTIN_MULSD_ROUND,
28407 IX86_BUILTIN_MULSS_ROUND,
28408 IX86_BUILTIN_PABSD512,
28409 IX86_BUILTIN_PABSQ512,
28410 IX86_BUILTIN_PADDD512,
28411 IX86_BUILTIN_PADDQ512,
28412 IX86_BUILTIN_PANDD512,
28413 IX86_BUILTIN_PANDND512,
28414 IX86_BUILTIN_PANDNQ512,
28415 IX86_BUILTIN_PANDQ512,
28416 IX86_BUILTIN_PBROADCASTD512,
28417 IX86_BUILTIN_PBROADCASTD512_GPR,
28418 IX86_BUILTIN_PBROADCASTMB512,
28419 IX86_BUILTIN_PBROADCASTMW512,
28420 IX86_BUILTIN_PBROADCASTQ512,
28421 IX86_BUILTIN_PBROADCASTQ512_GPR,
28422 IX86_BUILTIN_PBROADCASTQ512_MEM,
28423 IX86_BUILTIN_PCMPEQD512_MASK,
28424 IX86_BUILTIN_PCMPEQQ512_MASK,
28425 IX86_BUILTIN_PCMPGTD512_MASK,
28426 IX86_BUILTIN_PCMPGTQ512_MASK,
28427 IX86_BUILTIN_PCOMPRESSD512,
28428 IX86_BUILTIN_PCOMPRESSDSTORE512,
28429 IX86_BUILTIN_PCOMPRESSQ512,
28430 IX86_BUILTIN_PCOMPRESSQSTORE512,
28431 IX86_BUILTIN_PEXPANDD512,
28432 IX86_BUILTIN_PEXPANDD512Z,
28433 IX86_BUILTIN_PEXPANDDLOAD512,
28434 IX86_BUILTIN_PEXPANDDLOAD512Z,
28435 IX86_BUILTIN_PEXPANDQ512,
28436 IX86_BUILTIN_PEXPANDQ512Z,
28437 IX86_BUILTIN_PEXPANDQLOAD512,
28438 IX86_BUILTIN_PEXPANDQLOAD512Z,
28439 IX86_BUILTIN_PMAXSD512,
28440 IX86_BUILTIN_PMAXSQ512,
28441 IX86_BUILTIN_PMAXUD512,
28442 IX86_BUILTIN_PMAXUQ512,
28443 IX86_BUILTIN_PMINSD512,
28444 IX86_BUILTIN_PMINSQ512,
28445 IX86_BUILTIN_PMINUD512,
28446 IX86_BUILTIN_PMINUQ512,
28447 IX86_BUILTIN_PMOVDB512,
28448 IX86_BUILTIN_PMOVDB512_MEM,
28449 IX86_BUILTIN_PMOVDW512,
28450 IX86_BUILTIN_PMOVDW512_MEM,
28451 IX86_BUILTIN_PMOVQB512,
28452 IX86_BUILTIN_PMOVQB512_MEM,
28453 IX86_BUILTIN_PMOVQD512,
28454 IX86_BUILTIN_PMOVQD512_MEM,
28455 IX86_BUILTIN_PMOVQW512,
28456 IX86_BUILTIN_PMOVQW512_MEM,
28457 IX86_BUILTIN_PMOVSDB512,
28458 IX86_BUILTIN_PMOVSDB512_MEM,
28459 IX86_BUILTIN_PMOVSDW512,
28460 IX86_BUILTIN_PMOVSDW512_MEM,
28461 IX86_BUILTIN_PMOVSQB512,
28462 IX86_BUILTIN_PMOVSQB512_MEM,
28463 IX86_BUILTIN_PMOVSQD512,
28464 IX86_BUILTIN_PMOVSQD512_MEM,
28465 IX86_BUILTIN_PMOVSQW512,
28466 IX86_BUILTIN_PMOVSQW512_MEM,
28467 IX86_BUILTIN_PMOVSXBD512,
28468 IX86_BUILTIN_PMOVSXBQ512,
28469 IX86_BUILTIN_PMOVSXDQ512,
28470 IX86_BUILTIN_PMOVSXWD512,
28471 IX86_BUILTIN_PMOVSXWQ512,
28472 IX86_BUILTIN_PMOVUSDB512,
28473 IX86_BUILTIN_PMOVUSDB512_MEM,
28474 IX86_BUILTIN_PMOVUSDW512,
28475 IX86_BUILTIN_PMOVUSDW512_MEM,
28476 IX86_BUILTIN_PMOVUSQB512,
28477 IX86_BUILTIN_PMOVUSQB512_MEM,
28478 IX86_BUILTIN_PMOVUSQD512,
28479 IX86_BUILTIN_PMOVUSQD512_MEM,
28480 IX86_BUILTIN_PMOVUSQW512,
28481 IX86_BUILTIN_PMOVUSQW512_MEM,
28482 IX86_BUILTIN_PMOVZXBD512,
28483 IX86_BUILTIN_PMOVZXBQ512,
28484 IX86_BUILTIN_PMOVZXDQ512,
28485 IX86_BUILTIN_PMOVZXWD512,
28486 IX86_BUILTIN_PMOVZXWQ512,
28487 IX86_BUILTIN_PMULDQ512,
28488 IX86_BUILTIN_PMULLD512,
28489 IX86_BUILTIN_PMULUDQ512,
28490 IX86_BUILTIN_PORD512,
28491 IX86_BUILTIN_PORQ512,
28492 IX86_BUILTIN_PROLD512,
28493 IX86_BUILTIN_PROLQ512,
28494 IX86_BUILTIN_PROLVD512,
28495 IX86_BUILTIN_PROLVQ512,
28496 IX86_BUILTIN_PRORD512,
28497 IX86_BUILTIN_PRORQ512,
28498 IX86_BUILTIN_PRORVD512,
28499 IX86_BUILTIN_PRORVQ512,
28500 IX86_BUILTIN_PSHUFD512,
28501 IX86_BUILTIN_PSLLD512,
28502 IX86_BUILTIN_PSLLDI512,
28503 IX86_BUILTIN_PSLLQ512,
28504 IX86_BUILTIN_PSLLQI512,
28505 IX86_BUILTIN_PSLLVV16SI,
28506 IX86_BUILTIN_PSLLVV8DI,
28507 IX86_BUILTIN_PSRAD512,
28508 IX86_BUILTIN_PSRADI512,
28509 IX86_BUILTIN_PSRAQ512,
28510 IX86_BUILTIN_PSRAQI512,
28511 IX86_BUILTIN_PSRAVV16SI,
28512 IX86_BUILTIN_PSRAVV8DI,
28513 IX86_BUILTIN_PSRLD512,
28514 IX86_BUILTIN_PSRLDI512,
28515 IX86_BUILTIN_PSRLQ512,
28516 IX86_BUILTIN_PSRLQI512,
28517 IX86_BUILTIN_PSRLVV16SI,
28518 IX86_BUILTIN_PSRLVV8DI,
28519 IX86_BUILTIN_PSUBD512,
28520 IX86_BUILTIN_PSUBQ512,
28521 IX86_BUILTIN_PTESTMD512,
28522 IX86_BUILTIN_PTESTMQ512,
28523 IX86_BUILTIN_PTESTNMD512,
28524 IX86_BUILTIN_PTESTNMQ512,
28525 IX86_BUILTIN_PUNPCKHDQ512,
28526 IX86_BUILTIN_PUNPCKHQDQ512,
28527 IX86_BUILTIN_PUNPCKLDQ512,
28528 IX86_BUILTIN_PUNPCKLQDQ512,
28529 IX86_BUILTIN_PXORD512,
28530 IX86_BUILTIN_PXORQ512,
28531 IX86_BUILTIN_RCP14PD512,
28532 IX86_BUILTIN_RCP14PS512,
28533 IX86_BUILTIN_RCP14SD,
28534 IX86_BUILTIN_RCP14SS,
28535 IX86_BUILTIN_RNDSCALEPD,
28536 IX86_BUILTIN_RNDSCALEPS,
28537 IX86_BUILTIN_RNDSCALESD,
28538 IX86_BUILTIN_RNDSCALESS,
28539 IX86_BUILTIN_RSQRT14PD512,
28540 IX86_BUILTIN_RSQRT14PS512,
28541 IX86_BUILTIN_RSQRT14SD,
28542 IX86_BUILTIN_RSQRT14SS,
28543 IX86_BUILTIN_SCALEFPD512,
28544 IX86_BUILTIN_SCALEFPS512,
28545 IX86_BUILTIN_SCALEFSD,
28546 IX86_BUILTIN_SCALEFSS,
28547 IX86_BUILTIN_SHUFPD512,
28548 IX86_BUILTIN_SHUFPS512,
28549 IX86_BUILTIN_SHUF_F32x4,
28550 IX86_BUILTIN_SHUF_F64x2,
28551 IX86_BUILTIN_SHUF_I32x4,
28552 IX86_BUILTIN_SHUF_I64x2,
28553 IX86_BUILTIN_SQRTPD512,
28554 IX86_BUILTIN_SQRTPD512_MASK,
28555 IX86_BUILTIN_SQRTPS512_MASK,
28556 IX86_BUILTIN_SQRTPS_NR512,
28557 IX86_BUILTIN_SQRTSD_ROUND,
28558 IX86_BUILTIN_SQRTSS_ROUND,
28559 IX86_BUILTIN_STOREAPD512,
28560 IX86_BUILTIN_STOREAPS512,
28561 IX86_BUILTIN_STOREDQUDI512,
28562 IX86_BUILTIN_STOREDQUSI512,
28563 IX86_BUILTIN_STOREUPD512,
28564 IX86_BUILTIN_STOREUPS512,
28565 IX86_BUILTIN_SUBPD512,
28566 IX86_BUILTIN_SUBPS512,
28567 IX86_BUILTIN_SUBSD_ROUND,
28568 IX86_BUILTIN_SUBSS_ROUND,
28569 IX86_BUILTIN_UCMPD512,
28570 IX86_BUILTIN_UCMPQ512,
28571 IX86_BUILTIN_UNPCKHPD512,
28572 IX86_BUILTIN_UNPCKHPS512,
28573 IX86_BUILTIN_UNPCKLPD512,
28574 IX86_BUILTIN_UNPCKLPS512,
28575 IX86_BUILTIN_VCVTSD2SI32,
28576 IX86_BUILTIN_VCVTSD2SI64,
28577 IX86_BUILTIN_VCVTSD2USI32,
28578 IX86_BUILTIN_VCVTSD2USI64,
28579 IX86_BUILTIN_VCVTSS2SI32,
28580 IX86_BUILTIN_VCVTSS2SI64,
28581 IX86_BUILTIN_VCVTSS2USI32,
28582 IX86_BUILTIN_VCVTSS2USI64,
28583 IX86_BUILTIN_VCVTTSD2SI32,
28584 IX86_BUILTIN_VCVTTSD2SI64,
28585 IX86_BUILTIN_VCVTTSD2USI32,
28586 IX86_BUILTIN_VCVTTSD2USI64,
28587 IX86_BUILTIN_VCVTTSS2SI32,
28588 IX86_BUILTIN_VCVTTSS2SI64,
28589 IX86_BUILTIN_VCVTTSS2USI32,
28590 IX86_BUILTIN_VCVTTSS2USI64,
28591 IX86_BUILTIN_VFMADDPD512_MASK,
28592 IX86_BUILTIN_VFMADDPD512_MASK3,
28593 IX86_BUILTIN_VFMADDPD512_MASKZ,
28594 IX86_BUILTIN_VFMADDPS512_MASK,
28595 IX86_BUILTIN_VFMADDPS512_MASK3,
28596 IX86_BUILTIN_VFMADDPS512_MASKZ,
28597 IX86_BUILTIN_VFMADDSD3_ROUND,
28598 IX86_BUILTIN_VFMADDSS3_ROUND,
28599 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28600 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28601 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28602 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28603 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28604 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28605 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28606 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28607 IX86_BUILTIN_VFMSUBPD512_MASK3,
28608 IX86_BUILTIN_VFMSUBPS512_MASK3,
28609 IX86_BUILTIN_VFMSUBSD3_MASK3,
28610 IX86_BUILTIN_VFMSUBSS3_MASK3,
28611 IX86_BUILTIN_VFNMADDPD512_MASK,
28612 IX86_BUILTIN_VFNMADDPS512_MASK,
28613 IX86_BUILTIN_VFNMSUBPD512_MASK,
28614 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28615 IX86_BUILTIN_VFNMSUBPS512_MASK,
28616 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28617 IX86_BUILTIN_VPCLZCNTD512,
28618 IX86_BUILTIN_VPCLZCNTQ512,
28619 IX86_BUILTIN_VPCONFLICTD512,
28620 IX86_BUILTIN_VPCONFLICTQ512,
28621 IX86_BUILTIN_VPERMDF512,
28622 IX86_BUILTIN_VPERMDI512,
28623 IX86_BUILTIN_VPERMI2VARD512,
28624 IX86_BUILTIN_VPERMI2VARPD512,
28625 IX86_BUILTIN_VPERMI2VARPS512,
28626 IX86_BUILTIN_VPERMI2VARQ512,
28627 IX86_BUILTIN_VPERMILPD512,
28628 IX86_BUILTIN_VPERMILPS512,
28629 IX86_BUILTIN_VPERMILVARPD512,
28630 IX86_BUILTIN_VPERMILVARPS512,
28631 IX86_BUILTIN_VPERMT2VARD512,
28632 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28633 IX86_BUILTIN_VPERMT2VARPD512,
28634 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28635 IX86_BUILTIN_VPERMT2VARPS512,
28636 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28637 IX86_BUILTIN_VPERMT2VARQ512,
28638 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28639 IX86_BUILTIN_VPERMVARDF512,
28640 IX86_BUILTIN_VPERMVARDI512,
28641 IX86_BUILTIN_VPERMVARSF512,
28642 IX86_BUILTIN_VPERMVARSI512,
28643 IX86_BUILTIN_VTERNLOGD512_MASK,
28644 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28645 IX86_BUILTIN_VTERNLOGQ512_MASK,
28646 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28647
28648 /* Mask arithmetic operations */
28649 IX86_BUILTIN_KAND16,
28650 IX86_BUILTIN_KANDN16,
28651 IX86_BUILTIN_KNOT16,
28652 IX86_BUILTIN_KOR16,
28653 IX86_BUILTIN_KORTESTC16,
28654 IX86_BUILTIN_KORTESTZ16,
28655 IX86_BUILTIN_KUNPCKBW,
28656 IX86_BUILTIN_KXNOR16,
28657 IX86_BUILTIN_KXOR16,
28658 IX86_BUILTIN_KMOV16,
28659
28660 /* Alternate 4- and 8-element gather/scatter builtins for the vectorizer,
28661 where all operands are 32 or 64 bytes wide respectively. */
28662 IX86_BUILTIN_GATHERALTSIV4DF,
28663 IX86_BUILTIN_GATHERALTDIV8SF,
28664 IX86_BUILTIN_GATHERALTSIV4DI,
28665 IX86_BUILTIN_GATHERALTDIV8SI,
28666 IX86_BUILTIN_GATHER3ALTDIV16SF,
28667 IX86_BUILTIN_GATHER3ALTDIV16SI,
28668 IX86_BUILTIN_GATHER3ALTSIV8DF,
28669 IX86_BUILTIN_GATHER3ALTSIV8DI,
28670 IX86_BUILTIN_GATHER3DIV16SF,
28671 IX86_BUILTIN_GATHER3DIV16SI,
28672 IX86_BUILTIN_GATHER3DIV8DF,
28673 IX86_BUILTIN_GATHER3DIV8DI,
28674 IX86_BUILTIN_GATHER3SIV16SF,
28675 IX86_BUILTIN_GATHER3SIV16SI,
28676 IX86_BUILTIN_GATHER3SIV8DF,
28677 IX86_BUILTIN_GATHER3SIV8DI,
28678 IX86_BUILTIN_SCATTERDIV16SF,
28679 IX86_BUILTIN_SCATTERDIV16SI,
28680 IX86_BUILTIN_SCATTERDIV8DF,
28681 IX86_BUILTIN_SCATTERDIV8DI,
28682 IX86_BUILTIN_SCATTERSIV16SF,
28683 IX86_BUILTIN_SCATTERSIV16SI,
28684 IX86_BUILTIN_SCATTERSIV8DF,
28685 IX86_BUILTIN_SCATTERSIV8DI,
28686
28687 /* AVX512PF */
28688 IX86_BUILTIN_GATHERPFQPD,
28689 IX86_BUILTIN_GATHERPFDPS,
28690 IX86_BUILTIN_GATHERPFDPD,
28691 IX86_BUILTIN_GATHERPFQPS,
28692 IX86_BUILTIN_SCATTERPFDPD,
28693 IX86_BUILTIN_SCATTERPFDPS,
28694 IX86_BUILTIN_SCATTERPFQPD,
28695 IX86_BUILTIN_SCATTERPFQPS,
28696
28697 /* AVX-512ER */
28698 IX86_BUILTIN_EXP2PD_MASK,
28699 IX86_BUILTIN_EXP2PS_MASK,
28700 IX86_BUILTIN_EXP2PS,
28701 IX86_BUILTIN_RCP28PD,
28702 IX86_BUILTIN_RCP28PS,
28703 IX86_BUILTIN_RCP28SD,
28704 IX86_BUILTIN_RCP28SS,
28705 IX86_BUILTIN_RSQRT28PD,
28706 IX86_BUILTIN_RSQRT28PS,
28707 IX86_BUILTIN_RSQRT28SD,
28708 IX86_BUILTIN_RSQRT28SS,
28709
28710 /* SHA builtins. */
28711 IX86_BUILTIN_SHA1MSG1,
28712 IX86_BUILTIN_SHA1MSG2,
28713 IX86_BUILTIN_SHA1NEXTE,
28714 IX86_BUILTIN_SHA1RNDS4,
28715 IX86_BUILTIN_SHA256MSG1,
28716 IX86_BUILTIN_SHA256MSG2,
28717 IX86_BUILTIN_SHA256RNDS2,
28718
28719 /* CLFLUSHOPT instructions. */
28720 IX86_BUILTIN_CLFLUSHOPT,
28721
28722 /* TFmode support builtins. */
28723 IX86_BUILTIN_INFQ,
28724 IX86_BUILTIN_HUGE_VALQ,
28725 IX86_BUILTIN_FABSQ,
28726 IX86_BUILTIN_COPYSIGNQ,
28727
28728 /* Vectorizer support builtins. */
28729 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28730 IX86_BUILTIN_CPYSGNPS,
28731 IX86_BUILTIN_CPYSGNPD,
28732 IX86_BUILTIN_CPYSGNPS256,
28733 IX86_BUILTIN_CPYSGNPS512,
28734 IX86_BUILTIN_CPYSGNPD256,
28735 IX86_BUILTIN_CPYSGNPD512,
28736 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28737 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28738
28739
28740 /* FMA4 instructions. */
28741 IX86_BUILTIN_VFMADDSS,
28742 IX86_BUILTIN_VFMADDSD,
28743 IX86_BUILTIN_VFMADDPS,
28744 IX86_BUILTIN_VFMADDPD,
28745 IX86_BUILTIN_VFMADDPS256,
28746 IX86_BUILTIN_VFMADDPD256,
28747 IX86_BUILTIN_VFMADDSUBPS,
28748 IX86_BUILTIN_VFMADDSUBPD,
28749 IX86_BUILTIN_VFMADDSUBPS256,
28750 IX86_BUILTIN_VFMADDSUBPD256,
28751
28752 /* FMA3 instructions. */
28753 IX86_BUILTIN_VFMADDSS3,
28754 IX86_BUILTIN_VFMADDSD3,
28755
28756 /* XOP instructions. */
28757 IX86_BUILTIN_VPCMOV,
28758 IX86_BUILTIN_VPCMOV_V2DI,
28759 IX86_BUILTIN_VPCMOV_V4SI,
28760 IX86_BUILTIN_VPCMOV_V8HI,
28761 IX86_BUILTIN_VPCMOV_V16QI,
28762 IX86_BUILTIN_VPCMOV_V4SF,
28763 IX86_BUILTIN_VPCMOV_V2DF,
28764 IX86_BUILTIN_VPCMOV256,
28765 IX86_BUILTIN_VPCMOV_V4DI256,
28766 IX86_BUILTIN_VPCMOV_V8SI256,
28767 IX86_BUILTIN_VPCMOV_V16HI256,
28768 IX86_BUILTIN_VPCMOV_V32QI256,
28769 IX86_BUILTIN_VPCMOV_V8SF256,
28770 IX86_BUILTIN_VPCMOV_V4DF256,
28771
28772 IX86_BUILTIN_VPPERM,
28773
28774 IX86_BUILTIN_VPMACSSWW,
28775 IX86_BUILTIN_VPMACSWW,
28776 IX86_BUILTIN_VPMACSSWD,
28777 IX86_BUILTIN_VPMACSWD,
28778 IX86_BUILTIN_VPMACSSDD,
28779 IX86_BUILTIN_VPMACSDD,
28780 IX86_BUILTIN_VPMACSSDQL,
28781 IX86_BUILTIN_VPMACSSDQH,
28782 IX86_BUILTIN_VPMACSDQL,
28783 IX86_BUILTIN_VPMACSDQH,
28784 IX86_BUILTIN_VPMADCSSWD,
28785 IX86_BUILTIN_VPMADCSWD,
28786
28787 IX86_BUILTIN_VPHADDBW,
28788 IX86_BUILTIN_VPHADDBD,
28789 IX86_BUILTIN_VPHADDBQ,
28790 IX86_BUILTIN_VPHADDWD,
28791 IX86_BUILTIN_VPHADDWQ,
28792 IX86_BUILTIN_VPHADDDQ,
28793 IX86_BUILTIN_VPHADDUBW,
28794 IX86_BUILTIN_VPHADDUBD,
28795 IX86_BUILTIN_VPHADDUBQ,
28796 IX86_BUILTIN_VPHADDUWD,
28797 IX86_BUILTIN_VPHADDUWQ,
28798 IX86_BUILTIN_VPHADDUDQ,
28799 IX86_BUILTIN_VPHSUBBW,
28800 IX86_BUILTIN_VPHSUBWD,
28801 IX86_BUILTIN_VPHSUBDQ,
28802
28803 IX86_BUILTIN_VPROTB,
28804 IX86_BUILTIN_VPROTW,
28805 IX86_BUILTIN_VPROTD,
28806 IX86_BUILTIN_VPROTQ,
28807 IX86_BUILTIN_VPROTB_IMM,
28808 IX86_BUILTIN_VPROTW_IMM,
28809 IX86_BUILTIN_VPROTD_IMM,
28810 IX86_BUILTIN_VPROTQ_IMM,
28811
28812 IX86_BUILTIN_VPSHLB,
28813 IX86_BUILTIN_VPSHLW,
28814 IX86_BUILTIN_VPSHLD,
28815 IX86_BUILTIN_VPSHLQ,
28816 IX86_BUILTIN_VPSHAB,
28817 IX86_BUILTIN_VPSHAW,
28818 IX86_BUILTIN_VPSHAD,
28819 IX86_BUILTIN_VPSHAQ,
28820
28821 IX86_BUILTIN_VFRCZSS,
28822 IX86_BUILTIN_VFRCZSD,
28823 IX86_BUILTIN_VFRCZPS,
28824 IX86_BUILTIN_VFRCZPD,
28825 IX86_BUILTIN_VFRCZPS256,
28826 IX86_BUILTIN_VFRCZPD256,
28827
28828 IX86_BUILTIN_VPCOMEQUB,
28829 IX86_BUILTIN_VPCOMNEUB,
28830 IX86_BUILTIN_VPCOMLTUB,
28831 IX86_BUILTIN_VPCOMLEUB,
28832 IX86_BUILTIN_VPCOMGTUB,
28833 IX86_BUILTIN_VPCOMGEUB,
28834 IX86_BUILTIN_VPCOMFALSEUB,
28835 IX86_BUILTIN_VPCOMTRUEUB,
28836
28837 IX86_BUILTIN_VPCOMEQUW,
28838 IX86_BUILTIN_VPCOMNEUW,
28839 IX86_BUILTIN_VPCOMLTUW,
28840 IX86_BUILTIN_VPCOMLEUW,
28841 IX86_BUILTIN_VPCOMGTUW,
28842 IX86_BUILTIN_VPCOMGEUW,
28843 IX86_BUILTIN_VPCOMFALSEUW,
28844 IX86_BUILTIN_VPCOMTRUEUW,
28845
28846 IX86_BUILTIN_VPCOMEQUD,
28847 IX86_BUILTIN_VPCOMNEUD,
28848 IX86_BUILTIN_VPCOMLTUD,
28849 IX86_BUILTIN_VPCOMLEUD,
28850 IX86_BUILTIN_VPCOMGTUD,
28851 IX86_BUILTIN_VPCOMGEUD,
28852 IX86_BUILTIN_VPCOMFALSEUD,
28853 IX86_BUILTIN_VPCOMTRUEUD,
28854
28855 IX86_BUILTIN_VPCOMEQUQ,
28856 IX86_BUILTIN_VPCOMNEUQ,
28857 IX86_BUILTIN_VPCOMLTUQ,
28858 IX86_BUILTIN_VPCOMLEUQ,
28859 IX86_BUILTIN_VPCOMGTUQ,
28860 IX86_BUILTIN_VPCOMGEUQ,
28861 IX86_BUILTIN_VPCOMFALSEUQ,
28862 IX86_BUILTIN_VPCOMTRUEUQ,
28863
28864 IX86_BUILTIN_VPCOMEQB,
28865 IX86_BUILTIN_VPCOMNEB,
28866 IX86_BUILTIN_VPCOMLTB,
28867 IX86_BUILTIN_VPCOMLEB,
28868 IX86_BUILTIN_VPCOMGTB,
28869 IX86_BUILTIN_VPCOMGEB,
28870 IX86_BUILTIN_VPCOMFALSEB,
28871 IX86_BUILTIN_VPCOMTRUEB,
28872
28873 IX86_BUILTIN_VPCOMEQW,
28874 IX86_BUILTIN_VPCOMNEW,
28875 IX86_BUILTIN_VPCOMLTW,
28876 IX86_BUILTIN_VPCOMLEW,
28877 IX86_BUILTIN_VPCOMGTW,
28878 IX86_BUILTIN_VPCOMGEW,
28879 IX86_BUILTIN_VPCOMFALSEW,
28880 IX86_BUILTIN_VPCOMTRUEW,
28881
28882 IX86_BUILTIN_VPCOMEQD,
28883 IX86_BUILTIN_VPCOMNED,
28884 IX86_BUILTIN_VPCOMLTD,
28885 IX86_BUILTIN_VPCOMLED,
28886 IX86_BUILTIN_VPCOMGTD,
28887 IX86_BUILTIN_VPCOMGED,
28888 IX86_BUILTIN_VPCOMFALSED,
28889 IX86_BUILTIN_VPCOMTRUED,
28890
28891 IX86_BUILTIN_VPCOMEQQ,
28892 IX86_BUILTIN_VPCOMNEQ,
28893 IX86_BUILTIN_VPCOMLTQ,
28894 IX86_BUILTIN_VPCOMLEQ,
28895 IX86_BUILTIN_VPCOMGTQ,
28896 IX86_BUILTIN_VPCOMGEQ,
28897 IX86_BUILTIN_VPCOMFALSEQ,
28898 IX86_BUILTIN_VPCOMTRUEQ,
28899
28900 /* LWP instructions. */
28901 IX86_BUILTIN_LLWPCB,
28902 IX86_BUILTIN_SLWPCB,
28903 IX86_BUILTIN_LWPVAL32,
28904 IX86_BUILTIN_LWPVAL64,
28905 IX86_BUILTIN_LWPINS32,
28906 IX86_BUILTIN_LWPINS64,
28907
28908 IX86_BUILTIN_CLZS,
28909
28910 /* RTM */
28911 IX86_BUILTIN_XBEGIN,
28912 IX86_BUILTIN_XEND,
28913 IX86_BUILTIN_XABORT,
28914 IX86_BUILTIN_XTEST,
28915
28916 /* BMI instructions. */
28917 IX86_BUILTIN_BEXTR32,
28918 IX86_BUILTIN_BEXTR64,
28919 IX86_BUILTIN_CTZS,
28920
28921 /* TBM instructions. */
28922 IX86_BUILTIN_BEXTRI32,
28923 IX86_BUILTIN_BEXTRI64,
28924
28925 /* BMI2 instructions. */
28926 IX86_BUILTIN_BZHI32,
28927 IX86_BUILTIN_BZHI64,
28928 IX86_BUILTIN_PDEP32,
28929 IX86_BUILTIN_PDEP64,
28930 IX86_BUILTIN_PEXT32,
28931 IX86_BUILTIN_PEXT64,
28932
28933 /* ADX instructions. */
28934 IX86_BUILTIN_ADDCARRYX32,
28935 IX86_BUILTIN_ADDCARRYX64,
28936
28937 /* SBB instructions. */
28938 IX86_BUILTIN_SBB32,
28939 IX86_BUILTIN_SBB64,
28940
28941 /* FSGSBASE instructions. */
28942 IX86_BUILTIN_RDFSBASE32,
28943 IX86_BUILTIN_RDFSBASE64,
28944 IX86_BUILTIN_RDGSBASE32,
28945 IX86_BUILTIN_RDGSBASE64,
28946 IX86_BUILTIN_WRFSBASE32,
28947 IX86_BUILTIN_WRFSBASE64,
28948 IX86_BUILTIN_WRGSBASE32,
28949 IX86_BUILTIN_WRGSBASE64,
28950
28951 /* RDRND instructions. */
28952 IX86_BUILTIN_RDRAND16_STEP,
28953 IX86_BUILTIN_RDRAND32_STEP,
28954 IX86_BUILTIN_RDRAND64_STEP,
28955
28956 /* RDSEED instructions. */
28957 IX86_BUILTIN_RDSEED16_STEP,
28958 IX86_BUILTIN_RDSEED32_STEP,
28959 IX86_BUILTIN_RDSEED64_STEP,
28960
28961 /* F16C instructions. */
28962 IX86_BUILTIN_CVTPH2PS,
28963 IX86_BUILTIN_CVTPH2PS256,
28964 IX86_BUILTIN_CVTPS2PH,
28965 IX86_BUILTIN_CVTPS2PH256,
28966
28967 /* CFString built-in for Darwin. */
28968 IX86_BUILTIN_CFSTRING,
28969
28970 /* Builtins to get CPU type and supported features. */
28971 IX86_BUILTIN_CPU_INIT,
28972 IX86_BUILTIN_CPU_IS,
28973 IX86_BUILTIN_CPU_SUPPORTS,
28974
28975 /* Read/write FLAGS register built-ins. */
28976 IX86_BUILTIN_READ_FLAGS,
28977 IX86_BUILTIN_WRITE_FLAGS,
28978
28979 IX86_BUILTIN_MAX
28980 };
28981
28982 /* Table for the ix86 builtin decls. */
28983 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28984
28985 /* Table of all the builtin functions that are possible with different ISAs
28986 but are waiting to be built until a function is declared to use that
28987 ISA. */
28988 struct builtin_isa {
28989 const char *name; /* function name */
28990 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28991 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28992 bool const_p; /* true if the declaration is constant */
28993 bool set_and_not_built_p;
28994 };
28995
28996 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28997
28998
28999 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save MASK,
29000 the isa_flags this builtin requires, in the ix86_builtins_isa array. Store
29001 the function decl in the ix86_builtins array. Return the function decl,
29002 or NULL_TREE if the builtin was not added.
29003
29004 If the front end has a special hook for builtin functions, delay adding
29005 builtin functions that aren't in the current ISA until the ISA is changed
29006 with function-specific optimization. Doing so can save about 300K for the
29007 default compiler. When the builtin is expanded, check at that time whether
29008 it is valid.
29009
29010 If the front end doesn't have a special hook, record all builtins, even
29011 those whose instruction set is not in the current ISA, in case the user
29012 uses function-specific options for a different ISA; that way we don't get
29013 scope errors if a builtin is added in the middle of a function scope. */
29014
29015 static inline tree
29016 def_builtin (HOST_WIDE_INT mask, const char *name,
29017 enum ix86_builtin_func_type tcode,
29018 enum ix86_builtins code)
29019 {
29020 tree decl = NULL_TREE;
29021
29022 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
29023 {
29024 ix86_builtins_isa[(int) code].isa = mask;
29025
29026 mask &= ~OPTION_MASK_ISA_64BIT;
29027 if (mask == 0
29028 || (mask & ix86_isa_flags) != 0
29029 || (lang_hooks.builtin_function
29030 == lang_hooks.builtin_function_ext_scope))
29031
29032 {
29033 tree type = ix86_get_builtin_func_type (tcode);
29034 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
29035 NULL, NULL_TREE);
29036 ix86_builtins[(int) code] = decl;
29037 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
29038 }
29039 else
29040 {
29041 ix86_builtins[(int) code] = NULL_TREE;
29042 ix86_builtins_isa[(int) code].tcode = tcode;
29043 ix86_builtins_isa[(int) code].name = name;
29044 ix86_builtins_isa[(int) code].const_p = false;
29045 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
29046 }
29047 }
29048
29049 return decl;
29050 }
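/* Illustrative sketch (not part of the original source): a typical
   registration passes the ISA mask, the user-visible name, the function
   type and the builtin code, e.g.

     def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xbegin",
                  UNSIGNED_FTYPE_VOID, IX86_BUILTIN_XBEGIN);

   If the mask intersects ix86_isa_flags (or the front end has the
   ext_scope hook), the decl is built immediately; otherwise only the
   name and type are recorded in ix86_builtins_isa so that
   ix86_add_new_builtins can materialize the decl later.  */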
29051
29052 /* Like def_builtin, but also marks the function decl "const". */
29053
29054 static inline tree
29055 def_builtin_const (HOST_WIDE_INT mask, const char *name,
29056 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
29057 {
29058 tree decl = def_builtin (mask, name, tcode, code);
29059 if (decl)
29060 TREE_READONLY (decl) = 1;
29061 else
29062 ix86_builtins_isa[(int) code].const_p = true;
29063
29064 return decl;
29065 }
29066
29067 /* Add any new builtin functions for a given ISA that may not have been
29068 declared. This saves a bit of space compared to adding all of the
29069 declarations to the tree up front, whether or not they end up used. */
29070
29071 static void
29072 ix86_add_new_builtins (HOST_WIDE_INT isa)
29073 {
29074 int i;
29075
29076 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
29077 {
29078 if ((ix86_builtins_isa[i].isa & isa) != 0
29079 && ix86_builtins_isa[i].set_and_not_built_p)
29080 {
29081 tree decl, type;
29082
29083 /* Don't define the builtin again. */
29084 ix86_builtins_isa[i].set_and_not_built_p = false;
29085
29086 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
29087 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
29088 type, i, BUILT_IN_MD, NULL,
29089 NULL_TREE);
29090
29091 ix86_builtins[i] = decl;
29092 if (ix86_builtins_isa[i].const_p)
29093 TREE_READONLY (decl) = 1;
29094 }
29095 }
29096 }
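/* Illustrative sketch (not part of the original source): a caller that
   has just widened the ISA flag set would materialize the deferred
   builtins roughly as

     if ((new_isa_flags & ~old_isa_flags) != 0)
       ix86_add_new_builtins (new_isa_flags);

   old_isa_flags/new_isa_flags are placeholder names for whatever flag
   words the caller tracks, e.g. around target ("...") attribute
   processing.  */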
29097
29098 /* Bits for builtin_description.flag. */
29099
29100 /* Set when we don't support the comparison natively, and should swap the
29101 comparison operands in order to support it. */
29102 #define BUILTIN_DESC_SWAP_OPERANDS 1
29103
29104 struct builtin_description
29105 {
29106 const HOST_WIDE_INT mask;
29107 const enum insn_code icode;
29108 const char *const name;
29109 const enum ix86_builtins code;
29110 const enum rtx_code comparison;
29111 const int flag;
29112 };
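/* Illustrative sketch (not part of the original source): an expander
   walking one of these tables honors BUILTIN_DESC_SWAP_OPERANDS by
   exchanging the two input operands before emitting the comparison,
   roughly

     if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
       {
         rtx tmp = op0;
         op0 = op1, op1 = tmp;
       }

   where d points at the builtin_description entry being expanded.  */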
29113
29114 static const struct builtin_description bdesc_comi[] =
29115 {
29116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
29117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
29118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
29119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
29120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
29121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
29122 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
29123 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
29124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
29125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
29126 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
29127 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
29128 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
29129 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
29130 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
29131 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
29132 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
29133 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
29134 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
29135 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
29136 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
29137 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29138 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29139 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29140 };
29141
29142 static const struct builtin_description bdesc_pcmpestr[] =
29143 {
29144 /* SSE4.2 */
29145 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29146 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29147 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29148 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29149 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29150 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29151 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29152 };
29153
29154 static const struct builtin_description bdesc_pcmpistr[] =
29155 {
29156 /* SSE4.2 */
29157 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29158 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29159 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29160 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29161 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29162 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29163 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29164 };
29165
29166 /* Special builtins with a variable number of arguments. */
29167 static const struct builtin_description bdesc_special_args[] =
29168 {
29169 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29170 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29171 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29172
29173 /* 80387 (for use internally for atomic compound assignment). */
29174 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29175 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29176 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29177 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29178
29179 /* MMX */
29180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29181
29182 /* 3DNow! */
29183 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29184
29185 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29186 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29187 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29188 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29189 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29190 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29191 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29192 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29193 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29194
29195 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29196 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29197 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29198 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29199 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29200 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29201 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29202 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29203
29204 /* SSE */
29205 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29206 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29207 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29208
29209 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29210 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29211 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29212 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29213
29214 /* SSE or 3DNow!A */
29215 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29216 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29217
29218 /* SSE2 */
29219 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29226 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29229
29230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29232
29233 /* SSE3 */
29234 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29235
29236 /* SSE4.1 */
29237 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29238
29239 /* SSE4A */
29240 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29241 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29242
29243 /* AVX */
29244 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29245 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29246
29247 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29248 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29249 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29250 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29251 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29252
29253 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29254 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29255 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29256 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29257 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29258 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29259 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29260
29261 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29262 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29263 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29264
29265 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29266 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29267 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29268 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29269 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29270 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29271 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29272 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29273
29274 /* AVX2 */
29275 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29276 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29277 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29278 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29279 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29280 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29281 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29282 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29283 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29284
29285 /* AVX512F */
29286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29333
29334 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29335 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29336 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29337 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29338 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29339 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29340
29341 /* FSGSBASE */
29342 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29343 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29344 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29345 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29346 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29347 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29348 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29349 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29350
29351 /* RTM */
29352 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29353 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29354 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29355 };
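/* Illustrative sketch (not part of the original source): builtin
   initialization walks this table and registers every named entry,
   with the flag field carrying the function type, along the lines of

     const struct builtin_description *d;
     size_t i;

     for (i = 0, d = bdesc_special_args;
          i < ARRAY_SIZE (bdesc_special_args);
          i++, d++)
       if (d->name)
         def_builtin (d->mask, d->name,
                      (enum ix86_builtin_func_type) d->flag, d->code);
   */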
29356
29357 /* Builtins with a variable number of arguments. */
29358 static const struct builtin_description bdesc_args[] =
29359 {
29360 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29361 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29362 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29363 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29364 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29365 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29366 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29367
29368 /* MMX */
29369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29372 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29375
29376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29377 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29378 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29379 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29380 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29381 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29382 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29383 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29384
29385 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29386 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29387
29388 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29389 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29390 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29391 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29392
29393 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29394 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29395 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29396 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29397 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29398 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29399
29400 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29401 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29402 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29403 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29404 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29405 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29406
29407 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29408 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29409 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29410
29411 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29412
29413 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29414 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29415 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29416 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29417 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29418 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29419
29420 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29421 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29422 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29424 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29425 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29426
29427 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29428 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29429 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29430 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29431
29432 /* 3DNow! */
29433 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29434 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29435 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29436 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29437
29438 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29439 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29440 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29441 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29442 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29443 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29444 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29445 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29446 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29447 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29448 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29449 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29450 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29451 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29452 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29453
29454 /* 3DNow!A */
29455 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29456 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29457 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29458 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29459 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29460 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29461
29462 /* SSE */
29463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29465 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29467 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29471 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29474 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29475
29476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29477
29478 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29479 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29480 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29486
29487 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29488 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29489 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29490 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29491 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29492 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29493 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29494 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29495 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29496 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29497 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29498 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29507
29508 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29509 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29510 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29511 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29512
29513 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29514 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29515 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29516 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29517
29518 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29519
29520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29521 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29522 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29523 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29524 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29525
29526 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29527 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29528 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29529
29530 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29531
29532 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29533 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29534 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29535
29536 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29537 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29538
29539 /* MMX builtins enabled by either SSE or 3DNow!A */
29540 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29541 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29542 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29543
29544 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29545 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29546 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29547 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29548
29549 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29550 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29551
29552 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29553
29554 /* SSE2 */
29555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29556
29557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29561 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29562
29563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29568
29569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29570
29571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29573 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29574 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29575
29576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29578 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29579
29580 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29581 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29582 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29583 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29586 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29588
29589 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29590 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29591 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29592 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29596 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29598 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29600 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29601 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29602 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29603 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29604 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29605 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29607 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29609
29610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29611 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29613 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29614
29615 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29617 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29618 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29619
29620 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29621
29622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29623 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29624 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29625
29626 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29627
29628 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29629 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29630 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29631 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29632 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29633 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29634 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29635 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29636
29637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29639 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29645
29646 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29647 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29648
29649 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29651 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29652 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29653
29654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29656
29657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29660 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29662 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29663
29664 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29665 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29666 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29668
29669 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29670 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29671 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29672 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29673 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29674 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29675 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29676 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29677
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29679 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29681
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29684
29685 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29687
29688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29689
29690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29691 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29694
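/* Vector shifts.  The *_SI_COUNT signatures take the shift count as a
   scalar integer, the *_V*_COUNT signatures take it from a vector
   operand; pslldqi128/psrldqi128 shift the whole 128-bit value as a
   single integer and go through the V1TImode shift patterns (the
   *_INT_CONVERT signatures handle the mode change).  */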
29695 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29696 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29697 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29698 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29699 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29700 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29701 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29702
29703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29704 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29705 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29706 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29707 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29708 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29709 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29710
29711 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29712 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29713 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29714 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29715
29716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29719
29720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29721
29722 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29723
29724 /* SSE2 MMX */
29725 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29726 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29727
29728 /* SSE3 */
29729 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29730 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29731
29732 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29733 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29734 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29735 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29736 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29737 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29738
29739 /* SSSE3 */
29740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29741 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29742 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29743 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29744 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29745 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29746
29747 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29748 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29749 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29750 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29751 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29752 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29753 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29754 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29755 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29756 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29757 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29758 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29759 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29760 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29761 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29762 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29763 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29764 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29765 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29766 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29767 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29768 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29769 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29770 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29771
29772 /* SSSE3. */
29773 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29774 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29775
29776 /* SSE4.1 */
29777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29780 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29781 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29782 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29783 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29784 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29785 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29786 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29787
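/* The pmovsx and pmovzx builtins sign- or zero-extend the low elements
   of the 128-bit source to the wider destination element type.  */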
29788 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29789 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29790 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29791 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29792 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29793 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29794 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29795 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29796 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29797 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29798 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29799 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29800 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29801
29802 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29803 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29804 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29805 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29806 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29807 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29808 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29809 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29810 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29811 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29812 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29813 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29814
29815 /* SSE4.1 */
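/* The floor/ceil/trunc/rint entries below reuse the roundpd/roundps
   patterns: the rounding-mode constant stored in the comparison-code
   slot (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC, ROUND_MXCSR) is emitted
   by the expander as the rounding immediate, selected via the *_ROUND
   function types.  */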
29816 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29817 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29818 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29819 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29820
29821 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29822 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29823 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29824 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29825
29826 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29827 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29828
29829 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29830 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29831
29832 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29833 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29834 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29835 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29836
29837 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29838 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29839
29840 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29841 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29842
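/* ptestz/ptestc/ptestnzc share one PTEST pattern; the EQ/LTU/GTU codes
   tell the expander whether to return the ZF, the CF, or the
   "both flags clear" condition.  */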
29843 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29844 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29845 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29846
29847 /* SSE4.2 */
29848 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29849 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29850 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29851 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29852 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29853
29854 /* SSE4A */
29855 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29856 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29857 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29858 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29859
29860 /* AES */
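/* These entries carry no name string: they only supply the expansion
   data, while the builtins themselves are declared elsewhere in this
   file under the AES (and, below, PCLMUL) ISA guards.  */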
29861 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29862 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29863
29864 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29865 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29866 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29867 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29868
29869 /* PCLMUL */
29870 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29871
29872 /* AVX */
29873 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29874 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29877 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29878 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29881 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29884 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29885 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29886 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29887 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29888 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29889 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29890 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29891 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29892 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29893 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29894 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29895 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29896 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29897 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29898 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29899
29900 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29901 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29902 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29903 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29904
29905 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29906 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29907 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29908 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29909 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29910 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29911 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29912 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29913 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29914 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29915 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29916 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29917 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29918 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29919 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29920 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29921 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29922 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29923 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29924 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29925 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29926 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29927 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29928 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29929 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29930 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29931 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29932 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29933 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29934 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29935 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29936 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29937 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29938 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29939
29940 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29941 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29942 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29943
29944 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29945 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29946 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29947 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29948 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29949
29950 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29951
29952 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29953 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29954
29955 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29956 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29957 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29958 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29959
29960 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29961 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29962
29963 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29964 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29965
29966 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29967 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29968 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29969 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29970
29971 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29972 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29973
29974 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29975 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29976
29977 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29978 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29979 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29980 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29981
29982 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29983 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29984 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29985 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29986 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29987 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29988
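  /* For the VTEST/PTEST entries the comparison field selects which flag the
     expander materializes: EQ -> ZF (testz), LTU -> CF (testc), GTU ->
     neither flag set (testnzc).  */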
29989 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29990 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29991 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29992 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29993 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29994 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29995 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29996 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29997 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29998 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29999 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30000 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
30001 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30002 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30003 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
30004
30005 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
30006 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
30007
30008 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
30009 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
30010
30011 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

30012
30013 /* AVX2 */
30014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
30015 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
30016 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
30017 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
30018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
30021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
30022 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30023 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30024 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30025 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
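  /* The *_INT_CONVERT function types mean the V4DI operands are only the
     builtin-level view: the expander re-interprets them (via lowpart subregs)
     in the mode the insn pattern really uses, V2TI here, and the trailing INT
     must be a constant immediate.  */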
30030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
30031 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
30036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
30037 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30038 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30039 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30040 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30041 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30042 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30043 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30044 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30045 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30046 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30047 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30051 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30052 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
30053 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30054 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30055 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30056 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30057 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30058 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30059 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30060 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30061 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30062 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30063 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30064 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30065 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
30066 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30067 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30068 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30069 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30070 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30071 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30072 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
30073 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
30074 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
30075 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
30076 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
30077 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
30078 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30079 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30080 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30081 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30082 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30083 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30084 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
30085 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30086 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
30087 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30088 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
30089 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30090 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
30091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
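  /* Shift entries: the *_COUNT function types mark the last argument as a
     shift count, given either as a scalar/immediate (..._SI_COUNT,
     ..._INT_COUNT) or in the low element of a vector (..._V8HI_COUNT,
     ..._V4SI_COUNT, ..._V2DI_COUNT); the expander adjusts the count operand
     accordingly.  */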
30094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30095 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30096 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30097 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30098 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30099 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30100 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30101 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30102 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30103 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30104 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30105 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
30106 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
30107 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
30108 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
30109 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
30110 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
30111 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
30112 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30113 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30114 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30115 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30116 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30117 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30118 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30119 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30120 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30121 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30122 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30123 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30124 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30125 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30126 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30127 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30128 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30129 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30130 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30131 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30132 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30133 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30134 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30135 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
30136 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
30137 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30138 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30139 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30140 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30141 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30142 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30143 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30144 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30145 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30146 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30147 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30148 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30149 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30150 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30151 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30152 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30153 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30154 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30155 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30156 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30158 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30159 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30160
30161 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30162
30163 /* BMI */
30164 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30165 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30166 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30167
30168 /* TBM */
30169 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30170 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30171
30172 /* F16C */
30173 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30174 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30175 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30176 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
30177
30178 /* BMI2 */
30179 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30180 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30181 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30182 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30183 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30184 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30185
30186 /* AVX512F */
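  /* In the *_mask entries the trailing QI/HI argument of the function type is
     the write mask; where the result is a vector, the argument just before it
     is the pass-through (merge) source.  */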
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30242 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30243 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
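  /* The GPR broadcast needs a DImode general register and is therefore
     restricted to TARGET_64BIT; the _mem variant below is the !TARGET_64BIT
     fallback that broadcasts from memory instead.  */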
30245 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30246 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30316 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30318 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30354 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30355 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30356 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30357 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30377 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30379 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30381 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30383 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30384
30385 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30387 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30389 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30391 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30393
30394 /* Mask arithmetic operations */
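/* These operate on the 16-bit AVX-512 mask registers, hence the HImode
   prototypes (HI_FTYPE_HI / HI_FTYPE_HI_HI); each entry maps a
   __builtin_ia32_k*hi name onto the corresponding k-mask insn pattern.  */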
30395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30405
30406 /* SHA */
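/* Note the name slot is 0 for these entries; presumably the
   __builtin_ia32_sha* names are registered separately during builtin
   initialization, so only the insn code, enum and prototype are taken
   from this table.  */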
30407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30414 };
30415
30416 /* Builtins with rounding support. */
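/* Each entry follows the usual builtin_description layout: ISA option
   mask, insn code, builtin name, builtin enum, an optional rtx
   comparison/rounding code (UNKNOWN when unused) and the prototype enum.
   The prototypes in this table end in _INT because every expander here
   takes a trailing immediate selecting the embedded rounding mode / SAE
   behaviour.  For illustration only (the wrapper names are an assumption,
   not part of this table): the addpd512 entry below corresponds to

     __m512d __builtin_ia32_addpd512_mask (__m512d, __m512d, __m512d,
					    __mmask8, int);

   which the AVX-512 intrinsics headers expose as the
   _mm512_add_round_pd family.  */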
30417 static const struct builtin_description bdesc_round_args[] =
30418 {
30419 /* AVX512F */
30420 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30421 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30422 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30423 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30424 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30425 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30426 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30427 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30428 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30429 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30430 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30431 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30432 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30433 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30434 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30435 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30436 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30437 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30438 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30439 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30440 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30441 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30442 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30443 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30444 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30445 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30446 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30447 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30448 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30449 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30450 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30451 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30452 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30453 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30454 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30455 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30456 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30457 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30458 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30459 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30460 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30461 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30462 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30463 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30464 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30465 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30466 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30467 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30468 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30469 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30470 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30471 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30472 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30473 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30474 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30475 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30476 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30477 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30478 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30479 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30480 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30481 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30482 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30483 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30484 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30485 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30486 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30487 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30488 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30489 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30490 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30491 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30492 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30493 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30494 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30495 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30496 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30497 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30498 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30499 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30500 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30501 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30502 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30503 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30504 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30505 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30506 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30507 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30508 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30509 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30510 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30511 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30512 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30513 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30514 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30515 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30516 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30517 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30518 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30519 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30520 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30521 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30522 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30523 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30524 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30525 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30526 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30527 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30528 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30529 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30530 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30531 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30532 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30533 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30534 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30535 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30536 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30537 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30538 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30539
30540 /* AVX512ER */
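/* AVX-512ER: the rcp28/rsqrt28 higher-precision reciprocal and
   reciprocal-square-root approximations and the exp2 approximation;
   like the entries above, each takes a trailing rounding/SAE immediate
   (the _INT operand in the prototype).  */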
30541 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30542 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30543 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30544 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30545 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30546 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30547 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30548 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30549 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30550 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30551 };
30552
30553 /* FMA4 and XOP. */
30554 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30555 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30556 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30557 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30558 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30559 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30560 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30561 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30562 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30563 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30564 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30565 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30566 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30567 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30568 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30569 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30570 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30571 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30572 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30573 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30574 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30575 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30576 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30577 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30578 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30579 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30580 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30581 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30582 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30583 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30584 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30585 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30586 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30587 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30588 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30589 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30590 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30591 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30592 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30593 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30594 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30595 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30596 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30597 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30598 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30599 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30600 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30601 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30602 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30603 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30604 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30605 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
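/* The MULTI_ARG_* names above are shorthands for the ftype enums used by
   bdesc_multi_arg below: the digit is the operand count, the element
   suffix gives the vector mode (SF/DF/SI/DI/HI/QI, with a trailing 2 for
   the 256-bit variants), and _IMM/_CMP/_TF mark the immediate, comparison
   and pcom true/false forms.  E.g. MULTI_ARG_3_SF is
   V4SF_FTYPE_V4SF_V4SF_V4SF: three V4SF operands returning V4SF.  */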
30606
30607 static const struct builtin_description bdesc_multi_arg[] =
30608 {
30609 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30610 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30611 UNKNOWN, (int)MULTI_ARG_3_SF },
30612 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30613 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30614 UNKNOWN, (int)MULTI_ARG_3_DF },
30615
30616 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30617 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30618 UNKNOWN, (int)MULTI_ARG_3_SF },
30619 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30620 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30621 UNKNOWN, (int)MULTI_ARG_3_DF },
30622
30623 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30624 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30625 UNKNOWN, (int)MULTI_ARG_3_SF },
30626 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30627 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30628 UNKNOWN, (int)MULTI_ARG_3_DF },
30629 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30630 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30631 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30632 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30633 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30634 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30635
30636 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30637 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30638 UNKNOWN, (int)MULTI_ARG_3_SF },
30639 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30640 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30641 UNKNOWN, (int)MULTI_ARG_3_DF },
30642 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30643 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30644 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30645 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30646 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30647 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30648
30649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30652 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30653   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi,    "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30656
30657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30660 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30664
30665 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30666
30667 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30668 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30669 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30672 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30678 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30679
30680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30681 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30683 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30684 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30685 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30686 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30687 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30688 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30689 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30690 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30691 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30692 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30693 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30694 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30695 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30696
30697 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30698 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30699 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30700 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30701 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30702 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30703
30704 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30705 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30706 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30707 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30708 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30709 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30710 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30711 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30712 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30713 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30714 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30715 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30716 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30717 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30718 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30719
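/* XOP vpcom* comparisons: the fifth field holds the rtx comparison code
   (EQ/NE/LT/..., with the unsigned LTU/LEU/... variants further below)
   that the multi-arg expander uses to build the comparison.  The *neq*
   spellings are aliases of the *ne* builtins and reuse the same
   IX86_BUILTIN_VPCOMNE* enum.  */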
30720 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30721 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30722 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30723 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30724 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30725 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30726 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30727
30728 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30729 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30730 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30731 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30732 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30733 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30734 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30735
30736 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30737 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30738 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30739 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30740 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30741 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30742 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30743
30744 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30745 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30746 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30747 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30748 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30749 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30750 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30751
30752   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30753   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30754   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30755 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30756 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30757 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30758 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30759
30760 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30761 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30762 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30763 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30764 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30765 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30766 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30767
30768 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30769 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30770 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30771 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30772 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30773 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30774 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30775
30776 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30777 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30778 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30779 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30780 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30781 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30782 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30783
30784 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30785 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30786 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30787 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30788   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30789   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30790   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30791   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30792
30793 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30794 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30795 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30796 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30797 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30798 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30799 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30800 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30801
30802 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30803 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30804 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30805 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30806
30807 };
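/* Reading aid for the table above (an illustrative sketch, not a new entry):
   each row binds a builtin name to an insn pattern plus an rtx comparison
   code, so one pattern serves several comparisons by varying only the code
   column, e.g.

     { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,
       "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU,
       (int) MULTI_ARG_2_HI_CMP },

   Some rows are deliberate aliases: "__builtin_ia32_vpcomneud" and
   "__builtin_ia32_vpcomnequd" both map to IX86_BUILTIN_VPCOMNEUD with NE,
   so either spelling reaches the same comparison.  */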
30808 \f
30809 /* TM vector builtins. */
30810
30811 /* Reuse the existing x86-specific `struct builtin_description' rather
30812 than defining a new one; add casts to make the TM builtin codes fit. */
30813 static const struct builtin_description bdesc_tm[] =
30814 {
30815 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30816 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30817 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30818 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30819 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30820 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30821 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30822
30823 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30824 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30825 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30826 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30827 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30828 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30829 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30830
30831 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30832 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30833 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30834 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30835 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30836 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30837 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30838
30839 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30840 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30841 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30842 };
30843
30844 /* TM callbacks. */
30845
30846 /* Return the builtin decl needed to load a vector of TYPE. */
30847
30848 static tree
30849 ix86_builtin_tm_load (tree type)
30850 {
30851 if (TREE_CODE (type) == VECTOR_TYPE)
30852 {
30853 switch (tree_to_uhwi (TYPE_SIZE (type)))
30854 {
30855 case 64:
30856 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30857 case 128:
30858 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30859 case 256:
30860 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30861 }
30862 }
30863 return NULL_TREE;
30864 }
30865
30866 /* Return the builtin decl needed to store a vector of TYPE. */
30867
30868 static tree
30869 ix86_builtin_tm_store (tree type)
30870 {
30871 if (TREE_CODE (type) == VECTOR_TYPE)
30872 {
30873 switch (tree_to_uhwi (TYPE_SIZE (type)))
30874 {
30875 case 64:
30876 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30877 case 128:
30878 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30879 case 256:
30880 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30881 }
30882 }
30883 return NULL_TREE;
30884 }
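/* Illustrative sketch of the two helpers above: they key only off the bit
   size of the vector type, so, for example,

     tree v4sf = build_vector_type (float_type_node, 4);    128 bits wide
     ix86_builtin_tm_load (v4sf)
       == builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
     ix86_builtin_tm_store (v4sf)
       == builtin_decl_explicit (BUILT_IN_TM_STORE_M128);

   Any other size, or a non-vector type, falls through to NULL_TREE.  */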
30885 \f
30886 /* Initialize the transactional memory vector load/store builtins. */
30887
30888 static void
30889 ix86_init_tm_builtins (void)
30890 {
30891 enum ix86_builtin_func_type ftype;
30892 const struct builtin_description *d;
30893 size_t i;
30894 tree decl;
30895 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30896 tree attrs_log, attrs_type_log;
30897
30898 if (!flag_tm)
30899 return;
30900
30901 /* If there are no builtins defined, we must be compiling in a
30902 language without trans-mem support. */
30903 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30904 return;
30905
30906 /* Use whatever attributes a normal TM load has. */
30907 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30908 attrs_load = DECL_ATTRIBUTES (decl);
30909 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30910 /* Use whatever attributes a normal TM store has. */
30911 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30912 attrs_store = DECL_ATTRIBUTES (decl);
30913 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30914 /* Use whatever attributes a normal TM log has. */
30915 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30916 attrs_log = DECL_ATTRIBUTES (decl);
30917 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30918
30919 for (i = 0, d = bdesc_tm;
30920 i < ARRAY_SIZE (bdesc_tm);
30921 i++, d++)
30922 {
30923 if ((d->mask & ix86_isa_flags) != 0
30924 || (lang_hooks.builtin_function
30925 == lang_hooks.builtin_function_ext_scope))
30926 {
30927 tree type, attrs, attrs_type;
30928 enum built_in_function code = (enum built_in_function) d->code;
30929
30930 ftype = (enum ix86_builtin_func_type) d->flag;
30931 type = ix86_get_builtin_func_type (ftype);
30932
30933 if (BUILTIN_TM_LOAD_P (code))
30934 {
30935 attrs = attrs_load;
30936 attrs_type = attrs_type_load;
30937 }
30938 else if (BUILTIN_TM_STORE_P (code))
30939 {
30940 attrs = attrs_store;
30941 attrs_type = attrs_type_store;
30942 }
30943 else
30944 {
30945 attrs = attrs_log;
30946 attrs_type = attrs_type_log;
30947 }
30948 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30949 /* The builtin without the prefix for
30950 calling it directly. */
30951 d->name + strlen ("__builtin_"),
30952 attrs);
30953 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30954 set the TYPE_ATTRIBUTES. */
30955 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30956
30957 set_builtin_decl (code, decl, false);
30958 }
30959 }
30960 }
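/* Illustrative sketch of what the registration loop above does with one
   bdesc_tm row.  For "__builtin__ITM_WM128" (assuming flag_tm and SSE are
   enabled) the decl is registered under the full name, made callable
   directly as "_ITM_WM128" via the d->name + strlen ("__builtin_")
   argument, given the attributes of a normal TM store because
   BUILTIN_TM_STORE_P holds for BUILT_IN_TM_STORE_M128, and installed with
   set_builtin_decl so the lookups in ix86_builtin_tm_load/ix86_builtin_tm_store
   above can find it.  */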
30961
30962 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30963 not in the current target ISA, so that the user can compile particular
30964 modules with target-specific options that differ from the command-line
30965 options. */
30966 static void
30967 ix86_init_mmx_sse_builtins (void)
30968 {
30969 const struct builtin_description * d;
30970 enum ix86_builtin_func_type ftype;
30971 size_t i;
30972
30973 /* Add all special builtins with variable number of operands. */
30974 for (i = 0, d = bdesc_special_args;
30975 i < ARRAY_SIZE (bdesc_special_args);
30976 i++, d++)
30977 {
30978 if (d->name == 0)
30979 continue;
30980
30981 ftype = (enum ix86_builtin_func_type) d->flag;
30982 def_builtin (d->mask, d->name, ftype, d->code);
30983 }
30984
30985 /* Add all builtins with variable number of operands. */
30986 for (i = 0, d = bdesc_args;
30987 i < ARRAY_SIZE (bdesc_args);
30988 i++, d++)
30989 {
30990 if (d->name == 0)
30991 continue;
30992
30993 ftype = (enum ix86_builtin_func_type) d->flag;
30994 def_builtin_const (d->mask, d->name, ftype, d->code);
30995 }
30996
30997 /* Add all builtins with rounding. */
30998 for (i = 0, d = bdesc_round_args;
30999 i < ARRAY_SIZE (bdesc_round_args);
31000 i++, d++)
31001 {
31002 if (d->name == 0)
31003 continue;
31004
31005 ftype = (enum ix86_builtin_func_type) d->flag;
31006 def_builtin_const (d->mask, d->name, ftype, d->code);
31007 }
31008
31009 /* pcmpestr[im] insns. */
31010 for (i = 0, d = bdesc_pcmpestr;
31011 i < ARRAY_SIZE (bdesc_pcmpestr);
31012 i++, d++)
31013 {
31014 if (d->code == IX86_BUILTIN_PCMPESTRM128)
31015 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
31016 else
31017 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
31018 def_builtin_const (d->mask, d->name, ftype, d->code);
31019 }
31020
31021 /* pcmpistr[im] insns. */
31022 for (i = 0, d = bdesc_pcmpistr;
31023 i < ARRAY_SIZE (bdesc_pcmpistr);
31024 i++, d++)
31025 {
31026 if (d->code == IX86_BUILTIN_PCMPISTRM128)
31027 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
31028 else
31029 ftype = INT_FTYPE_V16QI_V16QI_INT;
31030 def_builtin_const (d->mask, d->name, ftype, d->code);
31031 }
31032
31033 /* comi/ucomi insns. */
31034 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
31035 {
31036 if (d->mask == OPTION_MASK_ISA_SSE2)
31037 ftype = INT_FTYPE_V2DF_V2DF;
31038 else
31039 ftype = INT_FTYPE_V4SF_V4SF;
31040 def_builtin_const (d->mask, d->name, ftype, d->code);
31041 }
31042
31043 /* SSE */
31044 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
31045 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
31046 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
31047 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
31048
31049 /* SSE or 3DNow!A */
31050 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31051 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
31052 IX86_BUILTIN_MASKMOVQ);
31053
31054 /* SSE2 */
31055 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
31056 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
31057
31058 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
31059 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
31060 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
31061 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
31062
31063 /* SSE3. */
31064 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
31065 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
31066 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
31067 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
31068
31069 /* AES */
31070 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
31071 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
31072 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
31073 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
31074 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
31075 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
31076 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
31077 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
31078 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
31079 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
31080 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
31081 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
31082
31083 /* PCLMUL */
31084 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
31085 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
31086
31087 /* RDRND */
31088 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
31089 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
31090 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
31091 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
31092 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
31093 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
31094 IX86_BUILTIN_RDRAND64_STEP);
31095
31096 /* AVX2 */
31097 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
31098 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
31099 IX86_BUILTIN_GATHERSIV2DF);
31100
31101 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
31102 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
31103 IX86_BUILTIN_GATHERSIV4DF);
31104
31105 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
31106 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
31107 IX86_BUILTIN_GATHERDIV2DF);
31108
31109 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
31110 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
31111 IX86_BUILTIN_GATHERDIV4DF);
31112
31113 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
31114 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
31115 IX86_BUILTIN_GATHERSIV4SF);
31116
31117 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
31118 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
31119 IX86_BUILTIN_GATHERSIV8SF);
31120
31121 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
31122 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31123 IX86_BUILTIN_GATHERDIV4SF);
31124
31125 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31126 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31127 IX86_BUILTIN_GATHERDIV8SF);
31128
31129 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31130 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31131 IX86_BUILTIN_GATHERSIV2DI);
31132
31133 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31134 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31135 IX86_BUILTIN_GATHERSIV4DI);
31136
31137 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31138 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31139 IX86_BUILTIN_GATHERDIV2DI);
31140
31141 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31142 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31143 IX86_BUILTIN_GATHERDIV4DI);
31144
31145 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31146 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31147 IX86_BUILTIN_GATHERSIV4SI);
31148
31149 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31150 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31151 IX86_BUILTIN_GATHERSIV8SI);
31152
31153 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31154 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31155 IX86_BUILTIN_GATHERDIV4SI);
31156
31157 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31158 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31159 IX86_BUILTIN_GATHERDIV8SI);
31160
31161 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31162 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31163 IX86_BUILTIN_GATHERALTSIV4DF);
31164
31165 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31166 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31167 IX86_BUILTIN_GATHERALTDIV8SF);
31168
31169 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31170 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31171 IX86_BUILTIN_GATHERALTSIV4DI);
31172
31173 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31174 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31175 IX86_BUILTIN_GATHERALTDIV8SI);
31176
31177 /* AVX512F */
31178 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31179 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31180 IX86_BUILTIN_GATHER3SIV16SF);
31181
31182 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31183 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31184 IX86_BUILTIN_GATHER3SIV8DF);
31185
31186 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31187 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31188 IX86_BUILTIN_GATHER3DIV16SF);
31189
31190 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31191 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31192 IX86_BUILTIN_GATHER3DIV8DF);
31193
31194 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31195 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31196 IX86_BUILTIN_GATHER3SIV16SI);
31197
31198 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31199 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31200 IX86_BUILTIN_GATHER3SIV8DI);
31201
31202 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31203 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31204 IX86_BUILTIN_GATHER3DIV16SI);
31205
31206 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31207 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31208 IX86_BUILTIN_GATHER3DIV8DI);
31209
31210 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31211 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31212 IX86_BUILTIN_GATHER3ALTSIV8DF);
31213
31214 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31215 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31216 IX86_BUILTIN_GATHER3ALTDIV16SF);
31217
31218 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31219 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31220 IX86_BUILTIN_GATHER3ALTSIV8DI);
31221
31222 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31223 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31224 IX86_BUILTIN_GATHER3ALTDIV16SI);
31225
31226 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31227 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31228 IX86_BUILTIN_SCATTERSIV16SF);
31229
31230 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31231 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31232 IX86_BUILTIN_SCATTERSIV8DF);
31233
31234 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31235 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31236 IX86_BUILTIN_SCATTERDIV16SF);
31237
31238 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31239 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31240 IX86_BUILTIN_SCATTERDIV8DF);
31241
31242 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31243 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31244 IX86_BUILTIN_SCATTERSIV16SI);
31245
31246 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31247 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31248 IX86_BUILTIN_SCATTERSIV8DI);
31249
31250 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31251 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31252 IX86_BUILTIN_SCATTERDIV16SI);
31253
31254 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31255 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31256 IX86_BUILTIN_SCATTERDIV8DI);
31257
31258 /* AVX512PF */
31259 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31260 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31261 IX86_BUILTIN_GATHERPFDPD);
31262 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31263 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31264 IX86_BUILTIN_GATHERPFDPS);
31265 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31266 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31267 IX86_BUILTIN_GATHERPFQPD);
31268 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31269 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31270 IX86_BUILTIN_GATHERPFQPS);
31271 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31272 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31273 IX86_BUILTIN_SCATTERPFDPD);
31274 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31275 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31276 IX86_BUILTIN_SCATTERPFDPS);
31277 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31278 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31279 IX86_BUILTIN_SCATTERPFQPD);
31280 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31281 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31282 IX86_BUILTIN_SCATTERPFQPS);
31283
31284 /* SHA */
31285 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31286 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31287 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31288 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31289 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31290 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31291 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31292 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31293 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31294 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31295 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31296 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31297 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31298 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31299
31300 /* RTM. */
31301 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31302 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31303
31304 /* MMX access to the vec_init patterns. */
31305 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31306 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31307
31308 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31309 V4HI_FTYPE_HI_HI_HI_HI,
31310 IX86_BUILTIN_VEC_INIT_V4HI);
31311
31312 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31313 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31314 IX86_BUILTIN_VEC_INIT_V8QI);
31315
31316 /* Access to the vec_extract patterns. */
31317 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31318 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31319 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31320 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31321 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31322 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31323 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31324 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31325 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31326 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31327
31328 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31329 "__builtin_ia32_vec_ext_v4hi",
31330 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31331
31332 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31333 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31334
31335 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31336 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31337
31338 /* Access to the vec_set patterns. */
31339 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31340 "__builtin_ia32_vec_set_v2di",
31341 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31342
31343 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31344 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31345
31346 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31347 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31348
31349 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31350 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31351
31352 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31353 "__builtin_ia32_vec_set_v4hi",
31354 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31355
31356 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31357 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31358
31359 /* RDSEED */
31360 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31361 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31362 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31363 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31364 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31365 "__builtin_ia32_rdseed_di_step",
31366 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31367
31368 /* ADCX */
31369 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31370 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31371 def_builtin (OPTION_MASK_ISA_64BIT,
31372 "__builtin_ia32_addcarryx_u64",
31373 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31374 IX86_BUILTIN_ADDCARRYX64);
31375
31376 /* SBB */
31377 def_builtin (0, "__builtin_ia32_sbb_u32",
31378 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31379 def_builtin (OPTION_MASK_ISA_64BIT,
31380 "__builtin_ia32_sbb_u64",
31381 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31382 IX86_BUILTIN_SBB64);
31383
31384 /* Read/write FLAGS. */
31385 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31386 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31387 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31388 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31389 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31390 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31391 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31392 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31393
31394 /* CLFLUSHOPT. */
31395 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31396 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31397
31398 /* Add the FMA4 and XOP multi-arg instructions. */
31399 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31400 {
31401 if (d->name == 0)
31402 continue;
31403
31404 ftype = (enum ix86_builtin_func_type) d->flag;
31405 def_builtin_const (d->mask, d->name, ftype, d->code);
31406 }
31407 }
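/* Illustrative sketch of how the def_builtin ISA masks above gate use of a
   builtin.  A builtin is usable only when its ISA bit is enabled globally
   or per function, so user code along these lines (the function name here
   is just an example) is the expected pattern:

     __attribute__((target ("rdrnd")))
     int get_random (unsigned *p)
     {
       return __builtin_ia32_rdrand32_step (p);
     }

   A mask of 0 (as for the ADCX/SBB entries) makes the builtin always
   available, while OPTION_MASK_ISA_64BIT limits it to 64-bit targets.  */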
31408
31409 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31410 to return a pointer to VERSION_DECL if the outcome of the expression
31411 formed by PREDICATE_CHAIN is true. This function will be called during
31412 version dispatch to decide which function version to execute. It returns
31413 the basic block at the end, to which more conditions can be added. */
31414
31415 static basic_block
31416 add_condition_to_bb (tree function_decl, tree version_decl,
31417 tree predicate_chain, basic_block new_bb)
31418 {
31419 gimple return_stmt;
31420 tree convert_expr, result_var;
31421 gimple convert_stmt;
31422 gimple call_cond_stmt;
31423 gimple if_else_stmt;
31424
31425 basic_block bb1, bb2, bb3;
31426 edge e12, e23;
31427
31428 tree cond_var, and_expr_var = NULL_TREE;
31429 gimple_seq gseq;
31430
31431 tree predicate_decl, predicate_arg;
31432
31433 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31434
31435 gcc_assert (new_bb != NULL);
31436 gseq = bb_seq (new_bb);
31437
31438
31439 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31440 build_fold_addr_expr (version_decl));
31441 result_var = create_tmp_var (ptr_type_node, NULL);
31442 convert_stmt = gimple_build_assign (result_var, convert_expr);
31443 return_stmt = gimple_build_return (result_var);
31444
31445 if (predicate_chain == NULL_TREE)
31446 {
31447 gimple_seq_add_stmt (&gseq, convert_stmt);
31448 gimple_seq_add_stmt (&gseq, return_stmt);
31449 set_bb_seq (new_bb, gseq);
31450 gimple_set_bb (convert_stmt, new_bb);
31451 gimple_set_bb (return_stmt, new_bb);
31452 pop_cfun ();
31453 return new_bb;
31454 }
31455
31456 while (predicate_chain != NULL)
31457 {
31458 cond_var = create_tmp_var (integer_type_node, NULL);
31459 predicate_decl = TREE_PURPOSE (predicate_chain);
31460 predicate_arg = TREE_VALUE (predicate_chain);
31461 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31462 gimple_call_set_lhs (call_cond_stmt, cond_var);
31463
31464 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31465 gimple_set_bb (call_cond_stmt, new_bb);
31466 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31467
31468 predicate_chain = TREE_CHAIN (predicate_chain);
31469
31470 if (and_expr_var == NULL)
31471 and_expr_var = cond_var;
31472 else
31473 {
31474 gimple assign_stmt;
31475 /* Use MIN_EXPR to check whether any predicate result is zero:
31476 and_expr_var = min_expr <cond_var, and_expr_var>. */
31477 assign_stmt = gimple_build_assign (and_expr_var,
31478 build2 (MIN_EXPR, integer_type_node,
31479 cond_var, and_expr_var));
31480
31481 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31482 gimple_set_bb (assign_stmt, new_bb);
31483 gimple_seq_add_stmt (&gseq, assign_stmt);
31484 }
31485 }
31486
31487 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31488 integer_zero_node,
31489 NULL_TREE, NULL_TREE);
31490 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31491 gimple_set_bb (if_else_stmt, new_bb);
31492 gimple_seq_add_stmt (&gseq, if_else_stmt);
31493
31494 gimple_seq_add_stmt (&gseq, convert_stmt);
31495 gimple_seq_add_stmt (&gseq, return_stmt);
31496 set_bb_seq (new_bb, gseq);
31497
31498 bb1 = new_bb;
31499 e12 = split_block (bb1, if_else_stmt);
31500 bb2 = e12->dest;
31501 e12->flags &= ~EDGE_FALLTHRU;
31502 e12->flags |= EDGE_TRUE_VALUE;
31503
31504 e23 = split_block (bb2, return_stmt);
31505
31506 gimple_set_bb (convert_stmt, bb2);
31507 gimple_set_bb (return_stmt, bb2);
31508
31509 bb3 = e23->dest;
31510 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31511
31512 remove_edge (e23);
31513 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31514
31515 pop_cfun ();
31516
31517 return bb3;
31518 }
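/* Illustrative sketch of the control flow add_condition_to_bb appends to
   the resolver for one (version, predicate_chain) pair, written as rough C
   for a two-element chain (the GIMPLE it builds is equivalent):

     c1 = pred1 (arg1);
     c2 = pred2 (arg2);
     c1 = MIN (c1, c2);             zero iff any predicate failed
     if (c1 > 0)
       return (void *) &the_version;
     ... fall through to the returned block, where the next version's
         condition is appended ...  */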
31519
31520 /* This parses the attribute arguments to target in DECL and determines
31521 the right builtin to use to match the platform specification.
31522 It returns the priority value for this version decl. If PREDICATE_LIST
31523 is not NULL, it stores the list of cpu features that need to be checked
31524 before dispatching this function. */
31525
31526 static unsigned int
31527 get_builtin_code_for_version (tree decl, tree *predicate_list)
31528 {
31529 tree attrs;
31530 struct cl_target_option cur_target;
31531 tree target_node;
31532 struct cl_target_option *new_target;
31533 const char *arg_str = NULL;
31534 const char *attrs_str = NULL;
31535 char *tok_str = NULL;
31536 char *token;
31537
31538 /* Priority of i386 features, greater value is higher priority. This is
31539 used to decide the order in which function dispatch must happen. For
31540 instance, a version specialized for SSE4.2 should be checked for dispatch
31541 before a version for SSE3, as SSE4.2 implies SSE3. */
31542 enum feature_priority
31543 {
31544 P_ZERO = 0,
31545 P_MMX,
31546 P_SSE,
31547 P_SSE2,
31548 P_SSE3,
31549 P_SSSE3,
31550 P_PROC_SSSE3,
31551 P_SSE4_A,
31552 P_PROC_SSE4_A,
31553 P_SSE4_1,
31554 P_SSE4_2,
31555 P_PROC_SSE4_2,
31556 P_POPCNT,
31557 P_AVX,
31558 P_PROC_AVX,
31559 P_FMA4,
31560 P_XOP,
31561 P_PROC_XOP,
31562 P_FMA,
31563 P_PROC_FMA,
31564 P_AVX2,
31565 P_PROC_AVX2
31566 };
31567
31568 enum feature_priority priority = P_ZERO;
31569
31570 /* These are the target attribute strings for which a dispatcher is
31571 available, from fold_builtin_cpu. */
31572
31573 static struct _feature_list
31574 {
31575 const char *const name;
31576 const enum feature_priority priority;
31577 }
31578 const feature_list[] =
31579 {
31580 {"mmx", P_MMX},
31581 {"sse", P_SSE},
31582 {"sse2", P_SSE2},
31583 {"sse3", P_SSE3},
31584 {"sse4a", P_SSE4_A},
31585 {"ssse3", P_SSSE3},
31586 {"sse4.1", P_SSE4_1},
31587 {"sse4.2", P_SSE4_2},
31588 {"popcnt", P_POPCNT},
31589 {"avx", P_AVX},
31590 {"fma4", P_FMA4},
31591 {"xop", P_XOP},
31592 {"fma", P_FMA},
31593 {"avx2", P_AVX2}
31594 };
31595
31596
31597 static unsigned int NUM_FEATURES
31598 = sizeof (feature_list) / sizeof (struct _feature_list);
31599
31600 unsigned int i;
31601
31602 tree predicate_chain = NULL_TREE;
31603 tree predicate_decl, predicate_arg;
31604
31605 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31606 gcc_assert (attrs != NULL);
31607
31608 attrs = TREE_VALUE (TREE_VALUE (attrs));
31609
31610 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31611 attrs_str = TREE_STRING_POINTER (attrs);
31612
31613 /* Return priority zero for default function. */
31614 if (strcmp (attrs_str, "default") == 0)
31615 return 0;
31616
31617 /* Handle arch= if specified. For priority, set it to be 1 more than
31618 the best instruction set the processor can handle. For instance, if
31619 there is a version for atom and a version for ssse3 (the highest ISA
31620 priority for atom), the atom version must be checked for dispatch
31621 before the ssse3 version. */
31622 if (strstr (attrs_str, "arch=") != NULL)
31623 {
31624 cl_target_option_save (&cur_target, &global_options);
31625 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31626 &global_options_set);
31627
31628 gcc_assert (target_node);
31629 new_target = TREE_TARGET_OPTION (target_node);
31630 gcc_assert (new_target);
31631
31632 if (new_target->arch_specified && new_target->arch > 0)
31633 {
31634 switch (new_target->arch)
31635 {
31636 case PROCESSOR_CORE2:
31637 arg_str = "core2";
31638 priority = P_PROC_SSSE3;
31639 break;
31640 case PROCESSOR_NEHALEM:
31641 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31642 arg_str = "westmere";
31643 else
31644 /* We translate "arch=corei7" and "arch=nehalem" to
31645 "corei7" so that it will be mapped to M_INTEL_COREI7
31646 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31647 arg_str = "corei7";
31648 priority = P_PROC_SSE4_2;
31649 break;
31650 case PROCESSOR_SANDYBRIDGE:
31651 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31652 arg_str = "ivybridge";
31653 else
31654 arg_str = "sandybridge";
31655 priority = P_PROC_AVX;
31656 break;
31657 case PROCESSOR_HASWELL:
31658 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31659 arg_str = "broadwell";
31660 else
31661 arg_str = "haswell";
31662 priority = P_PROC_AVX2;
31663 break;
31664 case PROCESSOR_BONNELL:
31665 arg_str = "bonnell";
31666 priority = P_PROC_SSSE3;
31667 break;
31668 case PROCESSOR_SILVERMONT:
31669 arg_str = "silvermont";
31670 priority = P_PROC_SSE4_2;
31671 break;
31672 case PROCESSOR_AMDFAM10:
31673 arg_str = "amdfam10h";
31674 priority = P_PROC_SSE4_A;
31675 break;
31676 case PROCESSOR_BTVER1:
31677 arg_str = "btver1";
31678 priority = P_PROC_SSE4_A;
31679 break;
31680 case PROCESSOR_BTVER2:
31681 arg_str = "btver2";
31682 priority = P_PROC_AVX;
31683 break;
31684 case PROCESSOR_BDVER1:
31685 arg_str = "bdver1";
31686 priority = P_PROC_XOP;
31687 break;
31688 case PROCESSOR_BDVER2:
31689 arg_str = "bdver2";
31690 priority = P_PROC_FMA;
31691 break;
31692 case PROCESSOR_BDVER3:
31693 arg_str = "bdver3";
31694 priority = P_PROC_FMA;
31695 break;
31696 case PROCESSOR_BDVER4:
31697 arg_str = "bdver4";
31698 priority = P_PROC_AVX2;
31699 break;
31700 }
31701 }
31702
31703 cl_target_option_restore (&global_options, &cur_target);
31704
31705 if (predicate_list && arg_str == NULL)
31706 {
31707 error_at (DECL_SOURCE_LOCATION (decl),
31708 "No dispatcher found for the versioning attributes");
31709 return 0;
31710 }
31711
31712 if (predicate_list)
31713 {
31714 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31715 /* For a C string literal the length includes the terminating NUL. */
31716 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31717 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31718 predicate_chain);
31719 }
31720 }
31721
31722 /* Process feature name. */
31723 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31724 strcpy (tok_str, attrs_str);
31725 token = strtok (tok_str, ",");
31726 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31727
31728 while (token != NULL)
31729 {
31730 /* Do not process "arch=" */
31731 if (strncmp (token, "arch=", 5) == 0)
31732 {
31733 token = strtok (NULL, ",");
31734 continue;
31735 }
31736 for (i = 0; i < NUM_FEATURES; ++i)
31737 {
31738 if (strcmp (token, feature_list[i].name) == 0)
31739 {
31740 if (predicate_list)
31741 {
31742 predicate_arg = build_string_literal (
31743 strlen (feature_list[i].name) + 1,
31744 feature_list[i].name);
31745 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31746 predicate_chain);
31747 }
31748 /* Find the maximum priority feature. */
31749 if (feature_list[i].priority > priority)
31750 priority = feature_list[i].priority;
31751
31752 break;
31753 }
31754 }
31755 if (predicate_list && i == NUM_FEATURES)
31756 {
31757 error_at (DECL_SOURCE_LOCATION (decl),
31758 "No dispatcher found for %s", token);
31759 return 0;
31760 }
31761 token = strtok (NULL, ",");
31762 }
31763 free (tok_str);
31764
31765 if (predicate_list && predicate_chain == NULL_TREE)
31766 {
31767 error_at (DECL_SOURCE_LOCATION (decl),
31768 "No dispatcher found for the versioning attributes : %s",
31769 attrs_str);
31770 return 0;
31771 }
31772 else if (predicate_list)
31773 {
31774 predicate_chain = nreverse (predicate_chain);
31775 *predicate_list = predicate_chain;
31776 }
31777
31778 return priority;
31779 }
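/* Illustrative sketch of what this routine computes for a hypothetical
   version declared as

     __attribute__((target ("arch=core2,sse4.2"))) int foo (void);

   "arch=core2" contributes a __builtin_cpu_is ("core2") predicate and sets
   the priority to P_PROC_SSSE3; the "sse4.2" token contributes a
   __builtin_cpu_supports ("sse4.2") predicate and, since P_SSE4_2 outranks
   P_PROC_SSSE3 in the enum above, the function returns P_SSE4_2 with both
   predicates stored in *PREDICATE_LIST.  */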
31780
31781 /* This compares the priority of target features in function DECL1
31782 and DECL2. It returns a positive value if DECL1 has higher priority,
31783 a negative value if DECL2 has higher priority, and 0 if they are the
31784 same. */
31785
31786 static int
31787 ix86_compare_version_priority (tree decl1, tree decl2)
31788 {
31789 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31790 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31791
31792 return (int)priority1 - (int)priority2;
31793 }
31794
31795 /* V1 and V2 point to function versions with different priorities
31796 based on the target ISA. This function compares their priorities. */
31797
31798 static int
31799 feature_compare (const void *v1, const void *v2)
31800 {
31801 typedef struct _function_version_info
31802 {
31803 tree version_decl;
31804 tree predicate_chain;
31805 unsigned int dispatch_priority;
31806 } function_version_info;
31807
31808 const function_version_info c1 = *(const function_version_info *)v1;
31809 const function_version_info c2 = *(const function_version_info *)v2;
31810 return (c2.dispatch_priority - c1.dispatch_priority);
31811 }
31812
31813 /* This function generates the dispatch function for
31814 multi-versioned functions. DISPATCH_DECL is the function which will
31815 contain the dispatch logic. FNDECLS_P holds the function choices for
31816 dispatch; it is really a vector of decls. EMPTY_BB is the basic block pointer
31817 in DISPATCH_DECL in which the dispatch code is generated. */
31818
31819 static int
31820 dispatch_function_versions (tree dispatch_decl,
31821 void *fndecls_p,
31822 basic_block *empty_bb)
31823 {
31824 tree default_decl;
31825 gimple ifunc_cpu_init_stmt;
31826 gimple_seq gseq;
31827 int ix;
31828 tree ele;
31829 vec<tree> *fndecls;
31830 unsigned int num_versions = 0;
31831 unsigned int actual_versions = 0;
31832 unsigned int i;
31833
31834 struct _function_version_info
31835 {
31836 tree version_decl;
31837 tree predicate_chain;
31838 unsigned int dispatch_priority;
31839 }*function_version_info;
31840
31841 gcc_assert (dispatch_decl != NULL
31842 && fndecls_p != NULL
31843 && empty_bb != NULL);
31844
31845 /* fndecls_p is actually a vector. */
31846 fndecls = static_cast<vec<tree> *> (fndecls_p);
31847
31848 /* At least one more version other than the default. */
31849 num_versions = fndecls->length ();
31850 gcc_assert (num_versions >= 2);
31851
31852 function_version_info = (struct _function_version_info *)
31853 XNEWVEC (struct _function_version_info, (num_versions - 1));
31854
31855 /* The first version in the vector is the default decl. */
31856 default_decl = (*fndecls)[0];
31857
31858 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31859
31860 gseq = bb_seq (*empty_bb);
31861 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31862 constructors, so explicitly call __builtin_cpu_init here. */
31863 ifunc_cpu_init_stmt = gimple_build_call_vec (
31864 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31865 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31866 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31867 set_bb_seq (*empty_bb, gseq);
31868
31869 pop_cfun ();
31870
31871
31872 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31873 {
31874 tree version_decl = ele;
31875 tree predicate_chain = NULL_TREE;
31876 unsigned int priority;
31877 /* Get attribute string, parse it and find the right predicate decl.
31878 The predicate function could be a lengthy combination of many
31879 features, like arch-type and various isa-variants. */
31880 priority = get_builtin_code_for_version (version_decl,
31881 &predicate_chain);
31882
31883 if (predicate_chain == NULL_TREE)
31884 continue;
31885
31886 function_version_info [actual_versions].version_decl = version_decl;
31887 function_version_info [actual_versions].predicate_chain
31888 = predicate_chain;
31889 function_version_info [actual_versions].dispatch_priority = priority;
31890 actual_versions++;
31891 }
31892
31893 /* Sort the versions according to descending order of dispatch priority. The
31894 priority is based on the ISA. This is not a perfect solution. There
31895 could still be ambiguity. If more than one function version is suitable
31896 to execute, which one should be dispatched? In future, allow the user
31897 to specify a dispatch priority next to the version. */
31898 qsort (function_version_info, actual_versions,
31899 sizeof (struct _function_version_info), feature_compare);
31900
31901 for (i = 0; i < actual_versions; ++i)
31902 *empty_bb = add_condition_to_bb (dispatch_decl,
31903 function_version_info[i].version_decl,
31904 function_version_info[i].predicate_chain,
31905 *empty_bb);
31906
31907 /* Dispatch the default version at the end. */
31908 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31909 NULL, *empty_bb);
31910
31911 free (function_version_info);
31912 return 0;
31913 }
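/* Illustrative sketch of the resolver body this function ends up building,
   written as rough C for two non-default versions plus the default (the
   symbol names are only an example):

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2") > 0)       highest priority first
         return foo_avx2;
       if (__builtin_cpu_supports ("sse4.2") > 0)
         return foo_sse4_2;
       return foo_default;                            no predicate needed
     }  */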
31914
31915 /* Comparator function to be used in qsort routine to sort attribute
31916 specification strings to "target". */
31917
31918 static int
31919 attr_strcmp (const void *v1, const void *v2)
31920 {
31921 const char *c1 = *(char *const*)v1;
31922 const char *c2 = *(char *const*)v2;
31923 return strcmp (c1, c2);
31924 }
31925
31926 /* ARGLIST is the argument to target attribute. This function tokenizes
31927 the comma separated arguments, sorts them and returns a string which
31928 is a unique identifier for the comma separated arguments. It also
31929 replaces non-identifier characters "=,-" with "_". */
31930
31931 static char *
31932 sorted_attr_string (tree arglist)
31933 {
31934 tree arg;
31935 size_t str_len_sum = 0;
31936 char **args = NULL;
31937 char *attr_str, *ret_str;
31938 char *attr = NULL;
31939 unsigned int argnum = 1;
31940 unsigned int i;
31941
31942 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31943 {
31944 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31945 size_t len = strlen (str);
31946 str_len_sum += len + 1;
31947 if (arg != arglist)
31948 argnum++;
31949 for (i = 0; i < strlen (str); i++)
31950 if (str[i] == ',')
31951 argnum++;
31952 }
31953
31954 attr_str = XNEWVEC (char, str_len_sum);
31955 str_len_sum = 0;
31956 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31957 {
31958 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31959 size_t len = strlen (str);
31960 memcpy (attr_str + str_len_sum, str, len);
31961 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31962 str_len_sum += len + 1;
31963 }
31964
31965 /* Replace '=' and '-' with '_'. */
31966 for (i = 0; i < strlen (attr_str); i++)
31967 if (attr_str[i] == '=' || attr_str[i]== '-')
31968 attr_str[i] = '_';
31969
31970 if (argnum == 1)
31971 return attr_str;
31972
31973 args = XNEWVEC (char *, argnum);
31974
31975 i = 0;
31976 attr = strtok (attr_str, ",");
31977 while (attr != NULL)
31978 {
31979 args[i] = attr;
31980 i++;
31981 attr = strtok (NULL, ",");
31982 }
31983
31984 qsort (args, argnum, sizeof (char *), attr_strcmp);
31985
31986 ret_str = XNEWVEC (char, str_len_sum);
31987 str_len_sum = 0;
31988 for (i = 0; i < argnum; i++)
31989 {
31990 size_t len = strlen (args[i]);
31991 memcpy (ret_str + str_len_sum, args[i], len);
31992 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31993 str_len_sum += len + 1;
31994 }
31995
31996 XDELETEVEC (args);
31997 XDELETEVEC (attr_str);
31998 return ret_str;
31999 }
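/* Illustrative sketch of the normalization above: for the attribute
   arguments "arch=core2" and "sse4.2,avx" the concatenated string is
   "arch=core2,sse4.2,avx"; '=' and '-' become '_', the comma-separated
   tokens are sorted, and the result is "arch_core2_avx_sse4.2" (periods are
   left as-is).  */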
32000
32001 /* This function changes the assembler name for functions that are
32002 versions. If DECL is a function version and has a "target"
32003 attribute, it appends the attribute string to its assembler name. */
32004
32005 static tree
32006 ix86_mangle_function_version_assembler_name (tree decl, tree id)
32007 {
32008 tree version_attr;
32009 const char *orig_name, *version_string;
32010 char *attr_str, *assembler_name;
32011
32012 if (DECL_DECLARED_INLINE_P (decl)
32013 && lookup_attribute ("gnu_inline",
32014 DECL_ATTRIBUTES (decl)))
32015 error_at (DECL_SOURCE_LOCATION (decl),
32016 "Function versions cannot be marked as gnu_inline,"
32017 " bodies have to be generated");
32018
32019 if (DECL_VIRTUAL_P (decl)
32020 || DECL_VINDEX (decl))
32021 sorry ("Virtual function multiversioning not supported");
32022
32023 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32024
32025 /* target attribute string cannot be NULL. */
32026 gcc_assert (version_attr != NULL_TREE);
32027
32028 orig_name = IDENTIFIER_POINTER (id);
32029 version_string
32030 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
32031
32032 if (strcmp (version_string, "default") == 0)
32033 return id;
32034
32035 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
32036 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
32037
32038 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
32039
32040 /* Allow assembler name to be modified if already set. */
32041 if (DECL_ASSEMBLER_NAME_SET_P (decl))
32042 SET_DECL_RTL (decl, NULL);
32043
32044 tree ret = get_identifier (assembler_name);
32045 XDELETEVEC (attr_str);
32046 XDELETEVEC (assembler_name);
32047 return ret;
32048 }
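/* Illustrative sketch of the resulting assembler names for C-level versions
   of foo (for C++ the suffix is appended to the mangled id instead):

     __attribute__((target ("default")))     int foo (void);   ->  foo
     __attribute__((target ("sse4.2")))      int foo (void);   ->  foo.sse4.2
     __attribute__((target ("avx,popcnt")))  int foo (void);   ->  foo.avx_popcnt

   i.e. the sorted string from sorted_attr_string is appended after a '.',
   and the default version keeps its plain name.  */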
32049
32050 /* This function returns true if FN1 and FN2 are versions of the same function,
32051 that is, the target strings of the function decls are different. This assumes
32052 that FN1 and FN2 have the same signature. */
32053
32054 static bool
32055 ix86_function_versions (tree fn1, tree fn2)
32056 {
32057 tree attr1, attr2;
32058 char *target1, *target2;
32059 bool result;
32060
32061 if (TREE_CODE (fn1) != FUNCTION_DECL
32062 || TREE_CODE (fn2) != FUNCTION_DECL)
32063 return false;
32064
32065 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
32066 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
32067
32068 /* At least one function decl should have the target attribute specified. */
32069 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
32070 return false;
32071
32072 /* Diagnose missing target attribute if one of the decls is already
32073 multi-versioned. */
32074 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
32075 {
32076 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
32077 {
32078 if (attr2 != NULL_TREE)
32079 {
32080 tree tem = fn1;
32081 fn1 = fn2;
32082 fn2 = tem;
32083 attr1 = attr2;
32084 }
32085 error_at (DECL_SOURCE_LOCATION (fn2),
32086 "missing %<target%> attribute for multi-versioned %D",
32087 fn2);
32088 inform (DECL_SOURCE_LOCATION (fn1),
32089 "previous declaration of %D", fn1);
32090 /* Prevent diagnosing of the same error multiple times. */
32091 DECL_ATTRIBUTES (fn2)
32092 = tree_cons (get_identifier ("target"),
32093 copy_node (TREE_VALUE (attr1)),
32094 DECL_ATTRIBUTES (fn2));
32095 }
32096 return false;
32097 }
32098
32099 target1 = sorted_attr_string (TREE_VALUE (attr1));
32100 target2 = sorted_attr_string (TREE_VALUE (attr2));
32101
32102 /* The sorted target strings must be different for fn1 and fn2
32103 to be versions. */
32104 if (strcmp (target1, target2) == 0)
32105 result = false;
32106 else
32107 result = true;
32108
32109 XDELETEVEC (target1);
32110 XDELETEVEC (target2);
32111
32112 return result;
32113 }
32114
32115 static tree
32116 ix86_mangle_decl_assembler_name (tree decl, tree id)
32117 {
32118 /* For function version, add the target suffix to the assembler name. */
32119 if (TREE_CODE (decl) == FUNCTION_DECL
32120 && DECL_FUNCTION_VERSIONED (decl))
32121 id = ix86_mangle_function_version_assembler_name (decl, id);
32122 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32123 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32124 #endif
32125
32126 return id;
32127 }
32128
32129 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
32130 is true, append the full path name of the source file. */
32131
32132 static char *
32133 make_name (tree decl, const char *suffix, bool make_unique)
32134 {
32135 char *global_var_name;
32136 int name_len;
32137 const char *name;
32138 const char *unique_name = NULL;
32139
32140 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32141
32142 /* Get a unique name that can be used globally without any chances
32143 of collision at link time. */
32144 if (make_unique)
32145 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
32146
32147 name_len = strlen (name) + strlen (suffix) + 2;
32148
32149 if (make_unique)
32150 name_len += strlen (unique_name) + 1;
32151 global_var_name = XNEWVEC (char, name_len);
32152
32153 /* Use '.' to concatenate names as it is demangler friendly. */
32154 if (make_unique)
32155 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
32156 suffix);
32157 else
32158 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32159
32160 return global_var_name;
32161 }
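/* Illustrative sketch: for a decl whose assembler name is "foo",
   make_name (decl, "resolver", false) yields "foo.resolver", while
   make_unique == true splices in the file-scope unique string from
   get_file_function_name, giving roughly "foo.<unique-file-id>.resolver"
   (the exact middle component is file dependent).  */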
32162
32163 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32164
32165 /* Make a dispatcher declaration for the multi-versioned function DECL.
32166 Calls to DECL function will be replaced with calls to the dispatcher
32167 by the front-end. Return the decl created. */
32168
32169 static tree
32170 make_dispatcher_decl (const tree decl)
32171 {
32172 tree func_decl;
32173 char *func_name;
32174 tree fn_type, func_type;
32175 bool is_uniq = false;
32176
32177 if (TREE_PUBLIC (decl) == 0)
32178 is_uniq = true;
32179
32180 func_name = make_name (decl, "ifunc", is_uniq);
32181
32182 fn_type = TREE_TYPE (decl);
32183 func_type = build_function_type (TREE_TYPE (fn_type),
32184 TYPE_ARG_TYPES (fn_type));
32185
32186 func_decl = build_fn_decl (func_name, func_type);
32187 XDELETEVEC (func_name);
32188 TREE_USED (func_decl) = 1;
32189 DECL_CONTEXT (func_decl) = NULL_TREE;
32190 DECL_INITIAL (func_decl) = error_mark_node;
32191 DECL_ARTIFICIAL (func_decl) = 1;
32192 /* Mark this func as external; the resolver will flip it again if
32193 it gets generated. */
32194 DECL_EXTERNAL (func_decl) = 1;
32195 /* IFUNCs have to be externally visible. */
32196 TREE_PUBLIC (func_decl) = 1;
32197
32198 return func_decl;
32199 }
32200
32201 #endif
32202
32203 /* Return true if DECL is multi-versioned and is the default function,
32204 that is, it is not tagged with a target-specific optimization. */
32205
32206 static bool
32207 is_function_default_version (const tree decl)
32208 {
32209 if (TREE_CODE (decl) != FUNCTION_DECL
32210 || !DECL_FUNCTION_VERSIONED (decl))
32211 return false;
32212 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32213 gcc_assert (attr);
32214 attr = TREE_VALUE (TREE_VALUE (attr));
32215 return (TREE_CODE (attr) == STRING_CST
32216 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32217 }
32218
32219 /* Make a dispatcher declaration for the multi-versioned function DECL.
32220 Calls to DECL function will be replaced with calls to the dispatcher
32221 by the front-end. Returns the decl of the dispatcher function. */
32222
32223 static tree
32224 ix86_get_function_versions_dispatcher (void *decl)
32225 {
32226 tree fn = (tree) decl;
32227 struct cgraph_node *node = NULL;
32228 struct cgraph_node *default_node = NULL;
32229 struct cgraph_function_version_info *node_v = NULL;
32230 struct cgraph_function_version_info *first_v = NULL;
32231
32232 tree dispatch_decl = NULL;
32233
32234 struct cgraph_function_version_info *default_version_info = NULL;
32235
32236 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32237
32238 node = cgraph_node::get (fn);
32239 gcc_assert (node != NULL);
32240
32241 node_v = node->function_version ();
32242 gcc_assert (node_v != NULL);
32243
32244 if (node_v->dispatcher_resolver != NULL)
32245 return node_v->dispatcher_resolver;
32246
32247 /* Find the default version and make it the first node. */
32248 first_v = node_v;
32249 /* Go to the beginning of the chain. */
32250 while (first_v->prev != NULL)
32251 first_v = first_v->prev;
32252 default_version_info = first_v;
32253 while (default_version_info != NULL)
32254 {
32255 if (is_function_default_version
32256 (default_version_info->this_node->decl))
32257 break;
32258 default_version_info = default_version_info->next;
32259 }
32260
32261 /* If there is no default node, just return NULL. */
32262 if (default_version_info == NULL)
32263 return NULL;
32264
32265 /* Make default info the first node. */
32266 if (first_v != default_version_info)
32267 {
32268 default_version_info->prev->next = default_version_info->next;
32269 if (default_version_info->next)
32270 default_version_info->next->prev = default_version_info->prev;
32271 first_v->prev = default_version_info;
32272 default_version_info->next = first_v;
32273 default_version_info->prev = NULL;
32274 }
32275
32276 default_node = default_version_info->this_node;
32277
32278 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32279 if (targetm.has_ifunc_p ())
32280 {
32281 struct cgraph_function_version_info *it_v = NULL;
32282 struct cgraph_node *dispatcher_node = NULL;
32283 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32284
32285 /* Right now, the dispatching is done via ifunc. */
32286 dispatch_decl = make_dispatcher_decl (default_node->decl);
32287
32288 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32289 gcc_assert (dispatcher_node != NULL);
32290 dispatcher_node->dispatcher_function = 1;
32291 dispatcher_version_info
32292 = dispatcher_node->insert_new_function_version ();
32293 dispatcher_version_info->next = default_version_info;
32294 dispatcher_node->definition = 1;
32295
32296 /* Set the dispatcher for all the versions. */
32297 it_v = default_version_info;
32298 while (it_v != NULL)
32299 {
32300 it_v->dispatcher_resolver = dispatch_decl;
32301 it_v = it_v->next;
32302 }
32303 }
32304 else
32305 #endif
32306 {
32307 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32308 "multiversioning needs ifunc which is not supported "
32309 "on this target");
32310 }
32311
32312 return dispatch_decl;
32313 }
32314
32315 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32316 it to CHAIN. */
32317
32318 static tree
32319 make_attribute (const char *name, const char *arg_name, tree chain)
32320 {
32321 tree attr_name;
32322 tree attr_arg_name;
32323 tree attr_args;
32324 tree attr;
32325
32326 attr_name = get_identifier (name);
32327 attr_arg_name = build_string (strlen (arg_name), arg_name);
32328 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32329 attr = tree_cons (attr_name, attr_args, chain);
32330 return attr;
32331 }
32332
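/* For example, assuming a resolver named "foo.resolver" (the real name
   is produced by make_name in make_resolver_func below; this one is
   only illustrative),

     make_attribute ("ifunc", "foo.resolver", NULL_TREE)

   builds the tree equivalent of the source-level attribute
   __attribute__ ((ifunc ("foo.resolver"))), which is how the dispatcher
   decl is tied to its resolver.  */
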
32333 /* Make the resolver function decl to dispatch the versions of
32334 a multi-versioned function, DEFAULT_DECL. Create an
32335 empty basic block in the resolver and store the pointer in
32336 EMPTY_BB. Return the decl of the resolver function. */
32337
32338 static tree
32339 make_resolver_func (const tree default_decl,
32340 const tree dispatch_decl,
32341 basic_block *empty_bb)
32342 {
32343 char *resolver_name;
32344 tree decl, type, decl_name, t;
32345 bool is_uniq = false;
32346
32347 /* IFUNCs have to be globally visible. So, if the default_decl is
32348 not, then the name of the IFUNC should be made unique. */
32349 if (TREE_PUBLIC (default_decl) == 0)
32350 is_uniq = true;
32351
32352 /* Append the filename to the resolver function if the versions are
32353 not externally visible. This is because the resolver function has
32354 to be externally visible for the loader to find it. So, appending
32355 the filename will prevent conflicts with a resolver function from
32356 another module which is based on the same version name. */
32357 resolver_name = make_name (default_decl, "resolver", is_uniq);
32358
32359 /* The resolver function should return a (void *). */
32360 type = build_function_type_list (ptr_type_node, NULL_TREE);
32361
32362 decl = build_fn_decl (resolver_name, type);
32363 decl_name = get_identifier (resolver_name);
32364 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32365
32366 DECL_NAME (decl) = decl_name;
32367 TREE_USED (decl) = 1;
32368 DECL_ARTIFICIAL (decl) = 1;
32369 DECL_IGNORED_P (decl) = 0;
32370 /* IFUNC resolvers have to be externally visible. */
32371 TREE_PUBLIC (decl) = 1;
32372 DECL_UNINLINABLE (decl) = 1;
32373
32374 /* Resolver is not external, body is generated. */
32375 DECL_EXTERNAL (decl) = 0;
32376 DECL_EXTERNAL (dispatch_decl) = 0;
32377
32378 DECL_CONTEXT (decl) = NULL_TREE;
32379 DECL_INITIAL (decl) = make_node (BLOCK);
32380 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32381
32382 if (DECL_COMDAT_GROUP (default_decl)
32383 || TREE_PUBLIC (default_decl))
32384 {
32385 /* In this case, each translation unit with a call to this
32386 versioned function will put out a resolver. Ensure it
32387 is comdat to keep just one copy. */
32388 DECL_COMDAT (decl) = 1;
32389 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32390 }
32391 /* Build result decl and add to function_decl. */
32392 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32393 DECL_ARTIFICIAL (t) = 1;
32394 DECL_IGNORED_P (t) = 1;
32395 DECL_RESULT (decl) = t;
32396
32397 gimplify_function_tree (decl);
32398 push_cfun (DECL_STRUCT_FUNCTION (decl));
32399 *empty_bb = init_lowered_empty_function (decl, false);
32400
32401 cgraph_node::add_new_function (decl, true);
32402 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32403
32404 pop_cfun ();
32405
32406 gcc_assert (dispatch_decl != NULL);
32407 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32408 DECL_ATTRIBUTES (dispatch_decl)
32409 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32410
32411 /* Create the alias for dispatch to resolver here. */
32412 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32413 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32414 XDELETEVEC (resolver_name);
32415 return decl;
32416 }
32417
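/* A sketch only, with illustrative names: the resolver body that
   dispatch_function_versions fills in later behaves roughly like

     static void *
     foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return (void *) foo_avx2;
       return (void *) foo_default;
     }

   i.e. it returns the address of the version to use, and the ifunc
   machinery binds calls to the dispatcher to that address at load
   time.  */
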
32418 /* Generate the dispatching code body to dispatch multi-versioned function
32419 DECL. The target hook is called to process the "target" attributes and
32420 provide the code to dispatch the right function at run-time. NODE points
32421 to the dispatcher decl whose body will be created. */
32422
32423 static tree
32424 ix86_generate_version_dispatcher_body (void *node_p)
32425 {
32426 tree resolver_decl;
32427 basic_block empty_bb;
32428 tree default_ver_decl;
32429 struct cgraph_node *versn;
32430 struct cgraph_node *node;
32431
32432 struct cgraph_function_version_info *node_version_info = NULL;
32433 struct cgraph_function_version_info *versn_info = NULL;
32434
32435 node = (cgraph_node *)node_p;
32436
32437 node_version_info = node->function_version ();
32438 gcc_assert (node->dispatcher_function
32439 && node_version_info != NULL);
32440
32441 if (node_version_info->dispatcher_resolver)
32442 return node_version_info->dispatcher_resolver;
32443
32444 /* The first version in the chain corresponds to the default version. */
32445 default_ver_decl = node_version_info->next->this_node->decl;
32446
32447 /* node is going to be an alias, so remove the finalized bit. */
32448 node->definition = false;
32449
32450 resolver_decl = make_resolver_func (default_ver_decl,
32451 node->decl, &empty_bb);
32452
32453 node_version_info->dispatcher_resolver = resolver_decl;
32454
32455 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32456
32457 auto_vec<tree, 2> fn_ver_vec;
32458
32459 for (versn_info = node_version_info->next; versn_info;
32460 versn_info = versn_info->next)
32461 {
32462 versn = versn_info->this_node;
32463 /* Check for virtual functions here again, as by this time it should
32464 have been determined if this function needs a vtable index or
32465 not. This happens for methods in derived classes that override
32466 virtual methods in base classes but are not explicitly marked as
32467 virtual. */
32468 if (DECL_VINDEX (versn->decl))
32469 sorry ("virtual function multiversioning not supported");
32470
32471 fn_ver_vec.safe_push (versn->decl);
32472 }
32473
32474 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32475 cgraph_edge::rebuild_edges ();
32476 pop_cfun ();
32477 return resolver_decl;
32478 }
32479 /* This builds the processor_model struct type defined in
32480 libgcc/config/i386/cpuinfo.c */
32481
32482 static tree
32483 build_processor_model_struct (void)
32484 {
32485 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32486 "__cpu_features"};
32487 tree field = NULL_TREE, field_chain = NULL_TREE;
32488 int i;
32489 tree type = make_node (RECORD_TYPE);
32490
32491 /* The first 3 fields are unsigned int. */
32492 for (i = 0; i < 3; ++i)
32493 {
32494 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32495 get_identifier (field_name[i]), unsigned_type_node);
32496 if (field_chain != NULL_TREE)
32497 DECL_CHAIN (field) = field_chain;
32498 field_chain = field;
32499 }
32500
32501 /* The last field is an array of unsigned integers of size one. */
32502 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32503 get_identifier (field_name[3]),
32504 build_array_type (unsigned_type_node,
32505 build_index_type (size_one_node)));
32506 if (field_chain != NULL_TREE)
32507 DECL_CHAIN (field) = field_chain;
32508 field_chain = field;
32509
32510 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32511 return type;
32512 }
32513
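/* The record built above is intended to mirror the layout of the
   __cpu_model variable in libgcc/config/i386/cpuinfo.c, i.e.

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   The field names used here are taken from that definition.  */
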
32514 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32515
32516 static tree
32517 make_var_decl (tree type, const char *name)
32518 {
32519 tree new_decl;
32520
32521 new_decl = build_decl (UNKNOWN_LOCATION,
32522 VAR_DECL,
32523 get_identifier (name),
32524 type);
32525
32526 DECL_EXTERNAL (new_decl) = 1;
32527 TREE_STATIC (new_decl) = 1;
32528 TREE_PUBLIC (new_decl) = 1;
32529 DECL_INITIAL (new_decl) = 0;
32530 DECL_ARTIFICIAL (new_decl) = 0;
32531 DECL_PRESERVE_P (new_decl) = 1;
32532
32533 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32534 assemble_variable (new_decl, 0, 0, 0);
32535
32536 return new_decl;
32537 }
32538
32539 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32540 into an integer defined in libgcc/config/i386/cpuinfo.c */
32541
32542 static tree
32543 fold_builtin_cpu (tree fndecl, tree *args)
32544 {
32545 unsigned int i;
32546 enum ix86_builtins fn_code = (enum ix86_builtins)
32547 DECL_FUNCTION_CODE (fndecl);
32548 tree param_string_cst = NULL;
32549
32550 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32551 enum processor_features
32552 {
32553 F_CMOV = 0,
32554 F_MMX,
32555 F_POPCNT,
32556 F_SSE,
32557 F_SSE2,
32558 F_SSE3,
32559 F_SSSE3,
32560 F_SSE4_1,
32561 F_SSE4_2,
32562 F_AVX,
32563 F_AVX2,
32564 F_SSE4_A,
32565 F_FMA4,
32566 F_XOP,
32567 F_FMA,
32568 F_MAX
32569 };
32570
32571 /* These are the values for vendor types and CPU types and subtypes
32572    in cpuinfo.c.  The corresponding start value must be subtracted
32573    from CPU types and subtypes before use. */
32574 enum processor_model
32575 {
32576 M_INTEL = 1,
32577 M_AMD,
32578 M_CPU_TYPE_START,
32579 M_INTEL_BONNELL,
32580 M_INTEL_CORE2,
32581 M_INTEL_COREI7,
32582 M_AMDFAM10H,
32583 M_AMDFAM15H,
32584 M_INTEL_SILVERMONT,
32585 M_AMD_BTVER1,
32586 M_AMD_BTVER2,
32587 M_CPU_SUBTYPE_START,
32588 M_INTEL_COREI7_NEHALEM,
32589 M_INTEL_COREI7_WESTMERE,
32590 M_INTEL_COREI7_SANDYBRIDGE,
32591 M_AMDFAM10H_BARCELONA,
32592 M_AMDFAM10H_SHANGHAI,
32593 M_AMDFAM10H_ISTANBUL,
32594 M_AMDFAM15H_BDVER1,
32595 M_AMDFAM15H_BDVER2,
32596 M_AMDFAM15H_BDVER3,
32597 M_AMDFAM15H_BDVER4,
32598 M_INTEL_COREI7_IVYBRIDGE,
32599 M_INTEL_COREI7_HASWELL
32600 };
32601
32602 static struct _arch_names_table
32603 {
32604 const char *const name;
32605 const enum processor_model model;
32606 }
32607 const arch_names_table[] =
32608 {
32609 {"amd", M_AMD},
32610 {"intel", M_INTEL},
32611 {"atom", M_INTEL_BONNELL},
32612 {"slm", M_INTEL_SILVERMONT},
32613 {"core2", M_INTEL_CORE2},
32614 {"corei7", M_INTEL_COREI7},
32615 {"nehalem", M_INTEL_COREI7_NEHALEM},
32616 {"westmere", M_INTEL_COREI7_WESTMERE},
32617 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32618 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32619 {"haswell", M_INTEL_COREI7_HASWELL},
32620 {"bonnell", M_INTEL_BONNELL},
32621 {"silvermont", M_INTEL_SILVERMONT},
32622 {"amdfam10h", M_AMDFAM10H},
32623 {"barcelona", M_AMDFAM10H_BARCELONA},
32624 {"shanghai", M_AMDFAM10H_SHANGHAI},
32625 {"istanbul", M_AMDFAM10H_ISTANBUL},
32626 {"btver1", M_AMD_BTVER1},
32627 {"amdfam15h", M_AMDFAM15H},
32628 {"bdver1", M_AMDFAM15H_BDVER1},
32629 {"bdver2", M_AMDFAM15H_BDVER2},
32630 {"bdver3", M_AMDFAM15H_BDVER3},
32631 {"bdver4", M_AMDFAM15H_BDVER4},
32632 {"btver2", M_AMD_BTVER2},
32633 };
32634
32635 static struct _isa_names_table
32636 {
32637 const char *const name;
32638 const enum processor_features feature;
32639 }
32640 const isa_names_table[] =
32641 {
32642 {"cmov", F_CMOV},
32643 {"mmx", F_MMX},
32644 {"popcnt", F_POPCNT},
32645 {"sse", F_SSE},
32646 {"sse2", F_SSE2},
32647 {"sse3", F_SSE3},
32648 {"ssse3", F_SSSE3},
32649 {"sse4a", F_SSE4_A},
32650 {"sse4.1", F_SSE4_1},
32651 {"sse4.2", F_SSE4_2},
32652 {"avx", F_AVX},
32653 {"fma4", F_FMA4},
32654 {"xop", F_XOP},
32655 {"fma", F_FMA},
32656 {"avx2", F_AVX2}
32657 };
32658
32659 tree __processor_model_type = build_processor_model_struct ();
32660 tree __cpu_model_var = make_var_decl (__processor_model_type,
32661 "__cpu_model");
32662
32663
32664 varpool_node::add (__cpu_model_var);
32665
32666 gcc_assert ((args != NULL) && (*args != NULL));
32667
32668 param_string_cst = *args;
32669 while (param_string_cst
32670 && TREE_CODE (param_string_cst) != STRING_CST)
32671 {
32672 /* *args must be an expr that can contain other EXPRs leading to a
32673 STRING_CST. */
32674 if (!EXPR_P (param_string_cst))
32675 {
32676 error ("Parameter to builtin must be a string constant or literal");
32677 return integer_zero_node;
32678 }
32679 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32680 }
32681
32682 gcc_assert (param_string_cst);
32683
32684 if (fn_code == IX86_BUILTIN_CPU_IS)
32685 {
32686 tree ref;
32687 tree field;
32688 tree final;
32689
32690 unsigned int field_val = 0;
32691 unsigned int NUM_ARCH_NAMES
32692 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32693
32694 for (i = 0; i < NUM_ARCH_NAMES; i++)
32695 if (strcmp (arch_names_table[i].name,
32696 TREE_STRING_POINTER (param_string_cst)) == 0)
32697 break;
32698
32699 if (i == NUM_ARCH_NAMES)
32700 {
32701 error ("Parameter to builtin not valid: %s",
32702 TREE_STRING_POINTER (param_string_cst));
32703 return integer_zero_node;
32704 }
32705
32706 field = TYPE_FIELDS (__processor_model_type);
32707 field_val = arch_names_table[i].model;
32708
32709 /* CPU types are stored in the next field. */
32710 if (field_val > M_CPU_TYPE_START
32711 && field_val < M_CPU_SUBTYPE_START)
32712 {
32713 field = DECL_CHAIN (field);
32714 field_val -= M_CPU_TYPE_START;
32715 }
32716
32717 /* CPU subtypes are stored two fields further down, in __cpu_subtype. */
32718 if (field_val > M_CPU_SUBTYPE_START)
32719 {
32720 field = DECL_CHAIN (DECL_CHAIN (field));
32721 field_val -= M_CPU_SUBTYPE_START;
32722 }
32723
32724 /* Get the appropriate field in __cpu_model. */
32725 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32726 field, NULL_TREE);
32727
32728 /* Check the value. */
32729 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32730 build_int_cstu (unsigned_type_node, field_val));
32731 return build1 (CONVERT_EXPR, integer_type_node, final);
32732 }
32733 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32734 {
32735 tree ref;
32736 tree array_elt;
32737 tree field;
32738 tree final;
32739
32740 unsigned int field_val = 0;
32741 unsigned int NUM_ISA_NAMES
32742 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32743
32744 for (i = 0; i < NUM_ISA_NAMES; i++)
32745 if (strcmp (isa_names_table[i].name,
32746 TREE_STRING_POINTER (param_string_cst)) == 0)
32747 break;
32748
32749 if (i == NUM_ISA_NAMES)
32750 {
32751 error ("Parameter to builtin not valid: %s",
32752 TREE_STRING_POINTER (param_string_cst));
32753 return integer_zero_node;
32754 }
32755
32756 field = TYPE_FIELDS (__processor_model_type);
32757 /* Get the last field, which is __cpu_features. */
32758 while (DECL_CHAIN (field))
32759 field = DECL_CHAIN (field);
32760
32761 /* Get the appropriate field: __cpu_model.__cpu_features */
32762 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32763 field, NULL_TREE);
32764
32765 /* Access the 0th element of __cpu_features array. */
32766 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32767 integer_zero_node, NULL_TREE, NULL_TREE);
32768
32769 field_val = (1 << isa_names_table[i].feature);
32770 /* Return __cpu_model.__cpu_features[0] & field_val */
32771 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32772 build_int_cstu (unsigned_type_node, field_val));
32773 return build1 (CONVERT_EXPR, integer_type_node, final);
32774 }
32775 gcc_unreachable ();
32776 }
32777
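/* Two illustrative folds, using the enum values defined above:

     __builtin_cpu_is ("intel")
       => (int) (__cpu_model.__cpu_vendor == M_INTEL)

     __builtin_cpu_supports ("avx")
       => (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX))

   An argument that does not reduce to a string constant is diagnosed
   and the call folds to 0.  */
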
32778 static tree
32779 ix86_fold_builtin (tree fndecl, int n_args,
32780 tree *args, bool ignore ATTRIBUTE_UNUSED)
32781 {
32782 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32783 {
32784 enum ix86_builtins fn_code = (enum ix86_builtins)
32785 DECL_FUNCTION_CODE (fndecl);
32786 if (fn_code == IX86_BUILTIN_CPU_IS
32787 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32788 {
32789 gcc_assert (n_args == 1);
32790 return fold_builtin_cpu (fndecl, args);
32791 }
32792 }
32793
32794 #ifdef SUBTARGET_FOLD_BUILTIN
32795 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32796 #endif
32797
32798 return NULL_TREE;
32799 }
32800
32801 /* Make builtins to detect cpu type and features supported. NAME is
32802 the builtin name, CODE is the builtin code, and FTYPE is the function
32803 type of the builtin. */
32804
32805 static void
32806 make_cpu_type_builtin (const char* name, int code,
32807 enum ix86_builtin_func_type ftype, bool is_const)
32808 {
32809 tree decl;
32810 tree type;
32811
32812 type = ix86_get_builtin_func_type (ftype);
32813 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32814 NULL, NULL_TREE);
32815 gcc_assert (decl != NULL_TREE);
32816 ix86_builtins[(int) code] = decl;
32817 TREE_READONLY (decl) = is_const;
32818 }
32819
32820 /* Make builtins to get CPU type and features supported. The created
32821 builtins are:
32822
32823 __builtin_cpu_init (), to detect cpu type and features,
32824 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32825 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32826 */
32827
32828 static void
32829 ix86_init_platform_type_builtins (void)
32830 {
32831 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32832 INT_FTYPE_VOID, false);
32833 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32834 INT_FTYPE_PCCHAR, true);
32835 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32836 INT_FTYPE_PCCHAR, true);
32837 }
32838
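/* Typical use from user code (illustration only):

     int
     pick_impl (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("haswell"))
         return 1;
       if (__builtin_cpu_supports ("sse4.2"))
         return 2;
       return 0;
     }

   Both string arguments must be constants so that fold_builtin_cpu can
   resolve them against the tables above.  */
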
32839 /* Internal method for ix86_init_builtins. */
32840
32841 static void
32842 ix86_init_builtins_va_builtins_abi (void)
32843 {
32844 tree ms_va_ref, sysv_va_ref;
32845 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32846 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32847 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32848 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32849
32850 if (!TARGET_64BIT)
32851 return;
32852 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32853 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32854 ms_va_ref = build_reference_type (ms_va_list_type_node);
32855 sysv_va_ref =
32856 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32857
32858 fnvoid_va_end_ms =
32859 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32860 fnvoid_va_start_ms =
32861 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32862 fnvoid_va_end_sysv =
32863 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32864 fnvoid_va_start_sysv =
32865 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32866 NULL_TREE);
32867 fnvoid_va_copy_ms =
32868 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32869 NULL_TREE);
32870 fnvoid_va_copy_sysv =
32871 build_function_type_list (void_type_node, sysv_va_ref,
32872 sysv_va_ref, NULL_TREE);
32873
32874 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32875 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32876 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32877 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32878 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32879 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32880 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32881 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32882 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32883 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32884 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32885 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32886 }
32887
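/* Illustration only: with these builtins a 64-bit SysV host can write
   an ms_abi varargs callee by hand, e.g.

     void __attribute__ ((ms_abi))
     f (int n, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, n);
       ...
       __builtin_ms_va_end (ap);
     }

   and, symmetrically, the __builtin_sysv_va_* variants from ms_abi
   code.  */
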
32888 static void
32889 ix86_init_builtin_types (void)
32890 {
32891 tree float128_type_node, float80_type_node;
32892
32893 /* The __float80 type. */
32894 float80_type_node = long_double_type_node;
32895 if (TYPE_MODE (float80_type_node) != XFmode)
32896 {
32897 /* The __float80 type. */
32898 float80_type_node = make_node (REAL_TYPE);
32899
32900 TYPE_PRECISION (float80_type_node) = 80;
32901 layout_type (float80_type_node);
32902 }
32903 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32904
32905 /* The __float128 type. */
32906 float128_type_node = make_node (REAL_TYPE);
32907 TYPE_PRECISION (float128_type_node) = 128;
32908 layout_type (float128_type_node);
32909 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32910
32911 /* This macro is built by i386-builtin-types.awk. */
32912 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32913 }
32914
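/* Illustration only: once registered, both types are usable directly
   from C, e.g.

     __float80  e = 1.0w;
     __float128 q = 1.0q;

   using the GCC literal suffixes 'w' (XFmode) and 'q' (TFmode).  */
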
32915 static void
32916 ix86_init_builtins (void)
32917 {
32918 tree t;
32919
32920 ix86_init_builtin_types ();
32921
32922 /* Builtins to get CPU type and features. */
32923 ix86_init_platform_type_builtins ();
32924
32925 /* TFmode support builtins. */
32926 def_builtin_const (0, "__builtin_infq",
32927 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32928 def_builtin_const (0, "__builtin_huge_valq",
32929 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32930
32931 /* We will expand them to a normal call if SSE isn't available,
32932 since they are used by libgcc. */
32933 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32934 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32935 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32936 TREE_READONLY (t) = 1;
32937 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32938
32939 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32940 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32941 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32942 TREE_READONLY (t) = 1;
32943 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32944
32945 ix86_init_tm_builtins ();
32946 ix86_init_mmx_sse_builtins ();
32947
32948 if (TARGET_LP64)
32949 ix86_init_builtins_va_builtins_abi ();
32950
32951 #ifdef SUBTARGET_INIT_BUILTINS
32952 SUBTARGET_INIT_BUILTINS;
32953 #endif
32954 }
32955
32956 /* Return the ix86 builtin for CODE. */
32957
32958 static tree
32959 ix86_builtin_decl (unsigned code, bool)
32960 {
32961 if (code >= IX86_BUILTIN_MAX)
32962 return error_mark_node;
32963
32964 return ix86_builtins[code];
32965 }
32966
32967 /* Errors in the source file can cause expand_expr to return const0_rtx
32968 where we expect a vector. To avoid crashing, use one of the vector
32969 clear instructions. */
32970 static rtx
32971 safe_vector_operand (rtx x, enum machine_mode mode)
32972 {
32973 if (x == const0_rtx)
32974 x = CONST0_RTX (mode);
32975 return x;
32976 }
32977
32978 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32979
32980 static rtx
32981 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32982 {
32983 rtx pat;
32984 tree arg0 = CALL_EXPR_ARG (exp, 0);
32985 tree arg1 = CALL_EXPR_ARG (exp, 1);
32986 rtx op0 = expand_normal (arg0);
32987 rtx op1 = expand_normal (arg1);
32988 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32989 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32990 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32991
32992 if (VECTOR_MODE_P (mode0))
32993 op0 = safe_vector_operand (op0, mode0);
32994 if (VECTOR_MODE_P (mode1))
32995 op1 = safe_vector_operand (op1, mode1);
32996
32997 if (optimize || !target
32998 || GET_MODE (target) != tmode
32999 || !insn_data[icode].operand[0].predicate (target, tmode))
33000 target = gen_reg_rtx (tmode);
33001
33002 if (GET_MODE (op1) == SImode && mode1 == TImode)
33003 {
33004 rtx x = gen_reg_rtx (V4SImode);
33005 emit_insn (gen_sse2_loadd (x, op1));
33006 op1 = gen_lowpart (TImode, x);
33007 }
33008
33009 if (!insn_data[icode].operand[1].predicate (op0, mode0))
33010 op0 = copy_to_mode_reg (mode0, op0);
33011 if (!insn_data[icode].operand[2].predicate (op1, mode1))
33012 op1 = copy_to_mode_reg (mode1, op1);
33013
33014 pat = GEN_FCN (icode) (target, op0, op1);
33015 if (! pat)
33016 return 0;
33017
33018 emit_insn (pat);
33019
33020 return target;
33021 }
33022
33023 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
33024
33025 static rtx
33026 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
33027 enum ix86_builtin_func_type m_type,
33028 enum rtx_code sub_code)
33029 {
33030 rtx pat;
33031 int i;
33032 int nargs;
33033 bool comparison_p = false;
33034 bool tf_p = false;
33035 bool last_arg_constant = false;
33036 int num_memory = 0;
33037 struct {
33038 rtx op;
33039 enum machine_mode mode;
33040 } args[4];
33041
33042 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33043
33044 switch (m_type)
33045 {
33046 case MULTI_ARG_4_DF2_DI_I:
33047 case MULTI_ARG_4_DF2_DI_I1:
33048 case MULTI_ARG_4_SF2_SI_I:
33049 case MULTI_ARG_4_SF2_SI_I1:
33050 nargs = 4;
33051 last_arg_constant = true;
33052 break;
33053
33054 case MULTI_ARG_3_SF:
33055 case MULTI_ARG_3_DF:
33056 case MULTI_ARG_3_SF2:
33057 case MULTI_ARG_3_DF2:
33058 case MULTI_ARG_3_DI:
33059 case MULTI_ARG_3_SI:
33060 case MULTI_ARG_3_SI_DI:
33061 case MULTI_ARG_3_HI:
33062 case MULTI_ARG_3_HI_SI:
33063 case MULTI_ARG_3_QI:
33064 case MULTI_ARG_3_DI2:
33065 case MULTI_ARG_3_SI2:
33066 case MULTI_ARG_3_HI2:
33067 case MULTI_ARG_3_QI2:
33068 nargs = 3;
33069 break;
33070
33071 case MULTI_ARG_2_SF:
33072 case MULTI_ARG_2_DF:
33073 case MULTI_ARG_2_DI:
33074 case MULTI_ARG_2_SI:
33075 case MULTI_ARG_2_HI:
33076 case MULTI_ARG_2_QI:
33077 nargs = 2;
33078 break;
33079
33080 case MULTI_ARG_2_DI_IMM:
33081 case MULTI_ARG_2_SI_IMM:
33082 case MULTI_ARG_2_HI_IMM:
33083 case MULTI_ARG_2_QI_IMM:
33084 nargs = 2;
33085 last_arg_constant = true;
33086 break;
33087
33088 case MULTI_ARG_1_SF:
33089 case MULTI_ARG_1_DF:
33090 case MULTI_ARG_1_SF2:
33091 case MULTI_ARG_1_DF2:
33092 case MULTI_ARG_1_DI:
33093 case MULTI_ARG_1_SI:
33094 case MULTI_ARG_1_HI:
33095 case MULTI_ARG_1_QI:
33096 case MULTI_ARG_1_SI_DI:
33097 case MULTI_ARG_1_HI_DI:
33098 case MULTI_ARG_1_HI_SI:
33099 case MULTI_ARG_1_QI_DI:
33100 case MULTI_ARG_1_QI_SI:
33101 case MULTI_ARG_1_QI_HI:
33102 nargs = 1;
33103 break;
33104
33105 case MULTI_ARG_2_DI_CMP:
33106 case MULTI_ARG_2_SI_CMP:
33107 case MULTI_ARG_2_HI_CMP:
33108 case MULTI_ARG_2_QI_CMP:
33109 nargs = 2;
33110 comparison_p = true;
33111 break;
33112
33113 case MULTI_ARG_2_SF_TF:
33114 case MULTI_ARG_2_DF_TF:
33115 case MULTI_ARG_2_DI_TF:
33116 case MULTI_ARG_2_SI_TF:
33117 case MULTI_ARG_2_HI_TF:
33118 case MULTI_ARG_2_QI_TF:
33119 nargs = 2;
33120 tf_p = true;
33121 break;
33122
33123 default:
33124 gcc_unreachable ();
33125 }
33126
33127 if (optimize || !target
33128 || GET_MODE (target) != tmode
33129 || !insn_data[icode].operand[0].predicate (target, tmode))
33130 target = gen_reg_rtx (tmode);
33131
33132 gcc_assert (nargs <= 4);
33133
33134 for (i = 0; i < nargs; i++)
33135 {
33136 tree arg = CALL_EXPR_ARG (exp, i);
33137 rtx op = expand_normal (arg);
33138 int adjust = (comparison_p) ? 1 : 0;
33139 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33140
33141 if (last_arg_constant && i == nargs - 1)
33142 {
33143 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33144 {
33145 enum insn_code new_icode = icode;
33146 switch (icode)
33147 {
33148 case CODE_FOR_xop_vpermil2v2df3:
33149 case CODE_FOR_xop_vpermil2v4sf3:
33150 case CODE_FOR_xop_vpermil2v4df3:
33151 case CODE_FOR_xop_vpermil2v8sf3:
33152 error ("the last argument must be a 2-bit immediate");
33153 return gen_reg_rtx (tmode);
33154 case CODE_FOR_xop_rotlv2di3:
33155 new_icode = CODE_FOR_rotlv2di3;
33156 goto xop_rotl;
33157 case CODE_FOR_xop_rotlv4si3:
33158 new_icode = CODE_FOR_rotlv4si3;
33159 goto xop_rotl;
33160 case CODE_FOR_xop_rotlv8hi3:
33161 new_icode = CODE_FOR_rotlv8hi3;
33162 goto xop_rotl;
33163 case CODE_FOR_xop_rotlv16qi3:
33164 new_icode = CODE_FOR_rotlv16qi3;
33165 xop_rotl:
33166 if (CONST_INT_P (op))
33167 {
33168 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33169 op = GEN_INT (INTVAL (op) & mask);
33170 gcc_checking_assert
33171 (insn_data[icode].operand[i + 1].predicate (op, mode));
33172 }
33173 else
33174 {
33175 gcc_checking_assert
33176 (nargs == 2
33177 && insn_data[new_icode].operand[0].mode == tmode
33178 && insn_data[new_icode].operand[1].mode == tmode
33179 && insn_data[new_icode].operand[2].mode == mode
33180 && insn_data[new_icode].operand[0].predicate
33181 == insn_data[icode].operand[0].predicate
33182 && insn_data[new_icode].operand[1].predicate
33183 == insn_data[icode].operand[1].predicate);
33184 icode = new_icode;
33185 goto non_constant;
33186 }
33187 break;
33188 default:
33189 gcc_unreachable ();
33190 }
33191 }
33192 }
33193 else
33194 {
33195 non_constant:
33196 if (VECTOR_MODE_P (mode))
33197 op = safe_vector_operand (op, mode);
33198
33199 /* If we aren't optimizing, only allow one memory operand to be
33200 generated. */
33201 if (memory_operand (op, mode))
33202 num_memory++;
33203
33204 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33205
33206 if (optimize
33207 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33208 || num_memory > 1)
33209 op = force_reg (mode, op);
33210 }
33211
33212 args[i].op = op;
33213 args[i].mode = mode;
33214 }
33215
33216 switch (nargs)
33217 {
33218 case 1:
33219 pat = GEN_FCN (icode) (target, args[0].op);
33220 break;
33221
33222 case 2:
33223 if (tf_p)
33224 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33225 GEN_INT ((int)sub_code));
33226 else if (! comparison_p)
33227 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33228 else
33229 {
33230 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33231 args[0].op,
33232 args[1].op);
33233
33234 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33235 }
33236 break;
33237
33238 case 3:
33239 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33240 break;
33241
33242 case 4:
33243 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33244 break;
33245
33246 default:
33247 gcc_unreachable ();
33248 }
33249
33250 if (! pat)
33251 return 0;
33252
33253 emit_insn (pat);
33254 return target;
33255 }
33256
33257 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33258 insns with vec_merge. */
33259
33260 static rtx
33261 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33262 rtx target)
33263 {
33264 rtx pat;
33265 tree arg0 = CALL_EXPR_ARG (exp, 0);
33266 rtx op1, op0 = expand_normal (arg0);
33267 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33268 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33269
33270 if (optimize || !target
33271 || GET_MODE (target) != tmode
33272 || !insn_data[icode].operand[0].predicate (target, tmode))
33273 target = gen_reg_rtx (tmode);
33274
33275 if (VECTOR_MODE_P (mode0))
33276 op0 = safe_vector_operand (op0, mode0);
33277
33278 if ((optimize && !register_operand (op0, mode0))
33279 || !insn_data[icode].operand[1].predicate (op0, mode0))
33280 op0 = copy_to_mode_reg (mode0, op0);
33281
33282 op1 = op0;
33283 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33284 op1 = copy_to_mode_reg (mode0, op1);
33285
33286 pat = GEN_FCN (icode) (target, op0, op1);
33287 if (! pat)
33288 return 0;
33289 emit_insn (pat);
33290 return target;
33291 }
33292
33293 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33294
33295 static rtx
33296 ix86_expand_sse_compare (const struct builtin_description *d,
33297 tree exp, rtx target, bool swap)
33298 {
33299 rtx pat;
33300 tree arg0 = CALL_EXPR_ARG (exp, 0);
33301 tree arg1 = CALL_EXPR_ARG (exp, 1);
33302 rtx op0 = expand_normal (arg0);
33303 rtx op1 = expand_normal (arg1);
33304 rtx op2;
33305 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33306 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33307 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33308 enum rtx_code comparison = d->comparison;
33309
33310 if (VECTOR_MODE_P (mode0))
33311 op0 = safe_vector_operand (op0, mode0);
33312 if (VECTOR_MODE_P (mode1))
33313 op1 = safe_vector_operand (op1, mode1);
33314
33315 /* Swap operands if we have a comparison that isn't available in
33316 hardware. */
33317 if (swap)
33318 {
33319 rtx tmp = gen_reg_rtx (mode1);
33320 emit_move_insn (tmp, op1);
33321 op1 = op0;
33322 op0 = tmp;
33323 }
33324
33325 if (optimize || !target
33326 || GET_MODE (target) != tmode
33327 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33328 target = gen_reg_rtx (tmode);
33329
33330 if ((optimize && !register_operand (op0, mode0))
33331 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33332 op0 = copy_to_mode_reg (mode0, op0);
33333 if ((optimize && !register_operand (op1, mode1))
33334 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33335 op1 = copy_to_mode_reg (mode1, op1);
33336
33337 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33338 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33339 if (! pat)
33340 return 0;
33341 emit_insn (pat);
33342 return target;
33343 }
33344
33345 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33346
33347 static rtx
33348 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33349 rtx target)
33350 {
33351 rtx pat;
33352 tree arg0 = CALL_EXPR_ARG (exp, 0);
33353 tree arg1 = CALL_EXPR_ARG (exp, 1);
33354 rtx op0 = expand_normal (arg0);
33355 rtx op1 = expand_normal (arg1);
33356 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33357 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33358 enum rtx_code comparison = d->comparison;
33359
33360 if (VECTOR_MODE_P (mode0))
33361 op0 = safe_vector_operand (op0, mode0);
33362 if (VECTOR_MODE_P (mode1))
33363 op1 = safe_vector_operand (op1, mode1);
33364
33365 /* Swap operands if we have a comparison that isn't available in
33366 hardware. */
33367 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33368 {
33369 rtx tmp = op1;
33370 op1 = op0;
33371 op0 = tmp;
33372 }
33373
33374 target = gen_reg_rtx (SImode);
33375 emit_move_insn (target, const0_rtx);
33376 target = gen_rtx_SUBREG (QImode, target, 0);
33377
33378 if ((optimize && !register_operand (op0, mode0))
33379 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33380 op0 = copy_to_mode_reg (mode0, op0);
33381 if ((optimize && !register_operand (op1, mode1))
33382 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33383 op1 = copy_to_mode_reg (mode1, op1);
33384
33385 pat = GEN_FCN (d->icode) (op0, op1);
33386 if (! pat)
33387 return 0;
33388 emit_insn (pat);
33389 emit_insn (gen_rtx_SET (VOIDmode,
33390 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33391 gen_rtx_fmt_ee (comparison, QImode,
33392 SET_DEST (pat),
33393 const0_rtx)));
33394
33395 return SUBREG_REG (target);
33396 }
33397
33398 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33399
33400 static rtx
33401 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33402 rtx target)
33403 {
33404 rtx pat;
33405 tree arg0 = CALL_EXPR_ARG (exp, 0);
33406 rtx op1, op0 = expand_normal (arg0);
33407 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33408 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33409
33410 if (optimize || target == 0
33411 || GET_MODE (target) != tmode
33412 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33413 target = gen_reg_rtx (tmode);
33414
33415 if (VECTOR_MODE_P (mode0))
33416 op0 = safe_vector_operand (op0, mode0);
33417
33418 if ((optimize && !register_operand (op0, mode0))
33419 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33420 op0 = copy_to_mode_reg (mode0, op0);
33421
33422 op1 = GEN_INT (d->comparison);
33423
33424 pat = GEN_FCN (d->icode) (target, op0, op1);
33425 if (! pat)
33426 return 0;
33427 emit_insn (pat);
33428 return target;
33429 }
33430
33431 static rtx
33432 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33433 tree exp, rtx target)
33434 {
33435 rtx pat;
33436 tree arg0 = CALL_EXPR_ARG (exp, 0);
33437 tree arg1 = CALL_EXPR_ARG (exp, 1);
33438 rtx op0 = expand_normal (arg0);
33439 rtx op1 = expand_normal (arg1);
33440 rtx op2;
33441 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33442 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33443 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33444
33445 if (optimize || target == 0
33446 || GET_MODE (target) != tmode
33447 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33448 target = gen_reg_rtx (tmode);
33449
33450 op0 = safe_vector_operand (op0, mode0);
33451 op1 = safe_vector_operand (op1, mode1);
33452
33453 if ((optimize && !register_operand (op0, mode0))
33454 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33455 op0 = copy_to_mode_reg (mode0, op0);
33456 if ((optimize && !register_operand (op1, mode1))
33457 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33458 op1 = copy_to_mode_reg (mode1, op1);
33459
33460 op2 = GEN_INT (d->comparison);
33461
33462 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33463 if (! pat)
33464 return 0;
33465 emit_insn (pat);
33466 return target;
33467 }
33468
33469 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33470
33471 static rtx
33472 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33473 rtx target)
33474 {
33475 rtx pat;
33476 tree arg0 = CALL_EXPR_ARG (exp, 0);
33477 tree arg1 = CALL_EXPR_ARG (exp, 1);
33478 rtx op0 = expand_normal (arg0);
33479 rtx op1 = expand_normal (arg1);
33480 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33481 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33482 enum rtx_code comparison = d->comparison;
33483
33484 if (VECTOR_MODE_P (mode0))
33485 op0 = safe_vector_operand (op0, mode0);
33486 if (VECTOR_MODE_P (mode1))
33487 op1 = safe_vector_operand (op1, mode1);
33488
33489 target = gen_reg_rtx (SImode);
33490 emit_move_insn (target, const0_rtx);
33491 target = gen_rtx_SUBREG (QImode, target, 0);
33492
33493 if ((optimize && !register_operand (op0, mode0))
33494 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33495 op0 = copy_to_mode_reg (mode0, op0);
33496 if ((optimize && !register_operand (op1, mode1))
33497 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33498 op1 = copy_to_mode_reg (mode1, op1);
33499
33500 pat = GEN_FCN (d->icode) (op0, op1);
33501 if (! pat)
33502 return 0;
33503 emit_insn (pat);
33504 emit_insn (gen_rtx_SET (VOIDmode,
33505 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33506 gen_rtx_fmt_ee (comparison, QImode,
33507 SET_DEST (pat),
33508 const0_rtx)));
33509
33510 return SUBREG_REG (target);
33511 }
33512
33513 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33514
33515 static rtx
33516 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33517 tree exp, rtx target)
33518 {
33519 rtx pat;
33520 tree arg0 = CALL_EXPR_ARG (exp, 0);
33521 tree arg1 = CALL_EXPR_ARG (exp, 1);
33522 tree arg2 = CALL_EXPR_ARG (exp, 2);
33523 tree arg3 = CALL_EXPR_ARG (exp, 3);
33524 tree arg4 = CALL_EXPR_ARG (exp, 4);
33525 rtx scratch0, scratch1;
33526 rtx op0 = expand_normal (arg0);
33527 rtx op1 = expand_normal (arg1);
33528 rtx op2 = expand_normal (arg2);
33529 rtx op3 = expand_normal (arg3);
33530 rtx op4 = expand_normal (arg4);
33531 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33532
33533 tmode0 = insn_data[d->icode].operand[0].mode;
33534 tmode1 = insn_data[d->icode].operand[1].mode;
33535 modev2 = insn_data[d->icode].operand[2].mode;
33536 modei3 = insn_data[d->icode].operand[3].mode;
33537 modev4 = insn_data[d->icode].operand[4].mode;
33538 modei5 = insn_data[d->icode].operand[5].mode;
33539 modeimm = insn_data[d->icode].operand[6].mode;
33540
33541 if (VECTOR_MODE_P (modev2))
33542 op0 = safe_vector_operand (op0, modev2);
33543 if (VECTOR_MODE_P (modev4))
33544 op2 = safe_vector_operand (op2, modev4);
33545
33546 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33547 op0 = copy_to_mode_reg (modev2, op0);
33548 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33549 op1 = copy_to_mode_reg (modei3, op1);
33550 if ((optimize && !register_operand (op2, modev4))
33551 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33552 op2 = copy_to_mode_reg (modev4, op2);
33553 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33554 op3 = copy_to_mode_reg (modei5, op3);
33555
33556 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33557 {
33558 error ("the fifth argument must be an 8-bit immediate");
33559 return const0_rtx;
33560 }
33561
33562 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33563 {
33564 if (optimize || !target
33565 || GET_MODE (target) != tmode0
33566 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33567 target = gen_reg_rtx (tmode0);
33568
33569 scratch1 = gen_reg_rtx (tmode1);
33570
33571 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33572 }
33573 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33574 {
33575 if (optimize || !target
33576 || GET_MODE (target) != tmode1
33577 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33578 target = gen_reg_rtx (tmode1);
33579
33580 scratch0 = gen_reg_rtx (tmode0);
33581
33582 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33583 }
33584 else
33585 {
33586 gcc_assert (d->flag);
33587
33588 scratch0 = gen_reg_rtx (tmode0);
33589 scratch1 = gen_reg_rtx (tmode1);
33590
33591 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33592 }
33593
33594 if (! pat)
33595 return 0;
33596
33597 emit_insn (pat);
33598
33599 if (d->flag)
33600 {
33601 target = gen_reg_rtx (SImode);
33602 emit_move_insn (target, const0_rtx);
33603 target = gen_rtx_SUBREG (QImode, target, 0);
33604
33605 emit_insn
33606 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33607 gen_rtx_fmt_ee (EQ, QImode,
33608 gen_rtx_REG ((enum machine_mode) d->flag,
33609 FLAGS_REG),
33610 const0_rtx)));
33611 return SUBREG_REG (target);
33612 }
33613 else
33614 return target;
33615 }
33616
33617
33618 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33619
33620 static rtx
33621 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33622 tree exp, rtx target)
33623 {
33624 rtx pat;
33625 tree arg0 = CALL_EXPR_ARG (exp, 0);
33626 tree arg1 = CALL_EXPR_ARG (exp, 1);
33627 tree arg2 = CALL_EXPR_ARG (exp, 2);
33628 rtx scratch0, scratch1;
33629 rtx op0 = expand_normal (arg0);
33630 rtx op1 = expand_normal (arg1);
33631 rtx op2 = expand_normal (arg2);
33632 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33633
33634 tmode0 = insn_data[d->icode].operand[0].mode;
33635 tmode1 = insn_data[d->icode].operand[1].mode;
33636 modev2 = insn_data[d->icode].operand[2].mode;
33637 modev3 = insn_data[d->icode].operand[3].mode;
33638 modeimm = insn_data[d->icode].operand[4].mode;
33639
33640 if (VECTOR_MODE_P (modev2))
33641 op0 = safe_vector_operand (op0, modev2);
33642 if (VECTOR_MODE_P (modev3))
33643 op1 = safe_vector_operand (op1, modev3);
33644
33645 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33646 op0 = copy_to_mode_reg (modev2, op0);
33647 if ((optimize && !register_operand (op1, modev3))
33648 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33649 op1 = copy_to_mode_reg (modev3, op1);
33650
33651 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33652 {
33653 error ("the third argument must be an 8-bit immediate");
33654 return const0_rtx;
33655 }
33656
33657 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33658 {
33659 if (optimize || !target
33660 || GET_MODE (target) != tmode0
33661 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33662 target = gen_reg_rtx (tmode0);
33663
33664 scratch1 = gen_reg_rtx (tmode1);
33665
33666 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33667 }
33668 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33669 {
33670 if (optimize || !target
33671 || GET_MODE (target) != tmode1
33672 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33673 target = gen_reg_rtx (tmode1);
33674
33675 scratch0 = gen_reg_rtx (tmode0);
33676
33677 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33678 }
33679 else
33680 {
33681 gcc_assert (d->flag);
33682
33683 scratch0 = gen_reg_rtx (tmode0);
33684 scratch1 = gen_reg_rtx (tmode1);
33685
33686 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33687 }
33688
33689 if (! pat)
33690 return 0;
33691
33692 emit_insn (pat);
33693
33694 if (d->flag)
33695 {
33696 target = gen_reg_rtx (SImode);
33697 emit_move_insn (target, const0_rtx);
33698 target = gen_rtx_SUBREG (QImode, target, 0);
33699
33700 emit_insn
33701 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33702 gen_rtx_fmt_ee (EQ, QImode,
33703 gen_rtx_REG ((enum machine_mode) d->flag,
33704 FLAGS_REG),
33705 const0_rtx)));
33706 return SUBREG_REG (target);
33707 }
33708 else
33709 return target;
33710 }
33711
33712 /* Subroutine of ix86_expand_builtin to take care of insns with
33713 a variable number of operands. */
33714
33715 static rtx
33716 ix86_expand_args_builtin (const struct builtin_description *d,
33717 tree exp, rtx target)
33718 {
33719 rtx pat, real_target;
33720 unsigned int i, nargs;
33721 unsigned int nargs_constant = 0;
33722 unsigned int mask_pos = 0;
33723 int num_memory = 0;
33724 struct
33725 {
33726 rtx op;
33727 enum machine_mode mode;
33728 } args[6];
33729 bool last_arg_count = false;
33730 enum insn_code icode = d->icode;
33731 const struct insn_data_d *insn_p = &insn_data[icode];
33732 enum machine_mode tmode = insn_p->operand[0].mode;
33733 enum machine_mode rmode = VOIDmode;
33734 bool swap = false;
33735 enum rtx_code comparison = d->comparison;
33736
33737 switch ((enum ix86_builtin_func_type) d->flag)
33738 {
33739 case V2DF_FTYPE_V2DF_ROUND:
33740 case V4DF_FTYPE_V4DF_ROUND:
33741 case V4SF_FTYPE_V4SF_ROUND:
33742 case V8SF_FTYPE_V8SF_ROUND:
33743 case V4SI_FTYPE_V4SF_ROUND:
33744 case V8SI_FTYPE_V8SF_ROUND:
33745 return ix86_expand_sse_round (d, exp, target);
33746 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33747 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33748 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33749 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33750 case INT_FTYPE_V8SF_V8SF_PTEST:
33751 case INT_FTYPE_V4DI_V4DI_PTEST:
33752 case INT_FTYPE_V4DF_V4DF_PTEST:
33753 case INT_FTYPE_V4SF_V4SF_PTEST:
33754 case INT_FTYPE_V2DI_V2DI_PTEST:
33755 case INT_FTYPE_V2DF_V2DF_PTEST:
33756 return ix86_expand_sse_ptest (d, exp, target);
33757 case FLOAT128_FTYPE_FLOAT128:
33758 case FLOAT_FTYPE_FLOAT:
33759 case INT_FTYPE_INT:
33760 case UINT64_FTYPE_INT:
33761 case UINT16_FTYPE_UINT16:
33762 case INT64_FTYPE_INT64:
33763 case INT64_FTYPE_V4SF:
33764 case INT64_FTYPE_V2DF:
33765 case INT_FTYPE_V16QI:
33766 case INT_FTYPE_V8QI:
33767 case INT_FTYPE_V8SF:
33768 case INT_FTYPE_V4DF:
33769 case INT_FTYPE_V4SF:
33770 case INT_FTYPE_V2DF:
33771 case INT_FTYPE_V32QI:
33772 case V16QI_FTYPE_V16QI:
33773 case V8SI_FTYPE_V8SF:
33774 case V8SI_FTYPE_V4SI:
33775 case V8HI_FTYPE_V8HI:
33776 case V8HI_FTYPE_V16QI:
33777 case V8QI_FTYPE_V8QI:
33778 case V8SF_FTYPE_V8SF:
33779 case V8SF_FTYPE_V8SI:
33780 case V8SF_FTYPE_V4SF:
33781 case V8SF_FTYPE_V8HI:
33782 case V4SI_FTYPE_V4SI:
33783 case V4SI_FTYPE_V16QI:
33784 case V4SI_FTYPE_V4SF:
33785 case V4SI_FTYPE_V8SI:
33786 case V4SI_FTYPE_V8HI:
33787 case V4SI_FTYPE_V4DF:
33788 case V4SI_FTYPE_V2DF:
33789 case V4HI_FTYPE_V4HI:
33790 case V4DF_FTYPE_V4DF:
33791 case V4DF_FTYPE_V4SI:
33792 case V4DF_FTYPE_V4SF:
33793 case V4DF_FTYPE_V2DF:
33794 case V4SF_FTYPE_V4SF:
33795 case V4SF_FTYPE_V4SI:
33796 case V4SF_FTYPE_V8SF:
33797 case V4SF_FTYPE_V4DF:
33798 case V4SF_FTYPE_V8HI:
33799 case V4SF_FTYPE_V2DF:
33800 case V2DI_FTYPE_V2DI:
33801 case V2DI_FTYPE_V16QI:
33802 case V2DI_FTYPE_V8HI:
33803 case V2DI_FTYPE_V4SI:
33804 case V2DF_FTYPE_V2DF:
33805 case V2DF_FTYPE_V4SI:
33806 case V2DF_FTYPE_V4DF:
33807 case V2DF_FTYPE_V4SF:
33808 case V2DF_FTYPE_V2SI:
33809 case V2SI_FTYPE_V2SI:
33810 case V2SI_FTYPE_V4SF:
33811 case V2SI_FTYPE_V2SF:
33812 case V2SI_FTYPE_V2DF:
33813 case V2SF_FTYPE_V2SF:
33814 case V2SF_FTYPE_V2SI:
33815 case V32QI_FTYPE_V32QI:
33816 case V32QI_FTYPE_V16QI:
33817 case V16HI_FTYPE_V16HI:
33818 case V16HI_FTYPE_V8HI:
33819 case V8SI_FTYPE_V8SI:
33820 case V16HI_FTYPE_V16QI:
33821 case V8SI_FTYPE_V16QI:
33822 case V4DI_FTYPE_V16QI:
33823 case V8SI_FTYPE_V8HI:
33824 case V4DI_FTYPE_V8HI:
33825 case V4DI_FTYPE_V4SI:
33826 case V4DI_FTYPE_V2DI:
33827 case HI_FTYPE_HI:
33828 case UINT_FTYPE_V2DF:
33829 case UINT_FTYPE_V4SF:
33830 case UINT64_FTYPE_V2DF:
33831 case UINT64_FTYPE_V4SF:
33832 case V16QI_FTYPE_V8DI:
33833 case V16HI_FTYPE_V16SI:
33834 case V16SI_FTYPE_HI:
33835 case V16SI_FTYPE_V16SI:
33836 case V16SI_FTYPE_INT:
33837 case V16SF_FTYPE_FLOAT:
33838 case V16SF_FTYPE_V8SF:
33839 case V16SI_FTYPE_V8SI:
33840 case V16SF_FTYPE_V4SF:
33841 case V16SI_FTYPE_V4SI:
33842 case V16SF_FTYPE_V16SF:
33843 case V8HI_FTYPE_V8DI:
33844 case V8UHI_FTYPE_V8UHI:
33845 case V8SI_FTYPE_V8DI:
33846 case V8USI_FTYPE_V8USI:
33847 case V8SF_FTYPE_V8DF:
33848 case V8DI_FTYPE_QI:
33849 case V8DI_FTYPE_INT64:
33850 case V8DI_FTYPE_V4DI:
33851 case V8DI_FTYPE_V8DI:
33852 case V8DF_FTYPE_DOUBLE:
33853 case V8DF_FTYPE_V4DF:
33854 case V8DF_FTYPE_V2DF:
33855 case V8DF_FTYPE_V8DF:
33856 case V8DF_FTYPE_V8SI:
33857 nargs = 1;
33858 break;
33859 case V4SF_FTYPE_V4SF_VEC_MERGE:
33860 case V2DF_FTYPE_V2DF_VEC_MERGE:
33861 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33862 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33863 case V16QI_FTYPE_V16QI_V16QI:
33864 case V16QI_FTYPE_V8HI_V8HI:
33865 case V16SI_FTYPE_V16SI_V16SI:
33866 case V16SF_FTYPE_V16SF_V16SF:
33867 case V16SF_FTYPE_V16SF_V16SI:
33868 case V8QI_FTYPE_V8QI_V8QI:
33869 case V8QI_FTYPE_V4HI_V4HI:
33870 case V8HI_FTYPE_V8HI_V8HI:
33871 case V8HI_FTYPE_V16QI_V16QI:
33872 case V8HI_FTYPE_V4SI_V4SI:
33873 case V8SF_FTYPE_V8SF_V8SF:
33874 case V8SF_FTYPE_V8SF_V8SI:
33875 case V8DI_FTYPE_V8DI_V8DI:
33876 case V8DF_FTYPE_V8DF_V8DF:
33877 case V8DF_FTYPE_V8DF_V8DI:
33878 case V4SI_FTYPE_V4SI_V4SI:
33879 case V4SI_FTYPE_V8HI_V8HI:
33880 case V4SI_FTYPE_V4SF_V4SF:
33881 case V4SI_FTYPE_V2DF_V2DF:
33882 case V4HI_FTYPE_V4HI_V4HI:
33883 case V4HI_FTYPE_V8QI_V8QI:
33884 case V4HI_FTYPE_V2SI_V2SI:
33885 case V4DF_FTYPE_V4DF_V4DF:
33886 case V4DF_FTYPE_V4DF_V4DI:
33887 case V4SF_FTYPE_V4SF_V4SF:
33888 case V4SF_FTYPE_V4SF_V4SI:
33889 case V4SF_FTYPE_V4SF_V2SI:
33890 case V4SF_FTYPE_V4SF_V2DF:
33891 case V4SF_FTYPE_V4SF_UINT:
33892 case V4SF_FTYPE_V4SF_UINT64:
33893 case V4SF_FTYPE_V4SF_DI:
33894 case V4SF_FTYPE_V4SF_SI:
33895 case V2DI_FTYPE_V2DI_V2DI:
33896 case V2DI_FTYPE_V16QI_V16QI:
33897 case V2DI_FTYPE_V4SI_V4SI:
33898 case V2UDI_FTYPE_V4USI_V4USI:
33899 case V2DI_FTYPE_V2DI_V16QI:
33900 case V2DI_FTYPE_V2DF_V2DF:
33901 case V2SI_FTYPE_V2SI_V2SI:
33902 case V2SI_FTYPE_V4HI_V4HI:
33903 case V2SI_FTYPE_V2SF_V2SF:
33904 case V2DF_FTYPE_V2DF_V2DF:
33905 case V2DF_FTYPE_V2DF_V4SF:
33906 case V2DF_FTYPE_V2DF_V2DI:
33907 case V2DF_FTYPE_V2DF_DI:
33908 case V2DF_FTYPE_V2DF_SI:
33909 case V2DF_FTYPE_V2DF_UINT:
33910 case V2DF_FTYPE_V2DF_UINT64:
33911 case V2SF_FTYPE_V2SF_V2SF:
33912 case V1DI_FTYPE_V1DI_V1DI:
33913 case V1DI_FTYPE_V8QI_V8QI:
33914 case V1DI_FTYPE_V2SI_V2SI:
33915 case V32QI_FTYPE_V16HI_V16HI:
33916 case V16HI_FTYPE_V8SI_V8SI:
33917 case V32QI_FTYPE_V32QI_V32QI:
33918 case V16HI_FTYPE_V32QI_V32QI:
33919 case V16HI_FTYPE_V16HI_V16HI:
33920 case V8SI_FTYPE_V4DF_V4DF:
33921 case V8SI_FTYPE_V8SI_V8SI:
33922 case V8SI_FTYPE_V16HI_V16HI:
33923 case V4DI_FTYPE_V4DI_V4DI:
33924 case V4DI_FTYPE_V8SI_V8SI:
33925 case V4UDI_FTYPE_V8USI_V8USI:
33926 case QI_FTYPE_V8DI_V8DI:
33927 case HI_FTYPE_V16SI_V16SI:
33928 if (comparison == UNKNOWN)
33929 return ix86_expand_binop_builtin (icode, exp, target);
33930 nargs = 2;
33931 break;
33932 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33933 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33934 gcc_assert (comparison != UNKNOWN);
33935 nargs = 2;
33936 swap = true;
33937 break;
33938 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33939 case V16HI_FTYPE_V16HI_SI_COUNT:
33940 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33941 case V8SI_FTYPE_V8SI_SI_COUNT:
33942 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33943 case V4DI_FTYPE_V4DI_INT_COUNT:
33944 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33945 case V8HI_FTYPE_V8HI_SI_COUNT:
33946 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33947 case V4SI_FTYPE_V4SI_SI_COUNT:
33948 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33949 case V4HI_FTYPE_V4HI_SI_COUNT:
33950 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33951 case V2DI_FTYPE_V2DI_SI_COUNT:
33952 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33953 case V2SI_FTYPE_V2SI_SI_COUNT:
33954 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33955 case V1DI_FTYPE_V1DI_SI_COUNT:
33956 nargs = 2;
33957 last_arg_count = true;
33958 break;
33959 case UINT64_FTYPE_UINT64_UINT64:
33960 case UINT_FTYPE_UINT_UINT:
33961 case UINT_FTYPE_UINT_USHORT:
33962 case UINT_FTYPE_UINT_UCHAR:
33963 case UINT16_FTYPE_UINT16_INT:
33964 case UINT8_FTYPE_UINT8_INT:
33965 case HI_FTYPE_HI_HI:
33966 case V16SI_FTYPE_V8DF_V8DF:
33967 nargs = 2;
33968 break;
33969 case V2DI_FTYPE_V2DI_INT_CONVERT:
33970 nargs = 2;
33971 rmode = V1TImode;
33972 nargs_constant = 1;
33973 break;
33974 case V4DI_FTYPE_V4DI_INT_CONVERT:
33975 nargs = 2;
33976 rmode = V2TImode;
33977 nargs_constant = 1;
33978 break;
33979 case V8HI_FTYPE_V8HI_INT:
33980 case V8HI_FTYPE_V8SF_INT:
33981 case V16HI_FTYPE_V16SF_INT:
33982 case V8HI_FTYPE_V4SF_INT:
33983 case V8SF_FTYPE_V8SF_INT:
33984 case V4SF_FTYPE_V16SF_INT:
33985 case V16SF_FTYPE_V16SF_INT:
33986 case V4SI_FTYPE_V4SI_INT:
33987 case V4SI_FTYPE_V8SI_INT:
33988 case V4HI_FTYPE_V4HI_INT:
33989 case V4DF_FTYPE_V4DF_INT:
33990 case V4DF_FTYPE_V8DF_INT:
33991 case V4SF_FTYPE_V4SF_INT:
33992 case V4SF_FTYPE_V8SF_INT:
33993 case V2DI_FTYPE_V2DI_INT:
33994 case V2DF_FTYPE_V2DF_INT:
33995 case V2DF_FTYPE_V4DF_INT:
33996 case V16HI_FTYPE_V16HI_INT:
33997 case V8SI_FTYPE_V8SI_INT:
33998 case V16SI_FTYPE_V16SI_INT:
33999 case V4SI_FTYPE_V16SI_INT:
34000 case V4DI_FTYPE_V4DI_INT:
34001 case V2DI_FTYPE_V4DI_INT:
34002 case V4DI_FTYPE_V8DI_INT:
34003 case HI_FTYPE_HI_INT:
34004 nargs = 2;
34005 nargs_constant = 1;
34006 break;
34007 case V16QI_FTYPE_V16QI_V16QI_V16QI:
34008 case V8SF_FTYPE_V8SF_V8SF_V8SF:
34009 case V4DF_FTYPE_V4DF_V4DF_V4DF:
34010 case V4SF_FTYPE_V4SF_V4SF_V4SF:
34011 case V2DF_FTYPE_V2DF_V2DF_V2DF:
34012 case V32QI_FTYPE_V32QI_V32QI_V32QI:
34013 case HI_FTYPE_V16SI_V16SI_HI:
34014 case QI_FTYPE_V8DI_V8DI_QI:
34015 case V16HI_FTYPE_V16SI_V16HI_HI:
34016 case V16QI_FTYPE_V16SI_V16QI_HI:
34017 case V16QI_FTYPE_V8DI_V16QI_QI:
34018 case V16SF_FTYPE_V16SF_V16SF_HI:
34019 case V16SF_FTYPE_V16SF_V16SF_V16SF:
34020 case V16SF_FTYPE_V16SF_V16SI_V16SF:
34021 case V16SF_FTYPE_V16SI_V16SF_HI:
34022 case V16SF_FTYPE_V16SI_V16SF_V16SF:
34023 case V16SF_FTYPE_V4SF_V16SF_HI:
34024 case V16SI_FTYPE_SI_V16SI_HI:
34025 case V16SI_FTYPE_V16HI_V16SI_HI:
34026 case V16SI_FTYPE_V16QI_V16SI_HI:
34027 case V16SI_FTYPE_V16SF_V16SI_HI:
34028 case V16SI_FTYPE_V16SI_V16SI_HI:
34029 case V16SI_FTYPE_V16SI_V16SI_V16SI:
34030 case V16SI_FTYPE_V4SI_V16SI_HI:
34031 case V2DI_FTYPE_V2DI_V2DI_V2DI:
34032 case V4DI_FTYPE_V4DI_V4DI_V4DI:
34033 case V8DF_FTYPE_V2DF_V8DF_QI:
34034 case V8DF_FTYPE_V4DF_V8DF_QI:
34035 case V8DF_FTYPE_V8DF_V8DF_QI:
34036 case V8DF_FTYPE_V8DF_V8DF_V8DF:
34037 case V8DF_FTYPE_V8DF_V8DI_V8DF:
34038 case V8DF_FTYPE_V8DI_V8DF_V8DF:
34039 case V8DF_FTYPE_V8SF_V8DF_QI:
34040 case V8DF_FTYPE_V8SI_V8DF_QI:
34041 case V8DI_FTYPE_DI_V8DI_QI:
34042 case V8DI_FTYPE_V16QI_V8DI_QI:
34043 case V8DI_FTYPE_V2DI_V8DI_QI:
34044 case V8DI_FTYPE_V4DI_V8DI_QI:
34045 case V8DI_FTYPE_V8DI_V8DI_QI:
34046 case V8DI_FTYPE_V8DI_V8DI_V8DI:
34047 case V8DI_FTYPE_V8HI_V8DI_QI:
34048 case V8DI_FTYPE_V8SI_V8DI_QI:
34049 case V8HI_FTYPE_V8DI_V8HI_QI:
34050 case V8SF_FTYPE_V8DF_V8SF_QI:
34051 case V8SI_FTYPE_V8DF_V8SI_QI:
34052 case V8SI_FTYPE_V8DI_V8SI_QI:
34053 case V4SI_FTYPE_V4SI_V4SI_V4SI:
34054 nargs = 3;
34055 break;
34056 case V32QI_FTYPE_V32QI_V32QI_INT:
34057 case V16HI_FTYPE_V16HI_V16HI_INT:
34058 case V16QI_FTYPE_V16QI_V16QI_INT:
34059 case V4DI_FTYPE_V4DI_V4DI_INT:
34060 case V8HI_FTYPE_V8HI_V8HI_INT:
34061 case V8SI_FTYPE_V8SI_V8SI_INT:
34062 case V8SI_FTYPE_V8SI_V4SI_INT:
34063 case V8SF_FTYPE_V8SF_V8SF_INT:
34064 case V8SF_FTYPE_V8SF_V4SF_INT:
34065 case V4SI_FTYPE_V4SI_V4SI_INT:
34066 case V4DF_FTYPE_V4DF_V4DF_INT:
34067 case V16SF_FTYPE_V16SF_V16SF_INT:
34068 case V16SF_FTYPE_V16SF_V4SF_INT:
34069 case V16SI_FTYPE_V16SI_V4SI_INT:
34070 case V4DF_FTYPE_V4DF_V2DF_INT:
34071 case V4SF_FTYPE_V4SF_V4SF_INT:
34072 case V2DI_FTYPE_V2DI_V2DI_INT:
34073 case V4DI_FTYPE_V4DI_V2DI_INT:
34074 case V2DF_FTYPE_V2DF_V2DF_INT:
34075 case QI_FTYPE_V8DI_V8DI_INT:
34076 case QI_FTYPE_V8DF_V8DF_INT:
34077 case QI_FTYPE_V2DF_V2DF_INT:
34078 case QI_FTYPE_V4SF_V4SF_INT:
34079 case HI_FTYPE_V16SI_V16SI_INT:
34080 case HI_FTYPE_V16SF_V16SF_INT:
34081 nargs = 3;
34082 nargs_constant = 1;
34083 break;
34084 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
34085 nargs = 3;
34086 rmode = V4DImode;
34087 nargs_constant = 1;
34088 break;
34089 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
34090 nargs = 3;
34091 rmode = V2DImode;
34092 nargs_constant = 1;
34093 break;
34094 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
34095 nargs = 3;
34096 rmode = DImode;
34097 nargs_constant = 1;
34098 break;
34099 case V2DI_FTYPE_V2DI_UINT_UINT:
34100 nargs = 3;
34101 nargs_constant = 2;
34102 break;
34103 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
34104 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
34105 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
34106 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
34107 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
34108 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
34109 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
34110 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
34111 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
34112 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
34113 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
34114 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
34115 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
34116 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
34117 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
34118 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
34119 nargs = 4;
34120 break;
34121 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
34122 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
34123 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
34124 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
34125 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
34126 nargs = 4;
34127 nargs_constant = 1;
34128 break;
34129 case QI_FTYPE_V2DF_V2DF_INT_QI:
34130 case QI_FTYPE_V4SF_V4SF_INT_QI:
34131 nargs = 4;
34132 mask_pos = 1;
34133 nargs_constant = 1;
34134 break;
34135 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
34136 nargs = 4;
34137 nargs_constant = 2;
34138 break;
34139 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
34140 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
34141 nargs = 4;
34142 break;
34143 case QI_FTYPE_V8DI_V8DI_INT_QI:
34144 case HI_FTYPE_V16SI_V16SI_INT_HI:
34145 case QI_FTYPE_V8DF_V8DF_INT_QI:
34146 case HI_FTYPE_V16SF_V16SF_INT_HI:
34147 mask_pos = 1;
34148 nargs = 4;
34149 nargs_constant = 1;
34150 break;
34151 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
34152 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
34153 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34154 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34155 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34156 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34157 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34158 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34159 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34160 nargs = 4;
34161 mask_pos = 2;
34162 nargs_constant = 1;
34163 break;
34164 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34165 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34166 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34167 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34168 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34169 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34170 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34171 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34172 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34173 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34174 nargs = 5;
34175 mask_pos = 2;
34176 nargs_constant = 1;
34177 break;
34178 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34179 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34180 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34181 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34182 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34183 nargs = 5;
34184 mask_pos = 1;
34185 nargs_constant = 1;
34186 break;
34187
34188 default:
34189 gcc_unreachable ();
34190 }
34191
34192 gcc_assert (nargs <= ARRAY_SIZE (args));
34193
34194 if (comparison != UNKNOWN)
34195 {
34196 gcc_assert (nargs == 2);
34197 return ix86_expand_sse_compare (d, exp, target, swap);
34198 }
34199
34200 if (rmode == VOIDmode || rmode == tmode)
34201 {
34202 if (optimize
34203 || target == 0
34204 || GET_MODE (target) != tmode
34205 || !insn_p->operand[0].predicate (target, tmode))
34206 target = gen_reg_rtx (tmode);
34207 real_target = target;
34208 }
34209 else
34210 {
34211 real_target = gen_reg_rtx (tmode);
34212 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34213 }
34214
34215 for (i = 0; i < nargs; i++)
34216 {
34217 tree arg = CALL_EXPR_ARG (exp, i);
34218 rtx op = expand_normal (arg);
34219 enum machine_mode mode = insn_p->operand[i + 1].mode;
34220 bool match = insn_p->operand[i + 1].predicate (op, mode);
34221
34222 if (last_arg_count && (i + 1) == nargs)
34223 {
34224 /* SIMD shift insns take either an 8-bit immediate or a register
34225 as the count, but the builtin functions take an int as the count.
34226 If the count operand doesn't match, put it in a register. */
34227 if (!match)
34228 {
34229 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34230 if (!insn_p->operand[i + 1].predicate (op, mode))
34231 op = copy_to_reg (op);
34232 }
34233 }
34234 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34235 (!mask_pos && (nargs - i) <= nargs_constant))
34236 {
34237 if (!match)
34238 switch (icode)
34239 {
34240 case CODE_FOR_avx_vinsertf128v4di:
34241 case CODE_FOR_avx_vextractf128v4di:
34242 error ("the last argument must be a 1-bit immediate");
34243 return const0_rtx;
34244
34245 case CODE_FOR_avx512f_cmpv8di3_mask:
34246 case CODE_FOR_avx512f_cmpv16si3_mask:
34247 case CODE_FOR_avx512f_ucmpv8di3_mask:
34248 case CODE_FOR_avx512f_ucmpv16si3_mask:
34249 case CODE_FOR_avx512vl_cmpv4di3_mask:
34250 case CODE_FOR_avx512vl_cmpv8si3_mask:
34251 case CODE_FOR_avx512vl_ucmpv4di3_mask:
34252 case CODE_FOR_avx512vl_ucmpv8si3_mask:
34253 case CODE_FOR_avx512vl_cmpv2di3_mask:
34254 case CODE_FOR_avx512vl_cmpv4si3_mask:
34255 case CODE_FOR_avx512vl_ucmpv2di3_mask:
34256 case CODE_FOR_avx512vl_ucmpv4si3_mask:
34257 error ("the last argument must be a 3-bit immediate");
34258 return const0_rtx;
34259
34260 case CODE_FOR_sse4_1_roundsd:
34261 case CODE_FOR_sse4_1_roundss:
34262
34263 case CODE_FOR_sse4_1_roundpd:
34264 case CODE_FOR_sse4_1_roundps:
34265 case CODE_FOR_avx_roundpd256:
34266 case CODE_FOR_avx_roundps256:
34267
34268 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34269 case CODE_FOR_sse4_1_roundps_sfix:
34270 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34271 case CODE_FOR_avx_roundps_sfix256:
34272
34273 case CODE_FOR_sse4_1_blendps:
34274 case CODE_FOR_avx_blendpd256:
34275 case CODE_FOR_avx_vpermilv4df:
34276 case CODE_FOR_avx512f_getmantv8df_mask:
34277 case CODE_FOR_avx512f_getmantv16sf_mask:
34278 case CODE_FOR_avx512vl_getmantv8sf_mask:
34279 case CODE_FOR_avx512vl_getmantv4df_mask:
34280 case CODE_FOR_avx512vl_getmantv4sf_mask:
34281 case CODE_FOR_avx512vl_getmantv2df_mask:
34282 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34283 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34284 case CODE_FOR_avx512dq_rangepv4df_mask:
34285 case CODE_FOR_avx512dq_rangepv8sf_mask:
34286 case CODE_FOR_avx512dq_rangepv2df_mask:
34287 case CODE_FOR_avx512dq_rangepv4sf_mask:
34288 error ("the last argument must be a 4-bit immediate");
34289 return const0_rtx;
34290
34291 case CODE_FOR_sha1rnds4:
34292 case CODE_FOR_sse4_1_blendpd:
34293 case CODE_FOR_avx_vpermilv2df:
34294 case CODE_FOR_xop_vpermil2v2df3:
34295 case CODE_FOR_xop_vpermil2v4sf3:
34296 case CODE_FOR_xop_vpermil2v4df3:
34297 case CODE_FOR_xop_vpermil2v8sf3:
34298 case CODE_FOR_avx512f_vinsertf32x4_mask:
34299 case CODE_FOR_avx512f_vinserti32x4_mask:
34300 case CODE_FOR_avx512f_vextractf32x4_mask:
34301 case CODE_FOR_avx512f_vextracti32x4_mask:
34302 case CODE_FOR_sse2_shufpd:
34303 case CODE_FOR_sse2_shufpd_mask:
34304 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34305 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34306 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34307 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34308 error ("the last argument must be a 2-bit immediate");
34309 return const0_rtx;
34310
34311 case CODE_FOR_avx_vextractf128v4df:
34312 case CODE_FOR_avx_vextractf128v8sf:
34313 case CODE_FOR_avx_vextractf128v8si:
34314 case CODE_FOR_avx_vinsertf128v4df:
34315 case CODE_FOR_avx_vinsertf128v8sf:
34316 case CODE_FOR_avx_vinsertf128v8si:
34317 case CODE_FOR_avx512f_vinsertf64x4_mask:
34318 case CODE_FOR_avx512f_vinserti64x4_mask:
34319 case CODE_FOR_avx512f_vextractf64x4_mask:
34320 case CODE_FOR_avx512f_vextracti64x4_mask:
34321 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34322 case CODE_FOR_avx512dq_vinserti32x8_mask:
34323 case CODE_FOR_avx512vl_vinsertv4df:
34324 case CODE_FOR_avx512vl_vinsertv4di:
34325 case CODE_FOR_avx512vl_vinsertv8sf:
34326 case CODE_FOR_avx512vl_vinsertv8si:
34327 error ("the last argument must be a 1-bit immediate");
34328 return const0_rtx;
34329
34330 case CODE_FOR_avx_vmcmpv2df3:
34331 case CODE_FOR_avx_vmcmpv4sf3:
34332 case CODE_FOR_avx_cmpv2df3:
34333 case CODE_FOR_avx_cmpv4sf3:
34334 case CODE_FOR_avx_cmpv4df3:
34335 case CODE_FOR_avx_cmpv8sf3:
34336 case CODE_FOR_avx512f_cmpv8df3_mask:
34337 case CODE_FOR_avx512f_cmpv16sf3_mask:
34338 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34339 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34340 error ("the last argument must be a 5-bit immediate");
34341 return const0_rtx;
34342
34343 default:
34344 switch (nargs_constant)
34345 {
34346 case 2:
34347 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34348 (!mask_pos && (nargs - i) == nargs_constant))
34349 {
34350 error ("the next to last argument must be an 8-bit immediate");
34351 break;
34352 }
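/* FALLTHRU */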
34353 case 1:
34354 error ("the last argument must be an 8-bit immediate");
34355 break;
34356 default:
34357 gcc_unreachable ();
34358 }
34359 return const0_rtx;
34360 }
34361 }
34362 else
34363 {
34364 if (VECTOR_MODE_P (mode))
34365 op = safe_vector_operand (op, mode);
34366
34367 /* If we aren't optimizing, only allow one memory operand to
34368 be generated. */
34369 if (memory_operand (op, mode))
34370 num_memory++;
34371
34372 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34373 {
34374 if (optimize || !match || num_memory > 1)
34375 op = copy_to_mode_reg (mode, op);
34376 }
34377 else
34378 {
34379 op = copy_to_reg (op);
34380 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34381 }
34382 }
34383
34384 args[i].op = op;
34385 args[i].mode = mode;
34386 }
34387
34388 switch (nargs)
34389 {
34390 case 1:
34391 pat = GEN_FCN (icode) (real_target, args[0].op);
34392 break;
34393 case 2:
34394 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34395 break;
34396 case 3:
34397 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34398 args[2].op);
34399 break;
34400 case 4:
34401 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34402 args[2].op, args[3].op);
34403 break;
34404 case 5:
34405 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34406 args[2].op, args[3].op, args[4].op);
break;
34407 case 6:
34408 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34409 args[2].op, args[3].op, args[4].op,
34410 args[5].op);
34411 break;
34412 default:
34413 gcc_unreachable ();
34414 }
34415
34416 if (! pat)
34417 return 0;
34418
34419 emit_insn (pat);
34420 return target;
34421 }
34422
34423 /* Transform a pattern of the following layout:
34424 (parallel [
34425 (set (A B))
34426 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34428 into:
34429 (set (A B))
34430
34431 Or:
34432 (parallel [ A B
34433 ...
34434 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34435 ...
34436 ])
34437 into:
34438 (parallel [ A B ... ]) */
34439
34440 static rtx
34441 ix86_erase_embedded_rounding (rtx pat)
34442 {
34443 if (GET_CODE (pat) == INSN)
34444 pat = PATTERN (pat);
34445
34446 gcc_assert (GET_CODE (pat) == PARALLEL);
34447
34448 if (XVECLEN (pat, 0) == 2)
34449 {
34450 rtx p0 = XVECEXP (pat, 0, 0);
34451 rtx p1 = XVECEXP (pat, 0, 1);
34452
34453 gcc_assert (GET_CODE (p0) == SET
34454 && GET_CODE (p1) == UNSPEC
34455 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34456
34457 return p0;
34458 }
34459 else
34460 {
34461 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34462 int i = 0;
34463 int j = 0;
34464
34465 for (; i < XVECLEN (pat, 0); ++i)
34466 {
34467 rtx elem = XVECEXP (pat, 0, i);
34468 if (GET_CODE (elem) != UNSPEC
34469 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34470 res [j++] = elem;
34471 }
34472
34473 /* No more than 1 occurrence was removed. */
34474 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34475
34476 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34477 }
34478 }
34479
34480 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34481 with rounding. */
34482 static rtx
34483 ix86_expand_sse_comi_round (const struct builtin_description *d,
34484 tree exp, rtx target)
34485 {
34486 rtx pat, set_dst;
34487 tree arg0 = CALL_EXPR_ARG (exp, 0);
34488 tree arg1 = CALL_EXPR_ARG (exp, 1);
34489 tree arg2 = CALL_EXPR_ARG (exp, 2);
34490 tree arg3 = CALL_EXPR_ARG (exp, 3);
34491 rtx op0 = expand_normal (arg0);
34492 rtx op1 = expand_normal (arg1);
34493 rtx op2 = expand_normal (arg2);
34494 rtx op3 = expand_normal (arg3);
34495 enum insn_code icode = d->icode;
34496 const struct insn_data_d *insn_p = &insn_data[icode];
34497 enum machine_mode mode0 = insn_p->operand[0].mode;
34498 enum machine_mode mode1 = insn_p->operand[1].mode;
34499 enum rtx_code comparison = UNEQ;
34500 bool need_ucomi = false;
34501
34502 /* See avxintrin.h for values. */
34503 enum rtx_code comi_comparisons[32] =
34504 {
34505 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34506 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34507 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34508 };
34509 bool need_ucomi_values[32] =
34510 {
34511 true, false, false, true, true, false, false, true,
34512 true, false, false, true, true, false, false, true,
34513 false, true, true, false, false, true, true, false,
34514 false, true, true, false, false, true, true, false
34515 };
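/* An illustrative sketch, assuming the _CMP_* predicate encoding from
   avxintrin.h: predicate 0x00 (_CMP_EQ_OQ) maps to UNEQ tested after a
   quiet ucomi compare, while predicate 0x10 (_CMP_EQ_OS) maps to the
   same UNEQ but uses the signalling comi variant; that is why the table
   of rtx codes repeats after 16 entries while need_ucomi flips.  */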
34516
34517 if (!CONST_INT_P (op2))
34518 {
34519 error ("the third argument must be a comparison constant");
34520 return const0_rtx;
34521 }
34522 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34523 {
34524 error ("incorrect comparison mode");
34525 return const0_rtx;
34526 }
34527
34528 if (!insn_p->operand[2].predicate (op3, SImode))
34529 {
34530 error ("incorrect rounding operand");
34531 return const0_rtx;
34532 }
34533
34534 comparison = comi_comparisons[INTVAL (op2)];
34535 need_ucomi = need_ucomi_values[INTVAL (op2)];
34536
34537 if (VECTOR_MODE_P (mode0))
34538 op0 = safe_vector_operand (op0, mode0);
34539 if (VECTOR_MODE_P (mode1))
34540 op1 = safe_vector_operand (op1, mode1);
34541
34542 target = gen_reg_rtx (SImode);
34543 emit_move_insn (target, const0_rtx);
34544 target = gen_rtx_SUBREG (QImode, target, 0);
34545
34546 if ((optimize && !register_operand (op0, mode0))
34547 || !insn_p->operand[0].predicate (op0, mode0))
34548 op0 = copy_to_mode_reg (mode0, op0);
34549 if ((optimize && !register_operand (op1, mode1))
34550 || !insn_p->operand[1].predicate (op1, mode1))
34551 op1 = copy_to_mode_reg (mode1, op1);
34552
34553 if (need_ucomi)
34554 icode = icode == CODE_FOR_sse_comi_round
34555 ? CODE_FOR_sse_ucomi_round
34556 : CODE_FOR_sse2_ucomi_round;
34557
34558 pat = GEN_FCN (icode) (op0, op1, op3);
34559 if (! pat)
34560 return 0;
34561
34562 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34563 if (INTVAL (op3) == NO_ROUND)
34564 {
34565 pat = ix86_erase_embedded_rounding (pat);
34566 if (! pat)
34567 return 0;
34568
34569 set_dst = SET_DEST (pat);
34570 }
34571 else
34572 {
34573 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34574 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34575 }
34576
34577 emit_insn (pat);
34578 emit_insn (gen_rtx_SET (VOIDmode,
34579 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34580 gen_rtx_fmt_ee (comparison, QImode,
34581 set_dst,
34582 const0_rtx)));
34583
34584 return SUBREG_REG (target);
34585 }
34586
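/* Subroutine of ix86_expand_builtin to take care of insns with embedded
   rounding: the last argument of the builtin call is the rounding/SAE
   immediate.  As a hedged illustration, an AVX-512 intrinsic such as
   _mm512_add_round_pd (a, b, r) passes its rounding constant through as
   that final operand; when it is NO_ROUND the embedded-rounding unspec
   is redundant and is erased again below.  */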
34587 static rtx
34588 ix86_expand_round_builtin (const struct builtin_description *d,
34589 tree exp, rtx target)
34590 {
34591 rtx pat;
34592 unsigned int i, nargs;
34593 struct
34594 {
34595 rtx op;
34596 enum machine_mode mode;
34597 } args[6];
34598 enum insn_code icode = d->icode;
34599 const struct insn_data_d *insn_p = &insn_data[icode];
34600 enum machine_mode tmode = insn_p->operand[0].mode;
34601 unsigned int nargs_constant = 0;
34602 unsigned int redundant_embed_rnd = 0;
34603
34604 switch ((enum ix86_builtin_func_type) d->flag)
34605 {
34606 case UINT64_FTYPE_V2DF_INT:
34607 case UINT64_FTYPE_V4SF_INT:
34608 case UINT_FTYPE_V2DF_INT:
34609 case UINT_FTYPE_V4SF_INT:
34610 case INT64_FTYPE_V2DF_INT:
34611 case INT64_FTYPE_V4SF_INT:
34612 case INT_FTYPE_V2DF_INT:
34613 case INT_FTYPE_V4SF_INT:
34614 nargs = 2;
34615 break;
34616 case V4SF_FTYPE_V4SF_UINT_INT:
34617 case V4SF_FTYPE_V4SF_UINT64_INT:
34618 case V2DF_FTYPE_V2DF_UINT64_INT:
34619 case V4SF_FTYPE_V4SF_INT_INT:
34620 case V4SF_FTYPE_V4SF_INT64_INT:
34621 case V2DF_FTYPE_V2DF_INT64_INT:
34622 case V4SF_FTYPE_V4SF_V4SF_INT:
34623 case V2DF_FTYPE_V2DF_V2DF_INT:
34624 case V4SF_FTYPE_V4SF_V2DF_INT:
34625 case V2DF_FTYPE_V2DF_V4SF_INT:
34626 nargs = 3;
34627 break;
34628 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34629 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34630 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34631 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34632 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34633 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34634 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34635 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34636 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34637 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34638 nargs = 4;
34639 break;
34640 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34641 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34642 nargs_constant = 2;
34643 nargs = 4;
34644 break;
34645 case INT_FTYPE_V4SF_V4SF_INT_INT:
34646 case INT_FTYPE_V2DF_V2DF_INT_INT:
34647 return ix86_expand_sse_comi_round (d, exp, target);
34648 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34649 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34650 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34651 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34652 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34653 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34654 nargs = 5;
34655 break;
34656 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34657 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34658 nargs_constant = 4;
34659 nargs = 5;
34660 break;
34661 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34662 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34663 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34664 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34665 nargs_constant = 3;
34666 nargs = 5;
34667 break;
34668 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34669 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34670 nargs = 6;
34671 nargs_constant = 4;
34672 break;
34673 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34674 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34675 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34676 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34677 nargs = 6;
34678 nargs_constant = 3;
34679 break;
34680 default:
34681 gcc_unreachable ();
34682 }
34683 gcc_assert (nargs <= ARRAY_SIZE (args));
34684
34685 if (optimize
34686 || target == 0
34687 || GET_MODE (target) != tmode
34688 || !insn_p->operand[0].predicate (target, tmode))
34689 target = gen_reg_rtx (tmode);
34690
34691 for (i = 0; i < nargs; i++)
34692 {
34693 tree arg = CALL_EXPR_ARG (exp, i);
34694 rtx op = expand_normal (arg);
34695 enum machine_mode mode = insn_p->operand[i + 1].mode;
34696 bool match = insn_p->operand[i + 1].predicate (op, mode);
34697
34698 if (i == nargs - nargs_constant)
34699 {
34700 if (!match)
34701 {
34702 switch (icode)
34703 {
34704 case CODE_FOR_avx512f_getmantv8df_mask_round:
34705 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34706 case CODE_FOR_avx512f_vgetmantv2df_round:
34707 case CODE_FOR_avx512f_vgetmantv4sf_round:
34708 error ("the immediate argument must be a 4-bit immediate");
34709 return const0_rtx;
34710 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34711 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34712 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34713 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34714 error ("the immediate argument must be a 5-bit immediate");
34715 return const0_rtx;
34716 default:
34717 error ("the immediate argument must be an 8-bit immediate");
34718 return const0_rtx;
34719 }
34720 }
34721 }
34722 else if (i == nargs-1)
34723 {
34724 if (!insn_p->operand[nargs].predicate (op, SImode))
34725 {
34726 error ("incorrect rounding operand");
34727 return const0_rtx;
34728 }
34729
34730 /* If there is no rounding, use the normal version of the pattern. */
34731 if (INTVAL (op) == NO_ROUND)
34732 redundant_embed_rnd = 1;
34733 }
34734 else
34735 {
34736 if (VECTOR_MODE_P (mode))
34737 op = safe_vector_operand (op, mode);
34738
34739 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34740 {
34741 if (optimize || !match)
34742 op = copy_to_mode_reg (mode, op);
34743 }
34744 else
34745 {
34746 op = copy_to_reg (op);
34747 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34748 }
34749 }
34750
34751 args[i].op = op;
34752 args[i].mode = mode;
34753 }
34754
34755 switch (nargs)
34756 {
34757 case 1:
34758 pat = GEN_FCN (icode) (target, args[0].op);
34759 break;
34760 case 2:
34761 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34762 break;
34763 case 3:
34764 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34765 args[2].op);
34766 break;
34767 case 4:
34768 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34769 args[2].op, args[3].op);
34770 break;
34771 case 5:
34772 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34773 args[2].op, args[3].op, args[4].op);
break;
34774 case 6:
34775 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34776 args[2].op, args[3].op, args[4].op,
34777 args[5].op);
34778 break;
34779 default:
34780 gcc_unreachable ();
34781 }
34782
34783 if (!pat)
34784 return 0;
34785
34786 if (redundant_embed_rnd)
34787 pat = ix86_erase_embedded_rounding (pat);
34788
34789 emit_insn (pat);
34790 return target;
34791 }
34792
34793 /* Subroutine of ix86_expand_builtin to take care of special insns
34794 with variable number of operands. */
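/* A hedged illustration of the load/store "klass" distinction handled
   here: __builtin_ia32_movntdqa (p), behind _mm_stream_load_si128, is a
   "load"-class builtin whose result comes from memory, whereas
   __builtin_ia32_movntdq (p, v), behind _mm_stream_si128, is a
   "store"-class builtin whose first argument is the memory destination
   and which returns no value.  */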
34795
34796 static rtx
34797 ix86_expand_special_args_builtin (const struct builtin_description *d,
34798 tree exp, rtx target)
34799 {
34800 tree arg;
34801 rtx pat, op;
34802 unsigned int i, nargs, arg_adjust, memory;
34803 bool aligned_mem = false;
34804 struct
34805 {
34806 rtx op;
34807 enum machine_mode mode;
34808 } args[3];
34809 enum insn_code icode = d->icode;
34810 bool last_arg_constant = false;
34811 const struct insn_data_d *insn_p = &insn_data[icode];
34812 enum machine_mode tmode = insn_p->operand[0].mode;
34813 enum { load, store } klass;
34814
34815 switch ((enum ix86_builtin_func_type) d->flag)
34816 {
34817 case VOID_FTYPE_VOID:
34818 emit_insn (GEN_FCN (icode) (target));
34819 return 0;
34820 case VOID_FTYPE_UINT64:
34821 case VOID_FTYPE_UNSIGNED:
34822 nargs = 0;
34823 klass = store;
34824 memory = 0;
34825 break;
34826
34827 case INT_FTYPE_VOID:
34828 case USHORT_FTYPE_VOID:
34829 case UINT64_FTYPE_VOID:
34830 case UNSIGNED_FTYPE_VOID:
34831 nargs = 0;
34832 klass = load;
34833 memory = 0;
34834 break;
34835 case UINT64_FTYPE_PUNSIGNED:
34836 case V2DI_FTYPE_PV2DI:
34837 case V4DI_FTYPE_PV4DI:
34838 case V32QI_FTYPE_PCCHAR:
34839 case V16QI_FTYPE_PCCHAR:
34840 case V8SF_FTYPE_PCV4SF:
34841 case V8SF_FTYPE_PCFLOAT:
34842 case V4SF_FTYPE_PCFLOAT:
34843 case V4DF_FTYPE_PCV2DF:
34844 case V4DF_FTYPE_PCDOUBLE:
34845 case V2DF_FTYPE_PCDOUBLE:
34846 case VOID_FTYPE_PVOID:
34847 case V16SI_FTYPE_PV4SI:
34848 case V16SF_FTYPE_PV4SF:
34849 case V8DI_FTYPE_PV4DI:
34850 case V8DI_FTYPE_PV8DI:
34851 case V8DF_FTYPE_PV4DF:
34852 nargs = 1;
34853 klass = load;
34854 memory = 0;
34855 switch (icode)
34856 {
34857 case CODE_FOR_sse4_1_movntdqa:
34858 case CODE_FOR_avx2_movntdqa:
34859 case CODE_FOR_avx512f_movntdqa:
34860 aligned_mem = true;
34861 break;
34862 default:
34863 break;
34864 }
34865 break;
34866 case VOID_FTYPE_PV2SF_V4SF:
34867 case VOID_FTYPE_PV8DI_V8DI:
34868 case VOID_FTYPE_PV4DI_V4DI:
34869 case VOID_FTYPE_PV2DI_V2DI:
34870 case VOID_FTYPE_PCHAR_V32QI:
34871 case VOID_FTYPE_PCHAR_V16QI:
34872 case VOID_FTYPE_PFLOAT_V16SF:
34873 case VOID_FTYPE_PFLOAT_V8SF:
34874 case VOID_FTYPE_PFLOAT_V4SF:
34875 case VOID_FTYPE_PDOUBLE_V8DF:
34876 case VOID_FTYPE_PDOUBLE_V4DF:
34877 case VOID_FTYPE_PDOUBLE_V2DF:
34878 case VOID_FTYPE_PLONGLONG_LONGLONG:
34879 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34880 case VOID_FTYPE_PINT_INT:
34881 nargs = 1;
34882 klass = store;
34883 /* Reserve memory operand for target. */
34884 memory = ARRAY_SIZE (args);
34885 switch (icode)
34886 {
34887 /* These builtins and instructions require the memory
34888 to be properly aligned. */
34889 case CODE_FOR_avx_movntv4di:
34890 case CODE_FOR_sse2_movntv2di:
34891 case CODE_FOR_avx_movntv8sf:
34892 case CODE_FOR_sse_movntv4sf:
34893 case CODE_FOR_sse4a_vmmovntv4sf:
34894 case CODE_FOR_avx_movntv4df:
34895 case CODE_FOR_sse2_movntv2df:
34896 case CODE_FOR_sse4a_vmmovntv2df:
34897 case CODE_FOR_sse2_movntidi:
34898 case CODE_FOR_sse_movntq:
34899 case CODE_FOR_sse2_movntisi:
34900 case CODE_FOR_avx512f_movntv16sf:
34901 case CODE_FOR_avx512f_movntv8df:
34902 case CODE_FOR_avx512f_movntv8di:
34903 aligned_mem = true;
34904 break;
34905 default:
34906 break;
34907 }
34908 break;
34909 case V4SF_FTYPE_V4SF_PCV2SF:
34910 case V2DF_FTYPE_V2DF_PCDOUBLE:
34911 nargs = 2;
34912 klass = load;
34913 memory = 1;
34914 break;
34915 case V8SF_FTYPE_PCV8SF_V8SI:
34916 case V4DF_FTYPE_PCV4DF_V4DI:
34917 case V4SF_FTYPE_PCV4SF_V4SI:
34918 case V2DF_FTYPE_PCV2DF_V2DI:
34919 case V8SI_FTYPE_PCV8SI_V8SI:
34920 case V4DI_FTYPE_PCV4DI_V4DI:
34921 case V4SI_FTYPE_PCV4SI_V4SI:
34922 case V2DI_FTYPE_PCV2DI_V2DI:
34923 nargs = 2;
34924 klass = load;
34925 memory = 0;
34926 break;
34927 case VOID_FTYPE_PV8DF_V8DF_QI:
34928 case VOID_FTYPE_PV16SF_V16SF_HI:
34929 case VOID_FTYPE_PV8DI_V8DI_QI:
34930 case VOID_FTYPE_PV16SI_V16SI_HI:
34931 switch (icode)
34932 {
34933 /* These builtins and instructions require the memory
34934 to be properly aligned. */
34935 case CODE_FOR_avx512f_storev16sf_mask:
34936 case CODE_FOR_avx512f_storev16si_mask:
34937 case CODE_FOR_avx512f_storev8df_mask:
34938 case CODE_FOR_avx512f_storev8di_mask:
34939 case CODE_FOR_avx512vl_storev8sf_mask:
34940 case CODE_FOR_avx512vl_storev8si_mask:
34941 case CODE_FOR_avx512vl_storev4df_mask:
34942 case CODE_FOR_avx512vl_storev4di_mask:
34943 case CODE_FOR_avx512vl_storev4sf_mask:
34944 case CODE_FOR_avx512vl_storev4si_mask:
34945 case CODE_FOR_avx512vl_storev2df_mask:
34946 case CODE_FOR_avx512vl_storev2di_mask:
34947 aligned_mem = true;
34948 break;
34949 default:
34950 break;
34951 }
34952 /* FALLTHRU */
34953 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34954 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34955 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34956 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34957 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34958 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34959 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34960 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34961 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34962 case VOID_FTYPE_PFLOAT_V4SF_QI:
34963 case VOID_FTYPE_PV8SI_V8DI_QI:
34964 case VOID_FTYPE_PV8HI_V8DI_QI:
34965 case VOID_FTYPE_PV16HI_V16SI_HI:
34966 case VOID_FTYPE_PV16QI_V8DI_QI:
34967 case VOID_FTYPE_PV16QI_V16SI_HI:
34968 nargs = 2;
34969 klass = store;
34970 /* Reserve memory operand for target. */
34971 memory = ARRAY_SIZE (args);
34972 break;
34973 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34974 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34975 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34976 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34977 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34978 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34979 nargs = 3;
34980 klass = load;
34981 memory = 0;
34982 switch (icode)
34983 {
34984 /* These builtins and instructions require the memory
34985 to be properly aligned. */
34986 case CODE_FOR_avx512f_loadv16sf_mask:
34987 case CODE_FOR_avx512f_loadv16si_mask:
34988 case CODE_FOR_avx512f_loadv8df_mask:
34989 case CODE_FOR_avx512f_loadv8di_mask:
34990 case CODE_FOR_avx512vl_loadv8sf_mask:
34991 case CODE_FOR_avx512vl_loadv8si_mask:
34992 case CODE_FOR_avx512vl_loadv4df_mask:
34993 case CODE_FOR_avx512vl_loadv4di_mask:
34994 case CODE_FOR_avx512vl_loadv4sf_mask:
34995 case CODE_FOR_avx512vl_loadv4si_mask:
34996 case CODE_FOR_avx512vl_loadv2df_mask:
34997 case CODE_FOR_avx512vl_loadv2di_mask:
34998 case CODE_FOR_avx512bw_loadv64qi_mask:
34999 case CODE_FOR_avx512vl_loadv32qi_mask:
35000 case CODE_FOR_avx512vl_loadv16qi_mask:
35001 case CODE_FOR_avx512bw_loadv32hi_mask:
35002 case CODE_FOR_avx512vl_loadv16hi_mask:
35003 case CODE_FOR_avx512vl_loadv8hi_mask:
35004 aligned_mem = true;
35005 break;
35006 default:
35007 break;
35008 }
35009 break;
35010 case VOID_FTYPE_UINT_UINT_UINT:
35011 case VOID_FTYPE_UINT64_UINT_UINT:
35012 case UCHAR_FTYPE_UINT_UINT_UINT:
35013 case UCHAR_FTYPE_UINT64_UINT_UINT:
35014 nargs = 3;
35015 klass = load;
35016 memory = ARRAY_SIZE (args);
35017 last_arg_constant = true;
35018 break;
35019 default:
35020 gcc_unreachable ();
35021 }
35022
35023 gcc_assert (nargs <= ARRAY_SIZE (args));
35024
35025 if (klass == store)
35026 {
35027 arg = CALL_EXPR_ARG (exp, 0);
35028 op = expand_normal (arg);
35029 gcc_assert (target == 0);
35030 if (memory)
35031 {
35032 op = ix86_zero_extend_to_Pmode (op);
35033 target = gen_rtx_MEM (tmode, op);
35034 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
35035 on it. Try to improve it using get_pointer_alignment,
35036 and if the special builtin is one that requires strict
35037 mode alignment, also from its GET_MODE_ALIGNMENT.
35038 Failure to do so could lead to ix86_legitimate_combined_insn
35039 rejecting all changes to such insns. */
35040 unsigned int align = get_pointer_alignment (arg);
35041 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
35042 align = GET_MODE_ALIGNMENT (tmode);
35043 if (MEM_ALIGN (target) < align)
35044 set_mem_align (target, align);
35045 }
35046 else
35047 target = force_reg (tmode, op);
35048 arg_adjust = 1;
35049 }
35050 else
35051 {
35052 arg_adjust = 0;
35053 if (optimize
35054 || target == 0
35055 || !register_operand (target, tmode)
35056 || GET_MODE (target) != tmode)
35057 target = gen_reg_rtx (tmode);
35058 }
35059
35060 for (i = 0; i < nargs; i++)
35061 {
35062 enum machine_mode mode = insn_p->operand[i + 1].mode;
35063 bool match;
35064
35065 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
35066 op = expand_normal (arg);
35067 match = insn_p->operand[i + 1].predicate (op, mode);
35068
35069 if (last_arg_constant && (i + 1) == nargs)
35070 {
35071 if (!match)
35072 {
35073 if (icode == CODE_FOR_lwp_lwpvalsi3
35074 || icode == CODE_FOR_lwp_lwpinssi3
35075 || icode == CODE_FOR_lwp_lwpvaldi3
35076 || icode == CODE_FOR_lwp_lwpinsdi3)
35077 error ("the last argument must be a 32-bit immediate");
35078 else
35079 error ("the last argument must be an 8-bit immediate");
35080 return const0_rtx;
35081 }
35082 }
35083 else
35084 {
35085 if (i == memory)
35086 {
35087 /* This must be the memory operand. */
35088 op = ix86_zero_extend_to_Pmode (op);
35089 op = gen_rtx_MEM (mode, op);
35090 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
35091 on it. Try to improve it using get_pointer_alignment,
35092 and if the special builtin is one that requires strict
35093 mode alignment, also from its GET_MODE_ALIGNMENT.
35094 Failure to do so could lead to ix86_legitimate_combined_insn
35095 rejecting all changes to such insns. */
35096 unsigned int align = get_pointer_alignment (arg);
35097 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
35098 align = GET_MODE_ALIGNMENT (mode);
35099 if (MEM_ALIGN (op) < align)
35100 set_mem_align (op, align);
35101 }
35102 else
35103 {
35104 /* This must be a register. */
35105 if (VECTOR_MODE_P (mode))
35106 op = safe_vector_operand (op, mode);
35107
35108 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
35109 op = copy_to_mode_reg (mode, op);
35110 else
35111 {
35112 op = copy_to_reg (op);
35113 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
35114 }
35115 }
35116 }
35117
35118 args[i].op = op;
35119 args[i].mode = mode;
35120 }
35121
35122 switch (nargs)
35123 {
35124 case 0:
35125 pat = GEN_FCN (icode) (target);
35126 break;
35127 case 1:
35128 pat = GEN_FCN (icode) (target, args[0].op);
35129 break;
35130 case 2:
35131 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35132 break;
35133 case 3:
35134 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35135 break;
35136 default:
35137 gcc_unreachable ();
35138 }
35139
35140 if (! pat)
35141 return 0;
35142 emit_insn (pat);
35143 return klass == store ? 0 : target;
35144 }
35145
35146 /* Return the integer constant in ARG. Constrain it to be in the range
35147 of the subparts of VEC_TYPE; issue an error if not. */
35148
35149 static int
35150 get_element_number (tree vec_type, tree arg)
35151 {
35152 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
35153
35154 if (!tree_fits_uhwi_p (arg)
35155 || (elt = tree_to_uhwi (arg), elt > max))
35156 {
35157 error ("selector must be an integer constant in the range 0..%wi", max);
35158 return 0;
35159 }
35160
35161 return elt;
35162 }
35163
35164 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35165 ix86_expand_vector_init. We DO have language-level syntax for this, in
35166 the form of (type){ init-list }. Except that since we can't place emms
35167 instructions from inside the compiler, we can't allow the use of MMX
35168 registers unless the user explicitly asks for it. So we do *not* define
35169 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35170 we have builtins invoked by mmintrin.h that give us license to emit
35171 these sorts of instructions. */
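/* As a hedged sketch of how these builtins are reached, mmintrin.h wraps
   them roughly as

     _mm_set_pi32 (i1, i0)  ->  __builtin_ia32_vec_init_v2si (i0, i1)

   so the call expression expanded below carries one argument per vector
   element.  */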
35172
35173 static rtx
35174 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35175 {
35176 enum machine_mode tmode = TYPE_MODE (type);
35177 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
35178 int i, n_elt = GET_MODE_NUNITS (tmode);
35179 rtvec v = rtvec_alloc (n_elt);
35180
35181 gcc_assert (VECTOR_MODE_P (tmode));
35182 gcc_assert (call_expr_nargs (exp) == n_elt);
35183
35184 for (i = 0; i < n_elt; ++i)
35185 {
35186 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35187 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35188 }
35189
35190 if (!target || !register_operand (target, tmode))
35191 target = gen_reg_rtx (tmode);
35192
35193 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35194 return target;
35195 }
35196
35197 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35198 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35199 had a language-level syntax for referencing vector elements. */
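/* A hedged sketch: _mm_extract_epi16 (v, n) in emmintrin.h is implemented
   as __builtin_ia32_vec_ext_v8hi ((__v8hi) v, n), which ends up here and
   is expanded via ix86_expand_vector_extract.  */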
35200
35201 static rtx
35202 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35203 {
35204 enum machine_mode tmode, mode0;
35205 tree arg0, arg1;
35206 int elt;
35207 rtx op0;
35208
35209 arg0 = CALL_EXPR_ARG (exp, 0);
35210 arg1 = CALL_EXPR_ARG (exp, 1);
35211
35212 op0 = expand_normal (arg0);
35213 elt = get_element_number (TREE_TYPE (arg0), arg1);
35214
35215 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35216 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35217 gcc_assert (VECTOR_MODE_P (mode0));
35218
35219 op0 = force_reg (mode0, op0);
35220
35221 if (optimize || !target || !register_operand (target, tmode))
35222 target = gen_reg_rtx (tmode);
35223
35224 ix86_expand_vector_extract (true, target, op0, elt);
35225
35226 return target;
35227 }
35228
35229 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35230 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35231 a language-level syntax for referencing vector elements. */
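/* A hedged sketch: _mm_insert_epi16 (v, d, n) in emmintrin.h is
   implemented as __builtin_ia32_vec_set_v8hi ((__v8hi) v, d, n), which
   ends up here and is expanded via ix86_expand_vector_set.  */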
35232
35233 static rtx
35234 ix86_expand_vec_set_builtin (tree exp)
35235 {
35236 enum machine_mode tmode, mode1;
35237 tree arg0, arg1, arg2;
35238 int elt;
35239 rtx op0, op1, target;
35240
35241 arg0 = CALL_EXPR_ARG (exp, 0);
35242 arg1 = CALL_EXPR_ARG (exp, 1);
35243 arg2 = CALL_EXPR_ARG (exp, 2);
35244
35245 tmode = TYPE_MODE (TREE_TYPE (arg0));
35246 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35247 gcc_assert (VECTOR_MODE_P (tmode));
35248
35249 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35250 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35251 elt = get_element_number (TREE_TYPE (arg0), arg2);
35252
35253 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35254 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35255
35256 op0 = force_reg (tmode, op0);
35257 op1 = force_reg (mode1, op1);
35258
35259 /* OP0 is the source of these builtin functions and shouldn't be
35260 modified. Create a copy, use it and return it as target. */
35261 target = gen_reg_rtx (tmode);
35262 emit_move_insn (target, op0);
35263 ix86_expand_vector_set (true, target, op1, elt);
35264
35265 return target;
35266 }
35267
35268 /* Expand an expression EXP that calls a built-in function,
35269 with result going to TARGET if that's convenient
35270 (and in mode MODE if that's convenient).
35271 SUBTARGET may be used as the target for computing one of EXP's operands.
35272 IGNORE is nonzero if the value is to be ignored. */
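/* This is the i386 implementation of the TARGET_EXPAND_BUILTIN hook,
   presumably installed through the target-def.h initializer elsewhere
   in this file.  */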
35273
35274 static rtx
35275 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35276 enum machine_mode mode, int ignore)
35277 {
35278 const struct builtin_description *d;
35279 size_t i;
35280 enum insn_code icode;
35281 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35282 tree arg0, arg1, arg2, arg3, arg4;
35283 rtx op0, op1, op2, op3, op4, pat, insn;
35284 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35285 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35286
35287 /* For CPU builtins that can be folded, fold first and expand the fold. */
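/* As a sketch: __builtin_cpu_supports ("avx") arrives here as
   IX86_BUILTIN_CPU_SUPPORTS and is folded by fold_builtin_cpu into a
   test of the CPU-model data maintained by libgcc; the exact folding
   lives in fold_builtin_cpu, not here.  */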
35288 switch (fcode)
35289 {
35290 case IX86_BUILTIN_CPU_INIT:
35291 {
35292 /* Make it call __cpu_indicator_init in libgcc. */
35293 tree call_expr, fndecl, type;
35294 type = build_function_type_list (integer_type_node, NULL_TREE);
35295 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35296 call_expr = build_call_expr (fndecl, 0);
35297 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35298 }
35299 case IX86_BUILTIN_CPU_IS:
35300 case IX86_BUILTIN_CPU_SUPPORTS:
35301 {
35302 tree arg0 = CALL_EXPR_ARG (exp, 0);
35303 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35304 gcc_assert (fold_expr != NULL_TREE);
35305 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35306 }
35307 }
35308
35309 /* Determine whether the builtin function is available under the current ISA.
35310 Originally the builtin was not created if it wasn't applicable to the
35311 current ISA based on the command line switches. With function specific
35312 options, we need to check in the context of the function making the call
35313 whether it is supported. */
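/* For example (a sketch): a function declared with
   __attribute__ ((target ("avx2"))) may legitimately use AVX2 builtins
   even when -mavx2 was not given globally, so the ISA check below is
   made against the flags in effect for the calling function.  */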
35314 if (ix86_builtins_isa[fcode].isa
35315 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35316 {
35317 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35318 NULL, (enum fpmath_unit) 0, false);
35319
35320 if (!opts)
35321 error ("%qE needs unknown isa option", fndecl);
35322 else
35323 {
35324 gcc_assert (opts != NULL);
35325 error ("%qE needs isa option %s", fndecl, opts);
35326 free (opts);
35327 }
35328 return const0_rtx;
35329 }
35330
35331 switch (fcode)
35332 {
35333 case IX86_BUILTIN_MASKMOVQ:
35334 case IX86_BUILTIN_MASKMOVDQU:
35335 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35336 ? CODE_FOR_mmx_maskmovq
35337 : CODE_FOR_sse2_maskmovdqu);
35338 /* Note the arg order is different from the operand order. */
35339 arg1 = CALL_EXPR_ARG (exp, 0);
35340 arg2 = CALL_EXPR_ARG (exp, 1);
35341 arg0 = CALL_EXPR_ARG (exp, 2);
35342 op0 = expand_normal (arg0);
35343 op1 = expand_normal (arg1);
35344 op2 = expand_normal (arg2);
35345 mode0 = insn_data[icode].operand[0].mode;
35346 mode1 = insn_data[icode].operand[1].mode;
35347 mode2 = insn_data[icode].operand[2].mode;
35348
35349 op0 = ix86_zero_extend_to_Pmode (op0);
35350 op0 = gen_rtx_MEM (mode1, op0);
35351
35352 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35353 op0 = copy_to_mode_reg (mode0, op0);
35354 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35355 op1 = copy_to_mode_reg (mode1, op1);
35356 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35357 op2 = copy_to_mode_reg (mode2, op2);
35358 pat = GEN_FCN (icode) (op0, op1, op2);
35359 if (! pat)
35360 return 0;
35361 emit_insn (pat);
35362 return 0;
35363
35364 case IX86_BUILTIN_LDMXCSR:
35365 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35366 target = assign_386_stack_local (SImode, SLOT_TEMP);
35367 emit_move_insn (target, op0);
35368 emit_insn (gen_sse_ldmxcsr (target));
35369 return 0;
35370
35371 case IX86_BUILTIN_STMXCSR:
35372 target = assign_386_stack_local (SImode, SLOT_TEMP);
35373 emit_insn (gen_sse_stmxcsr (target));
35374 return copy_to_mode_reg (SImode, target);
35375
35376 case IX86_BUILTIN_CLFLUSH:
35377 arg0 = CALL_EXPR_ARG (exp, 0);
35378 op0 = expand_normal (arg0);
35379 icode = CODE_FOR_sse2_clflush;
35380 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35381 op0 = ix86_zero_extend_to_Pmode (op0);
35382
35383 emit_insn (gen_sse2_clflush (op0));
35384 return 0;
35385
35386 case IX86_BUILTIN_CLFLUSHOPT:
35387 arg0 = CALL_EXPR_ARG (exp, 0);
35388 op0 = expand_normal (arg0);
35389 icode = CODE_FOR_clflushopt;
35390 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35391 op0 = ix86_zero_extend_to_Pmode (op0);
35392
35393 emit_insn (gen_clflushopt (op0));
35394 return 0;
35395
35396 case IX86_BUILTIN_MONITOR:
35397 arg0 = CALL_EXPR_ARG (exp, 0);
35398 arg1 = CALL_EXPR_ARG (exp, 1);
35399 arg2 = CALL_EXPR_ARG (exp, 2);
35400 op0 = expand_normal (arg0);
35401 op1 = expand_normal (arg1);
35402 op2 = expand_normal (arg2);
35403 if (!REG_P (op0))
35404 op0 = ix86_zero_extend_to_Pmode (op0);
35405 if (!REG_P (op1))
35406 op1 = copy_to_mode_reg (SImode, op1);
35407 if (!REG_P (op2))
35408 op2 = copy_to_mode_reg (SImode, op2);
35409 emit_insn (ix86_gen_monitor (op0, op1, op2));
35410 return 0;
35411
35412 case IX86_BUILTIN_MWAIT:
35413 arg0 = CALL_EXPR_ARG (exp, 0);
35414 arg1 = CALL_EXPR_ARG (exp, 1);
35415 op0 = expand_normal (arg0);
35416 op1 = expand_normal (arg1);
35417 if (!REG_P (op0))
35418 op0 = copy_to_mode_reg (SImode, op0);
35419 if (!REG_P (op1))
35420 op1 = copy_to_mode_reg (SImode, op1);
35421 emit_insn (gen_sse3_mwait (op0, op1));
35422 return 0;
35423
35424 case IX86_BUILTIN_VEC_INIT_V2SI:
35425 case IX86_BUILTIN_VEC_INIT_V4HI:
35426 case IX86_BUILTIN_VEC_INIT_V8QI:
35427 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35428
35429 case IX86_BUILTIN_VEC_EXT_V2DF:
35430 case IX86_BUILTIN_VEC_EXT_V2DI:
35431 case IX86_BUILTIN_VEC_EXT_V4SF:
35432 case IX86_BUILTIN_VEC_EXT_V4SI:
35433 case IX86_BUILTIN_VEC_EXT_V8HI:
35434 case IX86_BUILTIN_VEC_EXT_V2SI:
35435 case IX86_BUILTIN_VEC_EXT_V4HI:
35436 case IX86_BUILTIN_VEC_EXT_V16QI:
35437 return ix86_expand_vec_ext_builtin (exp, target);
35438
35439 case IX86_BUILTIN_VEC_SET_V2DI:
35440 case IX86_BUILTIN_VEC_SET_V4SF:
35441 case IX86_BUILTIN_VEC_SET_V4SI:
35442 case IX86_BUILTIN_VEC_SET_V8HI:
35443 case IX86_BUILTIN_VEC_SET_V4HI:
35444 case IX86_BUILTIN_VEC_SET_V16QI:
35445 return ix86_expand_vec_set_builtin (exp);
35446
35447 case IX86_BUILTIN_INFQ:
35448 case IX86_BUILTIN_HUGE_VALQ:
35449 {
35450 REAL_VALUE_TYPE inf;
35451 rtx tmp;
35452
35453 real_inf (&inf);
35454 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35455
35456 tmp = validize_mem (force_const_mem (mode, tmp));
35457
35458 if (target == 0)
35459 target = gen_reg_rtx (mode);
35460
35461 emit_move_insn (target, tmp);
35462 return target;
35463 }
35464
35465 case IX86_BUILTIN_RDPMC:
35466 case IX86_BUILTIN_RDTSC:
35467 case IX86_BUILTIN_RDTSCP:
35468
35469 op0 = gen_reg_rtx (DImode);
35470 op1 = gen_reg_rtx (DImode);
35471
35472 if (fcode == IX86_BUILTIN_RDPMC)
35473 {
35474 arg0 = CALL_EXPR_ARG (exp, 0);
35475 op2 = expand_normal (arg0);
35476 if (!register_operand (op2, SImode))
35477 op2 = copy_to_mode_reg (SImode, op2);
35478
35479 insn = (TARGET_64BIT
35480 ? gen_rdpmc_rex64 (op0, op1, op2)
35481 : gen_rdpmc (op0, op2));
35482 emit_insn (insn);
35483 }
35484 else if (fcode == IX86_BUILTIN_RDTSC)
35485 {
35486 insn = (TARGET_64BIT
35487 ? gen_rdtsc_rex64 (op0, op1)
35488 : gen_rdtsc (op0));
35489 emit_insn (insn);
35490 }
35491 else
35492 {
35493 op2 = gen_reg_rtx (SImode);
35494
35495 insn = (TARGET_64BIT
35496 ? gen_rdtscp_rex64 (op0, op1, op2)
35497 : gen_rdtscp (op0, op2));
35498 emit_insn (insn);
35499
35500 arg0 = CALL_EXPR_ARG (exp, 0);
35501 op4 = expand_normal (arg0);
35502 if (!address_operand (op4, VOIDmode))
35503 {
35504 op4 = convert_memory_address (Pmode, op4);
35505 op4 = copy_addr_to_reg (op4);
35506 }
35507 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35508 }
35509
35510 if (target == 0)
35511 {
35512 /* mode is VOIDmode if __builtin_rd* has been called
35513 without an lhs. */
35514 if (mode == VOIDmode)
35515 return target;
35516 target = gen_reg_rtx (mode);
35517 }
35518
35519 if (TARGET_64BIT)
35520 {
35521 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35522 op1, 1, OPTAB_DIRECT);
35523 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35524 op0, 1, OPTAB_DIRECT);
35525 }
35526
35527 emit_move_insn (target, op0);
35528 return target;
35529
35530 case IX86_BUILTIN_FXSAVE:
35531 case IX86_BUILTIN_FXRSTOR:
35532 case IX86_BUILTIN_FXSAVE64:
35533 case IX86_BUILTIN_FXRSTOR64:
35534 case IX86_BUILTIN_FNSTENV:
35535 case IX86_BUILTIN_FLDENV:
35536 mode0 = BLKmode;
35537 switch (fcode)
35538 {
35539 case IX86_BUILTIN_FXSAVE:
35540 icode = CODE_FOR_fxsave;
35541 break;
35542 case IX86_BUILTIN_FXRSTOR:
35543 icode = CODE_FOR_fxrstor;
35544 break;
35545 case IX86_BUILTIN_FXSAVE64:
35546 icode = CODE_FOR_fxsave64;
35547 break;
35548 case IX86_BUILTIN_FXRSTOR64:
35549 icode = CODE_FOR_fxrstor64;
35550 break;
35551 case IX86_BUILTIN_FNSTENV:
35552 icode = CODE_FOR_fnstenv;
35553 break;
35554 case IX86_BUILTIN_FLDENV:
35555 icode = CODE_FOR_fldenv;
35556 break;
35557 default:
35558 gcc_unreachable ();
35559 }
35560
35561 arg0 = CALL_EXPR_ARG (exp, 0);
35562 op0 = expand_normal (arg0);
35563
35564 if (!address_operand (op0, VOIDmode))
35565 {
35566 op0 = convert_memory_address (Pmode, op0);
35567 op0 = copy_addr_to_reg (op0);
35568 }
35569 op0 = gen_rtx_MEM (mode0, op0);
35570
35571 pat = GEN_FCN (icode) (op0);
35572 if (pat)
35573 emit_insn (pat);
35574 return 0;
35575
35576 case IX86_BUILTIN_XSAVE:
35577 case IX86_BUILTIN_XRSTOR:
35578 case IX86_BUILTIN_XSAVE64:
35579 case IX86_BUILTIN_XRSTOR64:
35580 case IX86_BUILTIN_XSAVEOPT:
35581 case IX86_BUILTIN_XSAVEOPT64:
35582 case IX86_BUILTIN_XSAVES:
35583 case IX86_BUILTIN_XRSTORS:
35584 case IX86_BUILTIN_XSAVES64:
35585 case IX86_BUILTIN_XRSTORS64:
35586 case IX86_BUILTIN_XSAVEC:
35587 case IX86_BUILTIN_XSAVEC64:
35588 arg0 = CALL_EXPR_ARG (exp, 0);
35589 arg1 = CALL_EXPR_ARG (exp, 1);
35590 op0 = expand_normal (arg0);
35591 op1 = expand_normal (arg1);
35592
35593 if (!address_operand (op0, VOIDmode))
35594 {
35595 op0 = convert_memory_address (Pmode, op0);
35596 op0 = copy_addr_to_reg (op0);
35597 }
35598 op0 = gen_rtx_MEM (BLKmode, op0);
35599
35600 op1 = force_reg (DImode, op1);
35601
35602 if (TARGET_64BIT)
35603 {
35604 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35605 NULL, 1, OPTAB_DIRECT);
35606 switch (fcode)
35607 {
35608 case IX86_BUILTIN_XSAVE:
35609 icode = CODE_FOR_xsave_rex64;
35610 break;
35611 case IX86_BUILTIN_XRSTOR:
35612 icode = CODE_FOR_xrstor_rex64;
35613 break;
35614 case IX86_BUILTIN_XSAVE64:
35615 icode = CODE_FOR_xsave64;
35616 break;
35617 case IX86_BUILTIN_XRSTOR64:
35618 icode = CODE_FOR_xrstor64;
35619 break;
35620 case IX86_BUILTIN_XSAVEOPT:
35621 icode = CODE_FOR_xsaveopt_rex64;
35622 break;
35623 case IX86_BUILTIN_XSAVEOPT64:
35624 icode = CODE_FOR_xsaveopt64;
35625 break;
35626 case IX86_BUILTIN_XSAVES:
35627 icode = CODE_FOR_xsaves_rex64;
35628 break;
35629 case IX86_BUILTIN_XRSTORS:
35630 icode = CODE_FOR_xrstors_rex64;
35631 break;
35632 case IX86_BUILTIN_XSAVES64:
35633 icode = CODE_FOR_xsaves64;
35634 break;
35635 case IX86_BUILTIN_XRSTORS64:
35636 icode = CODE_FOR_xrstors64;
35637 break;
35638 case IX86_BUILTIN_XSAVEC:
35639 icode = CODE_FOR_xsavec_rex64;
35640 break;
35641 case IX86_BUILTIN_XSAVEC64:
35642 icode = CODE_FOR_xsavec64;
35643 break;
35644 default:
35645 gcc_unreachable ();
35646 }
35647
35648 op2 = gen_lowpart (SImode, op2);
35649 op1 = gen_lowpart (SImode, op1);
35650 pat = GEN_FCN (icode) (op0, op1, op2);
35651 }
35652 else
35653 {
35654 switch (fcode)
35655 {
35656 case IX86_BUILTIN_XSAVE:
35657 icode = CODE_FOR_xsave;
35658 break;
35659 case IX86_BUILTIN_XRSTOR:
35660 icode = CODE_FOR_xrstor;
35661 break;
35662 case IX86_BUILTIN_XSAVEOPT:
35663 icode = CODE_FOR_xsaveopt;
35664 break;
35665 case IX86_BUILTIN_XSAVES:
35666 icode = CODE_FOR_xsaves;
35667 break;
35668 case IX86_BUILTIN_XRSTORS:
35669 icode = CODE_FOR_xrstors;
35670 break;
35671 case IX86_BUILTIN_XSAVEC:
35672 icode = CODE_FOR_xsavec;
35673 break;
35674 default:
35675 gcc_unreachable ();
35676 }
35677 pat = GEN_FCN (icode) (op0, op1);
35678 }
35679
35680 if (pat)
35681 emit_insn (pat);
35682 return 0;
35683
35684 case IX86_BUILTIN_LLWPCB:
35685 arg0 = CALL_EXPR_ARG (exp, 0);
35686 op0 = expand_normal (arg0);
35687 icode = CODE_FOR_lwp_llwpcb;
35688 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35689 op0 = ix86_zero_extend_to_Pmode (op0);
35690 emit_insn (gen_lwp_llwpcb (op0));
35691 return 0;
35692
35693 case IX86_BUILTIN_SLWPCB:
35694 icode = CODE_FOR_lwp_slwpcb;
35695 if (!target
35696 || !insn_data[icode].operand[0].predicate (target, Pmode))
35697 target = gen_reg_rtx (Pmode);
35698 emit_insn (gen_lwp_slwpcb (target));
35699 return target;
35700
35701 case IX86_BUILTIN_BEXTRI32:
35702 case IX86_BUILTIN_BEXTRI64:
35703 arg0 = CALL_EXPR_ARG (exp, 0);
35704 arg1 = CALL_EXPR_ARG (exp, 1);
35705 op0 = expand_normal (arg0);
35706 op1 = expand_normal (arg1);
35707 icode = (fcode == IX86_BUILTIN_BEXTRI32
35708 ? CODE_FOR_tbm_bextri_si
35709 : CODE_FOR_tbm_bextri_di);
35710 if (!CONST_INT_P (op1))
35711 {
35712 error ("the last argument must be an immediate");
35713 return const0_rtx;
35714 }
35715 else
35716 {
35717 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35718 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35719 op1 = GEN_INT (length);
35720 op2 = GEN_INT (lsb_index);
35721 pat = GEN_FCN (icode) (target, op0, op1, op2);
35722 if (pat)
35723 emit_insn (pat);
35724 return target;
35725 }
35726
35727 case IX86_BUILTIN_RDRAND16_STEP:
35728 icode = CODE_FOR_rdrandhi_1;
35729 mode0 = HImode;
35730 goto rdrand_step;
35731
35732 case IX86_BUILTIN_RDRAND32_STEP:
35733 icode = CODE_FOR_rdrandsi_1;
35734 mode0 = SImode;
35735 goto rdrand_step;
35736
35737 case IX86_BUILTIN_RDRAND64_STEP:
35738 icode = CODE_FOR_rdranddi_1;
35739 mode0 = DImode;
35740
35741 rdrand_step:
35742 op0 = gen_reg_rtx (mode0);
35743 emit_insn (GEN_FCN (icode) (op0));
35744
35745 arg0 = CALL_EXPR_ARG (exp, 0);
35746 op1 = expand_normal (arg0);
35747 if (!address_operand (op1, VOIDmode))
35748 {
35749 op1 = convert_memory_address (Pmode, op1);
35750 op1 = copy_addr_to_reg (op1);
35751 }
35752 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35753
35754 op1 = gen_reg_rtx (SImode);
35755 emit_move_insn (op1, CONST1_RTX (SImode));
35756
35757 /* Emit SImode conditional move. */
35758 if (mode0 == HImode)
35759 {
35760 op2 = gen_reg_rtx (SImode);
35761 emit_insn (gen_zero_extendhisi2 (op2, op0));
35762 }
35763 else if (mode0 == SImode)
35764 op2 = op0;
35765 else
35766 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35767
35768 if (target == 0
35769 || !register_operand (target, SImode))
35770 target = gen_reg_rtx (SImode);
35771
35772 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35773 const0_rtx);
35774 emit_insn (gen_rtx_SET (VOIDmode, target,
35775 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35776 return target;
35777
35778 case IX86_BUILTIN_RDSEED16_STEP:
35779 icode = CODE_FOR_rdseedhi_1;
35780 mode0 = HImode;
35781 goto rdseed_step;
35782
35783 case IX86_BUILTIN_RDSEED32_STEP:
35784 icode = CODE_FOR_rdseedsi_1;
35785 mode0 = SImode;
35786 goto rdseed_step;
35787
35788 case IX86_BUILTIN_RDSEED64_STEP:
35789 icode = CODE_FOR_rdseeddi_1;
35790 mode0 = DImode;
35791
35792 rdseed_step:
35793 op0 = gen_reg_rtx (mode0);
35794 emit_insn (GEN_FCN (icode) (op0));
35795
35796 arg0 = CALL_EXPR_ARG (exp, 0);
35797 op1 = expand_normal (arg0);
35798 if (!address_operand (op1, VOIDmode))
35799 {
35800 op1 = convert_memory_address (Pmode, op1);
35801 op1 = copy_addr_to_reg (op1);
35802 }
35803 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35804
35805 op2 = gen_reg_rtx (QImode);
35806
35807 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35808 const0_rtx);
35809 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35810
35811 if (target == 0
35812 || !register_operand (target, SImode))
35813 target = gen_reg_rtx (SImode);
35814
35815 emit_insn (gen_zero_extendqisi2 (target, op2));
35816 return target;
35817
35818 case IX86_BUILTIN_SBB32:
35819 icode = CODE_FOR_subsi3_carry;
35820 mode0 = SImode;
35821 goto addcarryx;
35822
35823 case IX86_BUILTIN_SBB64:
35824 icode = CODE_FOR_subdi3_carry;
35825 mode0 = DImode;
35826 goto addcarryx;
35827
35828 case IX86_BUILTIN_ADDCARRYX32:
35829 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35830 mode0 = SImode;
35831 goto addcarryx;
35832
35833 case IX86_BUILTIN_ADDCARRYX64:
35834 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35835 mode0 = DImode;
35836
35837 addcarryx:
35838 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35839 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35840 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35841 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35842
35843 op0 = gen_reg_rtx (QImode);
35844
35845 /* Generate CF from the input operand: adding -1 sets CF exactly when the carry-in byte is nonzero. */
35846 op1 = expand_normal (arg0);
35847 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35848 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35849
35850 /* Generate an ADCX/ADC (or SBB) instruction to combine X, Y and CF. */
35851 op2 = expand_normal (arg1);
35852 op3 = expand_normal (arg2);
35853
35854 if (!REG_P (op2))
35855 op2 = copy_to_mode_reg (mode0, op2);
35856 if (!REG_P (op3))
35857 op3 = copy_to_mode_reg (mode0, op3);
35858
35859 op0 = gen_reg_rtx (mode0);
35860
35861 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35862 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35863 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35864
35865 /* Store the result. */
35866 op4 = expand_normal (arg3);
35867 if (!address_operand (op4, VOIDmode))
35868 {
35869 op4 = convert_memory_address (Pmode, op4);
35870 op4 = copy_addr_to_reg (op4);
35871 }
35872 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35873
35874 /* Return current CF value. */
35875 if (target == 0)
35876 target = gen_reg_rtx (QImode);
35877
35878 PUT_MODE (pat, QImode);
35879 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35880 return target;
35881
35882 case IX86_BUILTIN_READ_FLAGS:
35883 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35884
35885 if (optimize
35886 || target == NULL_RTX
35887 || !nonimmediate_operand (target, word_mode)
35888 || GET_MODE (target) != word_mode)
35889 target = gen_reg_rtx (word_mode);
35890
35891 emit_insn (gen_pop (target));
35892 return target;
35893
35894 case IX86_BUILTIN_WRITE_FLAGS:
35895
35896 arg0 = CALL_EXPR_ARG (exp, 0);
35897 op0 = expand_normal (arg0);
35898 if (!general_no_elim_operand (op0, word_mode))
35899 op0 = copy_to_mode_reg (word_mode, op0);
35900
35901 emit_insn (gen_push (op0));
35902 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35903 return 0;
35904
35905 case IX86_BUILTIN_KORTESTC16:
35906 icode = CODE_FOR_kortestchi;
35907 mode0 = HImode;
35908 mode1 = CCCmode;
35909 goto kortest;
35910
35911 case IX86_BUILTIN_KORTESTZ16:
35912 icode = CODE_FOR_kortestzhi;
35913 mode0 = HImode;
35914 mode1 = CCZmode;
35915
35916 kortest:
35917 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35918 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35919 op0 = expand_normal (arg0);
35920 op1 = expand_normal (arg1);
35921
35922 op0 = copy_to_reg (op0);
35923 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35924 op1 = copy_to_reg (op1);
35925 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35926
35927 target = gen_reg_rtx (QImode);
35928 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35929
35930 /* Emit kortest. */
35931 emit_insn (GEN_FCN (icode) (op0, op1));
35932 /* And use setcc to return result from flags. */
35933 ix86_expand_setcc (target, EQ,
35934 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35935 return target;
35936
35937 case IX86_BUILTIN_GATHERSIV2DF:
35938 icode = CODE_FOR_avx2_gathersiv2df;
35939 goto gather_gen;
35940 case IX86_BUILTIN_GATHERSIV4DF:
35941 icode = CODE_FOR_avx2_gathersiv4df;
35942 goto gather_gen;
35943 case IX86_BUILTIN_GATHERDIV2DF:
35944 icode = CODE_FOR_avx2_gatherdiv2df;
35945 goto gather_gen;
35946 case IX86_BUILTIN_GATHERDIV4DF:
35947 icode = CODE_FOR_avx2_gatherdiv4df;
35948 goto gather_gen;
35949 case IX86_BUILTIN_GATHERSIV4SF:
35950 icode = CODE_FOR_avx2_gathersiv4sf;
35951 goto gather_gen;
35952 case IX86_BUILTIN_GATHERSIV8SF:
35953 icode = CODE_FOR_avx2_gathersiv8sf;
35954 goto gather_gen;
35955 case IX86_BUILTIN_GATHERDIV4SF:
35956 icode = CODE_FOR_avx2_gatherdiv4sf;
35957 goto gather_gen;
35958 case IX86_BUILTIN_GATHERDIV8SF:
35959 icode = CODE_FOR_avx2_gatherdiv8sf;
35960 goto gather_gen;
35961 case IX86_BUILTIN_GATHERSIV2DI:
35962 icode = CODE_FOR_avx2_gathersiv2di;
35963 goto gather_gen;
35964 case IX86_BUILTIN_GATHERSIV4DI:
35965 icode = CODE_FOR_avx2_gathersiv4di;
35966 goto gather_gen;
35967 case IX86_BUILTIN_GATHERDIV2DI:
35968 icode = CODE_FOR_avx2_gatherdiv2di;
35969 goto gather_gen;
35970 case IX86_BUILTIN_GATHERDIV4DI:
35971 icode = CODE_FOR_avx2_gatherdiv4di;
35972 goto gather_gen;
35973 case IX86_BUILTIN_GATHERSIV4SI:
35974 icode = CODE_FOR_avx2_gathersiv4si;
35975 goto gather_gen;
35976 case IX86_BUILTIN_GATHERSIV8SI:
35977 icode = CODE_FOR_avx2_gathersiv8si;
35978 goto gather_gen;
35979 case IX86_BUILTIN_GATHERDIV4SI:
35980 icode = CODE_FOR_avx2_gatherdiv4si;
35981 goto gather_gen;
35982 case IX86_BUILTIN_GATHERDIV8SI:
35983 icode = CODE_FOR_avx2_gatherdiv8si;
35984 goto gather_gen;
35985 case IX86_BUILTIN_GATHERALTSIV4DF:
35986 icode = CODE_FOR_avx2_gathersiv4df;
35987 goto gather_gen;
35988 case IX86_BUILTIN_GATHERALTDIV8SF:
35989 icode = CODE_FOR_avx2_gatherdiv8sf;
35990 goto gather_gen;
35991 case IX86_BUILTIN_GATHERALTSIV4DI:
35992 icode = CODE_FOR_avx2_gathersiv4di;
35993 goto gather_gen;
35994 case IX86_BUILTIN_GATHERALTDIV8SI:
35995 icode = CODE_FOR_avx2_gatherdiv8si;
35996 goto gather_gen;
35997 case IX86_BUILTIN_GATHER3SIV16SF:
35998 icode = CODE_FOR_avx512f_gathersiv16sf;
35999 goto gather_gen;
36000 case IX86_BUILTIN_GATHER3SIV8DF:
36001 icode = CODE_FOR_avx512f_gathersiv8df;
36002 goto gather_gen;
36003 case IX86_BUILTIN_GATHER3DIV16SF:
36004 icode = CODE_FOR_avx512f_gatherdiv16sf;
36005 goto gather_gen;
36006 case IX86_BUILTIN_GATHER3DIV8DF:
36007 icode = CODE_FOR_avx512f_gatherdiv8df;
36008 goto gather_gen;
36009 case IX86_BUILTIN_GATHER3SIV16SI:
36010 icode = CODE_FOR_avx512f_gathersiv16si;
36011 goto gather_gen;
36012 case IX86_BUILTIN_GATHER3SIV8DI:
36013 icode = CODE_FOR_avx512f_gathersiv8di;
36014 goto gather_gen;
36015 case IX86_BUILTIN_GATHER3DIV16SI:
36016 icode = CODE_FOR_avx512f_gatherdiv16si;
36017 goto gather_gen;
36018 case IX86_BUILTIN_GATHER3DIV8DI:
36019 icode = CODE_FOR_avx512f_gatherdiv8di;
36020 goto gather_gen;
36021 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36022 icode = CODE_FOR_avx512f_gathersiv8df;
36023 goto gather_gen;
36024 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36025 icode = CODE_FOR_avx512f_gatherdiv16sf;
36026 goto gather_gen;
36027 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36028 icode = CODE_FOR_avx512f_gathersiv8di;
36029 goto gather_gen;
36030 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36031 icode = CODE_FOR_avx512f_gatherdiv16si;
36032 goto gather_gen;
36033 case IX86_BUILTIN_SCATTERSIV16SF:
36034 icode = CODE_FOR_avx512f_scattersiv16sf;
36035 goto scatter_gen;
36036 case IX86_BUILTIN_SCATTERSIV8DF:
36037 icode = CODE_FOR_avx512f_scattersiv8df;
36038 goto scatter_gen;
36039 case IX86_BUILTIN_SCATTERDIV16SF:
36040 icode = CODE_FOR_avx512f_scatterdiv16sf;
36041 goto scatter_gen;
36042 case IX86_BUILTIN_SCATTERDIV8DF:
36043 icode = CODE_FOR_avx512f_scatterdiv8df;
36044 goto scatter_gen;
36045 case IX86_BUILTIN_SCATTERSIV16SI:
36046 icode = CODE_FOR_avx512f_scattersiv16si;
36047 goto scatter_gen;
36048 case IX86_BUILTIN_SCATTERSIV8DI:
36049 icode = CODE_FOR_avx512f_scattersiv8di;
36050 goto scatter_gen;
36051 case IX86_BUILTIN_SCATTERDIV16SI:
36052 icode = CODE_FOR_avx512f_scatterdiv16si;
36053 goto scatter_gen;
36054 case IX86_BUILTIN_SCATTERDIV8DI:
36055 icode = CODE_FOR_avx512f_scatterdiv8di;
36056 goto scatter_gen;
36057
36058 case IX86_BUILTIN_GATHERPFDPD:
36059 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
36060 goto vec_prefetch_gen;
36061 case IX86_BUILTIN_GATHERPFDPS:
36062 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
36063 goto vec_prefetch_gen;
36064 case IX86_BUILTIN_GATHERPFQPD:
36065 icode = CODE_FOR_avx512pf_gatherpfv8didf;
36066 goto vec_prefetch_gen;
36067 case IX86_BUILTIN_GATHERPFQPS:
36068 icode = CODE_FOR_avx512pf_gatherpfv8disf;
36069 goto vec_prefetch_gen;
36070 case IX86_BUILTIN_SCATTERPFDPD:
36071 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
36072 goto vec_prefetch_gen;
36073 case IX86_BUILTIN_SCATTERPFDPS:
36074 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
36075 goto vec_prefetch_gen;
36076 case IX86_BUILTIN_SCATTERPFQPD:
36077 icode = CODE_FOR_avx512pf_scatterpfv8didf;
36078 goto vec_prefetch_gen;
36079 case IX86_BUILTIN_SCATTERPFQPS:
36080 icode = CODE_FOR_avx512pf_scatterpfv8disf;
36081 goto vec_prefetch_gen;
36082
36083 gather_gen:
36084 rtx half;
36085 rtx (*gen) (rtx, rtx);
36086
36087 arg0 = CALL_EXPR_ARG (exp, 0);
36088 arg1 = CALL_EXPR_ARG (exp, 1);
36089 arg2 = CALL_EXPR_ARG (exp, 2);
36090 arg3 = CALL_EXPR_ARG (exp, 3);
36091 arg4 = CALL_EXPR_ARG (exp, 4);
36092 op0 = expand_normal (arg0);
36093 op1 = expand_normal (arg1);
36094 op2 = expand_normal (arg2);
36095 op3 = expand_normal (arg3);
36096 op4 = expand_normal (arg4);
36097 /* Note the arg order is different from the operand order. */
36098 mode0 = insn_data[icode].operand[1].mode;
36099 mode2 = insn_data[icode].operand[3].mode;
36100 mode3 = insn_data[icode].operand[4].mode;
36101 mode4 = insn_data[icode].operand[5].mode;
36102
36103 if (target == NULL_RTX
36104 || GET_MODE (target) != insn_data[icode].operand[0].mode
36105 || !insn_data[icode].operand[0].predicate (target,
36106 GET_MODE (target)))
36107 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
36108 else
36109 subtarget = target;
36110
36111 switch (fcode)
36112 {
36113 case IX86_BUILTIN_GATHER3ALTSIV8DF:
36114 case IX86_BUILTIN_GATHER3ALTSIV8DI:
36115 half = gen_reg_rtx (V8SImode);
36116 if (!nonimmediate_operand (op2, V16SImode))
36117 op2 = copy_to_mode_reg (V16SImode, op2);
36118 emit_insn (gen_vec_extract_lo_v16si (half, op2));
36119 op2 = half;
36120 break;
36121 case IX86_BUILTIN_GATHERALTSIV4DF:
36122 case IX86_BUILTIN_GATHERALTSIV4DI:
36123 half = gen_reg_rtx (V4SImode);
36124 if (!nonimmediate_operand (op2, V8SImode))
36125 op2 = copy_to_mode_reg (V8SImode, op2);
36126 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36127 op2 = half;
36128 break;
36129 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36130 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36131 half = gen_reg_rtx (mode0);
36132 if (mode0 == V8SFmode)
36133 gen = gen_vec_extract_lo_v16sf;
36134 else
36135 gen = gen_vec_extract_lo_v16si;
36136 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36137 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36138 emit_insn (gen (half, op0));
36139 op0 = half;
36140 if (GET_MODE (op3) != VOIDmode)
36141 {
36142 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36143 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36144 emit_insn (gen (half, op3));
36145 op3 = half;
36146 }
36147 break;
36148 case IX86_BUILTIN_GATHERALTDIV8SF:
36149 case IX86_BUILTIN_GATHERALTDIV8SI:
36150 half = gen_reg_rtx (mode0);
36151 if (mode0 == V4SFmode)
36152 gen = gen_vec_extract_lo_v8sf;
36153 else
36154 gen = gen_vec_extract_lo_v8si;
36155 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36156 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36157 emit_insn (gen (half, op0));
36158 op0 = half;
36159 if (GET_MODE (op3) != VOIDmode)
36160 {
36161 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36162 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36163 emit_insn (gen (half, op3));
36164 op3 = half;
36165 }
36166 break;
36167 default:
36168 break;
36169 }
36170
36171 /* Force the memory operand to use only a base register here.  We
36172 don't want to do that for the memory operands of other builtin
36173 functions. */
36174 op1 = ix86_zero_extend_to_Pmode (op1);
36175
36176 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36177 op0 = copy_to_mode_reg (mode0, op0);
36178 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36179 op1 = copy_to_mode_reg (Pmode, op1);
36180 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36181 op2 = copy_to_mode_reg (mode2, op2);
36182 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36183 {
36184 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36185 op3 = copy_to_mode_reg (mode3, op3);
36186 }
36187 else
36188 {
36189 op3 = copy_to_reg (op3);
36190 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36191 }
36192 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36193 {
36194 error ("the last argument must be scale 1, 2, 4, 8");
36195 return const0_rtx;
36196 }
36197
36198 /* Optimize. If mask is known to have all high bits set,
36199 replace op0 with pc_rtx to signal that the instruction
36200 overwrites the whole destination and doesn't use its
36201 previous contents. */
36202 if (optimize)
36203 {
36204 if (TREE_CODE (arg3) == INTEGER_CST)
36205 {
36206 if (integer_all_onesp (arg3))
36207 op0 = pc_rtx;
36208 }
36209 else if (TREE_CODE (arg3) == VECTOR_CST)
36210 {
36211 unsigned int negative = 0;
36212 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36213 {
36214 tree cst = VECTOR_CST_ELT (arg3, i);
36215 if (TREE_CODE (cst) == INTEGER_CST
36216 && tree_int_cst_sign_bit (cst))
36217 negative++;
36218 else if (TREE_CODE (cst) == REAL_CST
36219 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36220 negative++;
36221 }
36222 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36223 op0 = pc_rtx;
36224 }
36225 else if (TREE_CODE (arg3) == SSA_NAME
36226 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36227 {
36228 /* Recognize also when mask is like:
36229 __v2df src = _mm_setzero_pd ();
36230 __v2df mask = _mm_cmpeq_pd (src, src);
36231 or
36232 __v8sf src = _mm256_setzero_ps ();
36233 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36234 as that is a cheaper way to load all ones into
36235 a register than having to load a constant from
36236 memory. */
36237 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36238 if (is_gimple_call (def_stmt))
36239 {
36240 tree fndecl = gimple_call_fndecl (def_stmt);
36241 if (fndecl
36242 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36243 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36244 {
36245 case IX86_BUILTIN_CMPPD:
36246 case IX86_BUILTIN_CMPPS:
36247 case IX86_BUILTIN_CMPPD256:
36248 case IX86_BUILTIN_CMPPS256:
36249 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36250 break;
36251 /* FALLTHRU */
36252 case IX86_BUILTIN_CMPEQPD:
36253 case IX86_BUILTIN_CMPEQPS:
36254 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36255 && initializer_zerop (gimple_call_arg (def_stmt,
36256 1)))
36257 op0 = pc_rtx;
36258 break;
36259 default:
36260 break;
36261 }
36262 }
36263 }
36264 }
36265
36266 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36267 if (! pat)
36268 return const0_rtx;
36269 emit_insn (pat);
36270
36271 switch (fcode)
36272 {
36273 case IX86_BUILTIN_GATHER3DIV16SF:
36274 if (target == NULL_RTX)
36275 target = gen_reg_rtx (V8SFmode);
36276 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36277 break;
36278 case IX86_BUILTIN_GATHER3DIV16SI:
36279 if (target == NULL_RTX)
36280 target = gen_reg_rtx (V8SImode);
36281 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36282 break;
36283 case IX86_BUILTIN_GATHERDIV8SF:
36284 if (target == NULL_RTX)
36285 target = gen_reg_rtx (V4SFmode);
36286 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36287 break;
36288 case IX86_BUILTIN_GATHERDIV8SI:
36289 if (target == NULL_RTX)
36290 target = gen_reg_rtx (V4SImode);
36291 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36292 break;
36293 default:
36294 target = subtarget;
36295 break;
36296 }
36297 return target;
36298
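/* Illustration only, not part of the compiler: the kind of user-level
   gather these builtins back.  A minimal sketch, assuming the usual
   <immintrin.h> AVX2 prototype
     __m256d _mm256_i32gather_pd (double const *base, __m128i index,
                                  const int scale);
   The scale operand must be a literal 1, 2, 4 or 8, which is what the
   predicate check above enforces:

     #include <immintrin.h>
     // Load a[idx[0..3]] into one 256-bit vector (compile with -mavx2).
     __m256d
     gather4 (const double *a, __m128i idx)
     {
       return _mm256_i32gather_pd (a, idx, 8);   // scale 8 = sizeof (double)
     }  */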
36299 scatter_gen:
36300 arg0 = CALL_EXPR_ARG (exp, 0);
36301 arg1 = CALL_EXPR_ARG (exp, 1);
36302 arg2 = CALL_EXPR_ARG (exp, 2);
36303 arg3 = CALL_EXPR_ARG (exp, 3);
36304 arg4 = CALL_EXPR_ARG (exp, 4);
36305 op0 = expand_normal (arg0);
36306 op1 = expand_normal (arg1);
36307 op2 = expand_normal (arg2);
36308 op3 = expand_normal (arg3);
36309 op4 = expand_normal (arg4);
36310 mode1 = insn_data[icode].operand[1].mode;
36311 mode2 = insn_data[icode].operand[2].mode;
36312 mode3 = insn_data[icode].operand[3].mode;
36313 mode4 = insn_data[icode].operand[4].mode;
36314
36315 /* Force the memory operand to use only a base register here.  We
36316 don't want to do that for the memory operands of other builtin
36317 functions. */
36318 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36319
36320 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36321 op0 = copy_to_mode_reg (Pmode, op0);
36322
36323 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36324 {
36325 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36326 op1 = copy_to_mode_reg (mode1, op1);
36327 }
36328 else
36329 {
36330 op1 = copy_to_reg (op1);
36331 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36332 }
36333
36334 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36335 op2 = copy_to_mode_reg (mode2, op2);
36336
36337 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36338 op3 = copy_to_mode_reg (mode3, op3);
36339
36340 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36341 {
36342 error ("the last argument must be scale 1, 2, 4, 8");
36343 return const0_rtx;
36344 }
36345
36346 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36347 if (! pat)
36348 return const0_rtx;
36349
36350 emit_insn (pat);
36351 return 0;
36352
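/* Illustration only, not part of the compiler: the user-level scatter
   store expanded through scatter_gen.  A minimal sketch, assuming the
   usual <immintrin.h> AVX-512F prototype
     void _mm512_i32scatter_ps (void *base, __m512i index, __m512 v,
                                const int scale);
   Again the scale must be a literal 1, 2, 4 or 8:

     #include <immintrin.h>
     // Store the 16 lanes of v to a[idx[0..15]] (compile with -mavx512f).
     void
     scatter16 (float *a, __m512i idx, __m512 v)
     {
       _mm512_i32scatter_ps (a, idx, v, 4);   // scale 4 = sizeof (float)
     }  */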
36353 vec_prefetch_gen:
36354 arg0 = CALL_EXPR_ARG (exp, 0);
36355 arg1 = CALL_EXPR_ARG (exp, 1);
36356 arg2 = CALL_EXPR_ARG (exp, 2);
36357 arg3 = CALL_EXPR_ARG (exp, 3);
36358 arg4 = CALL_EXPR_ARG (exp, 4);
36359 op0 = expand_normal (arg0);
36360 op1 = expand_normal (arg1);
36361 op2 = expand_normal (arg2);
36362 op3 = expand_normal (arg3);
36363 op4 = expand_normal (arg4);
36364 mode0 = insn_data[icode].operand[0].mode;
36365 mode1 = insn_data[icode].operand[1].mode;
36366 mode3 = insn_data[icode].operand[3].mode;
36367 mode4 = insn_data[icode].operand[4].mode;
36368
36369 if (GET_MODE (op0) == mode0
36370 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36371 {
36372 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36373 op0 = copy_to_mode_reg (mode0, op0);
36374 }
36375 else if (op0 != constm1_rtx)
36376 {
36377 op0 = copy_to_reg (op0);
36378 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36379 }
36380
36381 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36382 op1 = copy_to_mode_reg (mode1, op1);
36383
36384 /* Force the memory operand to use only a base register here.  We
36385 don't want to do that for the memory operands of other builtin
36386 functions. */
36387 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36388
36389 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36390 op2 = copy_to_mode_reg (Pmode, op2);
36391
36392 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36393 {
36394 error ("the forth argument must be scale 1, 2, 4, 8");
36395 return const0_rtx;
36396 }
36397
36398 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36399 {
36400 error ("incorrect hint operand");
36401 return const0_rtx;
36402 }
36403
36404 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36405 if (! pat)
36406 return const0_rtx;
36407
36408 emit_insn (pat);
36409
36410 return 0;
36411
36412 case IX86_BUILTIN_XABORT:
36413 icode = CODE_FOR_xabort;
36414 arg0 = CALL_EXPR_ARG (exp, 0);
36415 op0 = expand_normal (arg0);
36416 mode0 = insn_data[icode].operand[0].mode;
36417 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36418 {
36419 error ("the xabort's argument must be an 8-bit immediate");
36420 return const0_rtx;
36421 }
36422 emit_insn (gen_xabort (op0));
36423 return 0;
36424
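/* Illustration only, not part of the compiler: the RTM intrinsic this
   builtin backs.  A minimal sketch, assuming the usual <immintrin.h>
   RTM interface (_xbegin/_xabort/_xend, compile with -mrtm); the
   argument to _xabort must be an 8-bit compile-time constant, which is
   what the predicate check above enforces:

     #include <immintrin.h>
     // Try to increment *p transactionally; abort with code 0x42 if
     // the counter is already saturated.
     void
     bump (unsigned int *p)
     {
       if (_xbegin () == _XBEGIN_STARTED)
         {
           if (*p == ~0u)
             _xabort (0x42);
           ++*p;
           _xend ();
         }
     }  */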
36425 default:
36426 break;
36427 }
36428
36429 for (i = 0, d = bdesc_special_args;
36430 i < ARRAY_SIZE (bdesc_special_args);
36431 i++, d++)
36432 if (d->code == fcode)
36433 return ix86_expand_special_args_builtin (d, exp, target);
36434
36435 for (i = 0, d = bdesc_args;
36436 i < ARRAY_SIZE (bdesc_args);
36437 i++, d++)
36438 if (d->code == fcode)
36439 switch (fcode)
36440 {
36441 case IX86_BUILTIN_FABSQ:
36442 case IX86_BUILTIN_COPYSIGNQ:
36443 if (!TARGET_SSE)
36444 /* Emit a normal call if SSE isn't available. */
36445 return expand_call (exp, target, ignore);
36446 default:
36447 return ix86_expand_args_builtin (d, exp, target);
36448 }
36449
36450 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36451 if (d->code == fcode)
36452 return ix86_expand_sse_comi (d, exp, target);
36453
36454 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36455 if (d->code == fcode)
36456 return ix86_expand_round_builtin (d, exp, target);
36457
36458 for (i = 0, d = bdesc_pcmpestr;
36459 i < ARRAY_SIZE (bdesc_pcmpestr);
36460 i++, d++)
36461 if (d->code == fcode)
36462 return ix86_expand_sse_pcmpestr (d, exp, target);
36463
36464 for (i = 0, d = bdesc_pcmpistr;
36465 i < ARRAY_SIZE (bdesc_pcmpistr);
36466 i++, d++)
36467 if (d->code == fcode)
36468 return ix86_expand_sse_pcmpistr (d, exp, target);
36469
36470 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36471 if (d->code == fcode)
36472 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36473 (enum ix86_builtin_func_type)
36474 d->flag, d->comparison);
36475
36476 gcc_unreachable ();
36477 }
36478
36479 /* Return the target-specific builtin with code CODE if
36480 current_function_decl can use this builtin, as determined by its
36481 ISA flags. Return NULL_TREE otherwise. */
36482
36483 static tree ix86_get_builtin (enum ix86_builtins code)
36484 {
36485 struct cl_target_option *opts;
36486 tree target_tree = NULL_TREE;
36487
36488 /* Determine the isa flags of current_function_decl. */
36489
36490 if (current_function_decl)
36491 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36492
36493 if (target_tree == NULL)
36494 target_tree = target_option_default_node;
36495
36496 opts = TREE_TARGET_OPTION (target_tree);
36497
36498 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36499 return ix86_builtin_decl (code, true);
36500 else
36501 return NULL_TREE;
36502 }
36503
36504 /* Return a function decl for a vectorized version of the builtin function
36505 FNDECL, with output vector type TYPE_OUT and input vector type TYPE_IN,
36506 or NULL_TREE if it is not available. */
36507
36508 static tree
36509 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36510 tree type_in)
36511 {
36512 enum machine_mode in_mode, out_mode;
36513 int in_n, out_n;
36514 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36515
36516 if (TREE_CODE (type_out) != VECTOR_TYPE
36517 || TREE_CODE (type_in) != VECTOR_TYPE
36518 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36519 return NULL_TREE;
36520
36521 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36522 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36523 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36524 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36525
36526 switch (fn)
36527 {
36528 case BUILT_IN_SQRT:
36529 if (out_mode == DFmode && in_mode == DFmode)
36530 {
36531 if (out_n == 2 && in_n == 2)
36532 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36533 else if (out_n == 4 && in_n == 4)
36534 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36535 else if (out_n == 8 && in_n == 8)
36536 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36537 }
36538 break;
36539
36540 case BUILT_IN_EXP2F:
36541 if (out_mode == SFmode && in_mode == SFmode)
36542 {
36543 if (out_n == 16 && in_n == 16)
36544 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36545 }
36546 break;
36547
36548 case BUILT_IN_SQRTF:
36549 if (out_mode == SFmode && in_mode == SFmode)
36550 {
36551 if (out_n == 4 && in_n == 4)
36552 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36553 else if (out_n == 8 && in_n == 8)
36554 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36555 else if (out_n == 16 && in_n == 16)
36556 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36557 }
36558 break;
36559
36560 case BUILT_IN_IFLOOR:
36561 case BUILT_IN_LFLOOR:
36562 case BUILT_IN_LLFLOOR:
36563 /* The round insn does not trap on denormals. */
36564 if (flag_trapping_math || !TARGET_ROUND)
36565 break;
36566
36567 if (out_mode == SImode && in_mode == DFmode)
36568 {
36569 if (out_n == 4 && in_n == 2)
36570 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36571 else if (out_n == 8 && in_n == 4)
36572 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36573 else if (out_n == 16 && in_n == 8)
36574 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36575 }
36576 break;
36577
36578 case BUILT_IN_IFLOORF:
36579 case BUILT_IN_LFLOORF:
36580 case BUILT_IN_LLFLOORF:
36581 /* The round insn does not trap on denormals. */
36582 if (flag_trapping_math || !TARGET_ROUND)
36583 break;
36584
36585 if (out_mode == SImode && in_mode == SFmode)
36586 {
36587 if (out_n == 4 && in_n == 4)
36588 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36589 else if (out_n == 8 && in_n == 8)
36590 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36591 }
36592 break;
36593
36594 case BUILT_IN_ICEIL:
36595 case BUILT_IN_LCEIL:
36596 case BUILT_IN_LLCEIL:
36597 /* The round insn does not trap on denormals. */
36598 if (flag_trapping_math || !TARGET_ROUND)
36599 break;
36600
36601 if (out_mode == SImode && in_mode == DFmode)
36602 {
36603 if (out_n == 4 && in_n == 2)
36604 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36605 else if (out_n == 8 && in_n == 4)
36606 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36607 else if (out_n == 16 && in_n == 8)
36608 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36609 }
36610 break;
36611
36612 case BUILT_IN_ICEILF:
36613 case BUILT_IN_LCEILF:
36614 case BUILT_IN_LLCEILF:
36615 /* The round insn does not trap on denormals. */
36616 if (flag_trapping_math || !TARGET_ROUND)
36617 break;
36618
36619 if (out_mode == SImode && in_mode == SFmode)
36620 {
36621 if (out_n == 4 && in_n == 4)
36622 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36623 else if (out_n == 8 && in_n == 8)
36624 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36625 }
36626 break;
36627
36628 case BUILT_IN_IRINT:
36629 case BUILT_IN_LRINT:
36630 case BUILT_IN_LLRINT:
36631 if (out_mode == SImode && in_mode == DFmode)
36632 {
36633 if (out_n == 4 && in_n == 2)
36634 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36635 else if (out_n == 8 && in_n == 4)
36636 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36637 }
36638 break;
36639
36640 case BUILT_IN_IRINTF:
36641 case BUILT_IN_LRINTF:
36642 case BUILT_IN_LLRINTF:
36643 if (out_mode == SImode && in_mode == SFmode)
36644 {
36645 if (out_n == 4 && in_n == 4)
36646 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36647 else if (out_n == 8 && in_n == 8)
36648 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36649 }
36650 break;
36651
36652 case BUILT_IN_IROUND:
36653 case BUILT_IN_LROUND:
36654 case BUILT_IN_LLROUND:
36655 /* The round insn does not trap on denormals. */
36656 if (flag_trapping_math || !TARGET_ROUND)
36657 break;
36658
36659 if (out_mode == SImode && in_mode == DFmode)
36660 {
36661 if (out_n == 4 && in_n == 2)
36662 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36663 else if (out_n == 8 && in_n == 4)
36664 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36665 else if (out_n == 16 && in_n == 8)
36666 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36667 }
36668 break;
36669
36670 case BUILT_IN_IROUNDF:
36671 case BUILT_IN_LROUNDF:
36672 case BUILT_IN_LLROUNDF:
36673 /* The round insn does not trap on denormals. */
36674 if (flag_trapping_math || !TARGET_ROUND)
36675 break;
36676
36677 if (out_mode == SImode && in_mode == SFmode)
36678 {
36679 if (out_n == 4 && in_n == 4)
36680 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36681 else if (out_n == 8 && in_n == 8)
36682 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36683 }
36684 break;
36685
36686 case BUILT_IN_COPYSIGN:
36687 if (out_mode == DFmode && in_mode == DFmode)
36688 {
36689 if (out_n == 2 && in_n == 2)
36690 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36691 else if (out_n == 4 && in_n == 4)
36692 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36693 else if (out_n == 8 && in_n == 8)
36694 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36695 }
36696 break;
36697
36698 case BUILT_IN_COPYSIGNF:
36699 if (out_mode == SFmode && in_mode == SFmode)
36700 {
36701 if (out_n == 4 && in_n == 4)
36702 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36703 else if (out_n == 8 && in_n == 8)
36704 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36705 else if (out_n == 16 && in_n == 16)
36706 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36707 }
36708 break;
36709
36710 case BUILT_IN_FLOOR:
36711 /* The round insn does not trap on denormals. */
36712 if (flag_trapping_math || !TARGET_ROUND)
36713 break;
36714
36715 if (out_mode == DFmode && in_mode == DFmode)
36716 {
36717 if (out_n == 2 && in_n == 2)
36718 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36719 else if (out_n == 4 && in_n == 4)
36720 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36721 }
36722 break;
36723
36724 case BUILT_IN_FLOORF:
36725 /* The round insn does not trap on denormals. */
36726 if (flag_trapping_math || !TARGET_ROUND)
36727 break;
36728
36729 if (out_mode == SFmode && in_mode == SFmode)
36730 {
36731 if (out_n == 4 && in_n == 4)
36732 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36733 else if (out_n == 8 && in_n == 8)
36734 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36735 }
36736 break;
36737
36738 case BUILT_IN_CEIL:
36739 /* The round insn does not trap on denormals. */
36740 if (flag_trapping_math || !TARGET_ROUND)
36741 break;
36742
36743 if (out_mode == DFmode && in_mode == DFmode)
36744 {
36745 if (out_n == 2 && in_n == 2)
36746 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36747 else if (out_n == 4 && in_n == 4)
36748 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36749 }
36750 break;
36751
36752 case BUILT_IN_CEILF:
36753 /* The round insn does not trap on denormals. */
36754 if (flag_trapping_math || !TARGET_ROUND)
36755 break;
36756
36757 if (out_mode == SFmode && in_mode == SFmode)
36758 {
36759 if (out_n == 4 && in_n == 4)
36760 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36761 else if (out_n == 8 && in_n == 8)
36762 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36763 }
36764 break;
36765
36766 case BUILT_IN_TRUNC:
36767 /* The round insn does not trap on denormals. */
36768 if (flag_trapping_math || !TARGET_ROUND)
36769 break;
36770
36771 if (out_mode == DFmode && in_mode == DFmode)
36772 {
36773 if (out_n == 2 && in_n == 2)
36774 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36775 else if (out_n == 4 && in_n == 4)
36776 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36777 }
36778 break;
36779
36780 case BUILT_IN_TRUNCF:
36781 /* The round insn does not trap on denormals. */
36782 if (flag_trapping_math || !TARGET_ROUND)
36783 break;
36784
36785 if (out_mode == SFmode && in_mode == SFmode)
36786 {
36787 if (out_n == 4 && in_n == 4)
36788 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36789 else if (out_n == 8 && in_n == 8)
36790 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36791 }
36792 break;
36793
36794 case BUILT_IN_RINT:
36795 /* The round insn does not trap on denormals. */
36796 if (flag_trapping_math || !TARGET_ROUND)
36797 break;
36798
36799 if (out_mode == DFmode && in_mode == DFmode)
36800 {
36801 if (out_n == 2 && in_n == 2)
36802 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36803 else if (out_n == 4 && in_n == 4)
36804 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36805 }
36806 break;
36807
36808 case BUILT_IN_RINTF:
36809 /* The round insn does not trap on denormals. */
36810 if (flag_trapping_math || !TARGET_ROUND)
36811 break;
36812
36813 if (out_mode == SFmode && in_mode == SFmode)
36814 {
36815 if (out_n == 4 && in_n == 4)
36816 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36817 else if (out_n == 8 && in_n == 8)
36818 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36819 }
36820 break;
36821
36822 case BUILT_IN_ROUND:
36823 /* The round insn does not trap on denormals. */
36824 if (flag_trapping_math || !TARGET_ROUND)
36825 break;
36826
36827 if (out_mode == DFmode && in_mode == DFmode)
36828 {
36829 if (out_n == 2 && in_n == 2)
36830 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36831 else if (out_n == 4 && in_n == 4)
36832 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36833 }
36834 break;
36835
36836 case BUILT_IN_ROUNDF:
36837 /* The round insn does not trap on denormals. */
36838 if (flag_trapping_math || !TARGET_ROUND)
36839 break;
36840
36841 if (out_mode == SFmode && in_mode == SFmode)
36842 {
36843 if (out_n == 4 && in_n == 4)
36844 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36845 else if (out_n == 8 && in_n == 8)
36846 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36847 }
36848 break;
36849
36850 case BUILT_IN_FMA:
36851 if (out_mode == DFmode && in_mode == DFmode)
36852 {
36853 if (out_n == 2 && in_n == 2)
36854 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36855 if (out_n == 4 && in_n == 4)
36856 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36857 }
36858 break;
36859
36860 case BUILT_IN_FMAF:
36861 if (out_mode == SFmode && in_mode == SFmode)
36862 {
36863 if (out_n == 4 && in_n == 4)
36864 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36865 if (out_n == 8 && in_n == 8)
36866 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36867 }
36868 break;
36869
36870 default:
36871 break;
36872 }
36873
36874 /* Dispatch to a handler for a vectorization library. */
36875 if (ix86_veclib_handler)
36876 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36877 type_in);
36878
36879 return NULL_TREE;
36880 }
36881
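/* Illustration only, not part of the compiler: the kind of scalar loop
   for which the hook above supplies a vectorized builtin.  With
   -O2 -ftree-vectorize -mavx -fno-math-errno, the sqrt calls below can
   be replaced by the IX86_BUILTIN_SQRTPD256 decl returned for
   BUILT_IN_SQRT:

     #include <math.h>
     void
     vec_sqrt (double *restrict out, const double *restrict in, int n)
     {
       for (int i = 0; i < n; i++)
         out[i] = sqrt (in[i]);
     }  */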
36882 /* Handler for an SVML-style interface to
36883 a library with vectorized intrinsics. */
36884
36885 static tree
36886 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36887 {
36888 char name[20];
36889 tree fntype, new_fndecl, args;
36890 unsigned arity;
36891 const char *bname;
36892 enum machine_mode el_mode, in_mode;
36893 int n, in_n;
36894
36895 /* SVML is suitable for unsafe math only. */
36896 if (!flag_unsafe_math_optimizations)
36897 return NULL_TREE;
36898
36899 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36900 n = TYPE_VECTOR_SUBPARTS (type_out);
36901 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36902 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36903 if (el_mode != in_mode
36904 || n != in_n)
36905 return NULL_TREE;
36906
36907 switch (fn)
36908 {
36909 case BUILT_IN_EXP:
36910 case BUILT_IN_LOG:
36911 case BUILT_IN_LOG10:
36912 case BUILT_IN_POW:
36913 case BUILT_IN_TANH:
36914 case BUILT_IN_TAN:
36915 case BUILT_IN_ATAN:
36916 case BUILT_IN_ATAN2:
36917 case BUILT_IN_ATANH:
36918 case BUILT_IN_CBRT:
36919 case BUILT_IN_SINH:
36920 case BUILT_IN_SIN:
36921 case BUILT_IN_ASINH:
36922 case BUILT_IN_ASIN:
36923 case BUILT_IN_COSH:
36924 case BUILT_IN_COS:
36925 case BUILT_IN_ACOSH:
36926 case BUILT_IN_ACOS:
36927 if (el_mode != DFmode || n != 2)
36928 return NULL_TREE;
36929 break;
36930
36931 case BUILT_IN_EXPF:
36932 case BUILT_IN_LOGF:
36933 case BUILT_IN_LOG10F:
36934 case BUILT_IN_POWF:
36935 case BUILT_IN_TANHF:
36936 case BUILT_IN_TANF:
36937 case BUILT_IN_ATANF:
36938 case BUILT_IN_ATAN2F:
36939 case BUILT_IN_ATANHF:
36940 case BUILT_IN_CBRTF:
36941 case BUILT_IN_SINHF:
36942 case BUILT_IN_SINF:
36943 case BUILT_IN_ASINHF:
36944 case BUILT_IN_ASINF:
36945 case BUILT_IN_COSHF:
36946 case BUILT_IN_COSF:
36947 case BUILT_IN_ACOSHF:
36948 case BUILT_IN_ACOSF:
36949 if (el_mode != SFmode || n != 4)
36950 return NULL_TREE;
36951 break;
36952
36953 default:
36954 return NULL_TREE;
36955 }
36956
36957 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36958
36959 if (fn == BUILT_IN_LOGF)
36960 strcpy (name, "vmlsLn4");
36961 else if (fn == BUILT_IN_LOG)
36962 strcpy (name, "vmldLn2");
36963 else if (n == 4)
36964 {
36965 sprintf (name, "vmls%s", bname+10);
36966 name[strlen (name)-1] = '4';
36967 }
36968 else
36969 sprintf (name, "vmld%s2", bname+10);
36970
36971 /* Convert to uppercase. */
36972 name[4] &= ~0x20;
36973
36974 arity = 0;
36975 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36976 args;
36977 args = TREE_CHAIN (args))
36978 arity++;
36979
36980 if (arity == 1)
36981 fntype = build_function_type_list (type_out, type_in, NULL);
36982 else
36983 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36984
36985 /* Build a function declaration for the vectorized function. */
36986 new_fndecl = build_decl (BUILTINS_LOCATION,
36987 FUNCTION_DECL, get_identifier (name), fntype);
36988 TREE_PUBLIC (new_fndecl) = 1;
36989 DECL_EXTERNAL (new_fndecl) = 1;
36990 DECL_IS_NOVOPS (new_fndecl) = 1;
36991 TREE_READONLY (new_fndecl) = 1;
36992
36993 return new_fndecl;
36994 }
36995
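/* Illustration only: a worked example of the SVML name mangling above.
   For BUILT_IN_SINF, builtin_decl_implicit gives "__builtin_sinf", so
   bname+10 is "sinf"; "vmls%s" yields "vmlssinf", overwriting the last
   character with '4' gives "vmlssin4", and clearing bit 0x20 of name[4]
   upcases it to "vmlsSin4".  The double-precision path goes through
   "vmld%s2" instead, e.g. BUILT_IN_SIN becomes "vmldSin2".  */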
36996 /* Handler for an ACML-style interface to
36997 a library with vectorized intrinsics. */
36998
36999 static tree
37000 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
37001 {
37002 char name[20] = "__vr.._";
37003 tree fntype, new_fndecl, args;
37004 unsigned arity;
37005 const char *bname;
37006 enum machine_mode el_mode, in_mode;
37007 int n, in_n;
37008
37009 /* ACML is 64-bit only and suitable for unsafe math only, as it does
37010 not correctly support parts of IEEE arithmetic, such as denormals,
37011 with the required precision. */
37012 if (!TARGET_64BIT
37013 || !flag_unsafe_math_optimizations)
37014 return NULL_TREE;
37015
37016 el_mode = TYPE_MODE (TREE_TYPE (type_out));
37017 n = TYPE_VECTOR_SUBPARTS (type_out);
37018 in_mode = TYPE_MODE (TREE_TYPE (type_in));
37019 in_n = TYPE_VECTOR_SUBPARTS (type_in);
37020 if (el_mode != in_mode
37021 || n != in_n)
37022 return NULL_TREE;
37023
37024 switch (fn)
37025 {
37026 case BUILT_IN_SIN:
37027 case BUILT_IN_COS:
37028 case BUILT_IN_EXP:
37029 case BUILT_IN_LOG:
37030 case BUILT_IN_LOG2:
37031 case BUILT_IN_LOG10:
37032 name[4] = 'd';
37033 name[5] = '2';
37034 if (el_mode != DFmode
37035 || n != 2)
37036 return NULL_TREE;
37037 break;
37038
37039 case BUILT_IN_SINF:
37040 case BUILT_IN_COSF:
37041 case BUILT_IN_EXPF:
37042 case BUILT_IN_POWF:
37043 case BUILT_IN_LOGF:
37044 case BUILT_IN_LOG2F:
37045 case BUILT_IN_LOG10F:
37046 name[4] = 's';
37047 name[5] = '4';
37048 if (el_mode != SFmode
37049 || n != 4)
37050 return NULL_TREE;
37051 break;
37052
37053 default:
37054 return NULL_TREE;
37055 }
37056
37057 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
37058 sprintf (name + 7, "%s", bname+10);
37059
37060 arity = 0;
37061 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
37062 args;
37063 args = TREE_CHAIN (args))
37064 arity++;
37065
37066 if (arity == 1)
37067 fntype = build_function_type_list (type_out, type_in, NULL);
37068 else
37069 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
37070
37071 /* Build a function declaration for the vectorized function. */
37072 new_fndecl = build_decl (BUILTINS_LOCATION,
37073 FUNCTION_DECL, get_identifier (name), fntype);
37074 TREE_PUBLIC (new_fndecl) = 1;
37075 DECL_EXTERNAL (new_fndecl) = 1;
37076 DECL_IS_NOVOPS (new_fndecl) = 1;
37077 TREE_READONLY (new_fndecl) = 1;
37078
37079 return new_fndecl;
37080 }
37081
37082 /* Return a decl of a function that implements a gather load with
37083 memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
37084 Return NULL_TREE if it is not available. */
37085
37086 static tree
37087 ix86_vectorize_builtin_gather (const_tree mem_vectype,
37088 const_tree index_type, int scale)
37089 {
37090 bool si;
37091 enum ix86_builtins code;
37092
37093 if (! TARGET_AVX2)
37094 return NULL_TREE;
37095
37096 if ((TREE_CODE (index_type) != INTEGER_TYPE
37097 && !POINTER_TYPE_P (index_type))
37098 || (TYPE_MODE (index_type) != SImode
37099 && TYPE_MODE (index_type) != DImode))
37100 return NULL_TREE;
37101
37102 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
37103 return NULL_TREE;
37104
37105 /* v*gather* insn sign extends index to pointer mode. */
37106 if (TYPE_PRECISION (index_type) < POINTER_SIZE
37107 && TYPE_UNSIGNED (index_type))
37108 return NULL_TREE;
37109
37110 if (scale <= 0
37111 || scale > 8
37112 || (scale & (scale - 1)) != 0)
37113 return NULL_TREE;
37114
37115 si = TYPE_MODE (index_type) == SImode;
37116 switch (TYPE_MODE (mem_vectype))
37117 {
37118 case V2DFmode:
37119 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
37120 break;
37121 case V4DFmode:
37122 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37123 break;
37124 case V2DImode:
37125 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
37126 break;
37127 case V4DImode:
37128 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
37129 break;
37130 case V4SFmode:
37131 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
37132 break;
37133 case V8SFmode:
37134 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
37135 break;
37136 case V4SImode:
37137 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
37138 break;
37139 case V8SImode:
37140 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
37141 break;
37142 case V8DFmode:
37143 if (TARGET_AVX512F)
37144 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
37145 else
37146 return NULL_TREE;
37147 break;
37148 case V8DImode:
37149 if (TARGET_AVX512F)
37150 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
37151 else
37152 return NULL_TREE;
37153 break;
37154 case V16SFmode:
37155 if (TARGET_AVX512F)
37156 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
37157 else
37158 return NULL_TREE;
37159 break;
37160 case V16SImode:
37161 if (TARGET_AVX512F)
37162 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
37163 else
37164 return NULL_TREE;
37165 break;
37166 default:
37167 return NULL_TREE;
37168 }
37169
37170 return ix86_get_builtin (code);
37171 }
37172
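/* Illustration only, not part of the compiler: the kind of indirect
   load the hook above serves.  With -O3 -mavx2 the vectorizer can
   request a gather decl for the access b[idx[i]] below (V4DFmode
   memory, SImode indices, scale 8):

     void
     indexed_copy (double *restrict a, const double *restrict b,
                   const int *restrict idx, int n)
     {
       for (int i = 0; i < n; i++)
         a[i] = b[idx[i]];
     }  */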
37173 /* Return a decl for a target-specific builtin that implements the
37174 reciprocal of the function FN, or NULL_TREE if not available. */
37175
37176 static tree
37177 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
37178 {
37179 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
37180 && flag_finite_math_only && !flag_trapping_math
37181 && flag_unsafe_math_optimizations))
37182 return NULL_TREE;
37183
37184 if (md_fn)
37185 /* Machine dependent builtins. */
37186 switch (fn)
37187 {
37188 /* Vectorized version of sqrt to rsqrt conversion. */
37189 case IX86_BUILTIN_SQRTPS_NR:
37190 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37191
37192 case IX86_BUILTIN_SQRTPS_NR256:
37193 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37194
37195 default:
37196 return NULL_TREE;
37197 }
37198 else
37199 /* Normal builtins. */
37200 switch (fn)
37201 {
37202 /* Sqrt to rsqrt conversion. */
37203 case BUILT_IN_SQRTF:
37204 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37205
37206 default:
37207 return NULL_TREE;
37208 }
37209 }
37210 \f
37211 /* Helper for avx_vpermilps256_operand et al. This is also used by
37212 the expansion functions to turn the parallel back into a mask.
37213 The return value is 0 for no match and the imm8+1 for a match. */
37214
37215 int
37216 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37217 {
37218 unsigned i, nelt = GET_MODE_NUNITS (mode);
37219 unsigned mask = 0;
37220 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37221
37222 if (XVECLEN (par, 0) != (int) nelt)
37223 return 0;
37224
37225 /* Validate that all of the elements are constants, and not totally
37226 out of range. Copy the data into an integral array to make the
37227 subsequent checks easier. */
37228 for (i = 0; i < nelt; ++i)
37229 {
37230 rtx er = XVECEXP (par, 0, i);
37231 unsigned HOST_WIDE_INT ei;
37232
37233 if (!CONST_INT_P (er))
37234 return 0;
37235 ei = INTVAL (er);
37236 if (ei >= nelt)
37237 return 0;
37238 ipar[i] = ei;
37239 }
37240
37241 switch (mode)
37242 {
37243 case V8DFmode:
37244 /* In the 512-bit DFmode case, we can only move elements within
37245 a 128-bit lane. First fill the second part of the mask,
37246 then fallthru. */
37247 for (i = 4; i < 6; ++i)
37248 {
37249 if (ipar[i] < 4 || ipar[i] >= 6)
37250 return 0;
37251 mask |= (ipar[i] - 4) << i;
37252 }
37253 for (i = 6; i < 8; ++i)
37254 {
37255 if (ipar[i] < 6)
37256 return 0;
37257 mask |= (ipar[i] - 6) << i;
37258 }
37259 /* FALLTHRU */
37260
37261 case V4DFmode:
37262 /* In the 256-bit DFmode case, we can only move elements within
37263 a 128-bit lane. */
37264 for (i = 0; i < 2; ++i)
37265 {
37266 if (ipar[i] >= 2)
37267 return 0;
37268 mask |= ipar[i] << i;
37269 }
37270 for (i = 2; i < 4; ++i)
37271 {
37272 if (ipar[i] < 2)
37273 return 0;
37274 mask |= (ipar[i] - 2) << i;
37275 }
37276 break;
37277
37278 case V16SFmode:
37279 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37280 must mirror the permutation in the lower 256 bits. */
37281 for (i = 0; i < 8; ++i)
37282 if (ipar[i] + 8 != ipar[i + 8])
37283 return 0;
37284 /* FALLTHRU */
37285
37286 case V8SFmode:
37287 /* In the 256-bit SFmode case, we have full freedom of
37288 movement within the low 128-bit lane, but the high 128-bit
37289 lane must mirror the exact same pattern. */
37290 for (i = 0; i < 4; ++i)
37291 if (ipar[i] + 4 != ipar[i + 4])
37292 return 0;
37293 nelt = 4;
37294 /* FALLTHRU */
37295
37296 case V2DFmode:
37297 case V4SFmode:
37298 /* In the 128-bit case, we have full freedom in the placement of
37299 the elements from the source operand. */
37300 for (i = 0; i < nelt; ++i)
37301 mask |= ipar[i] << (i * (nelt / 2));
37302 break;
37303
37304 default:
37305 gcc_unreachable ();
37306 }
37307
37308 /* Make sure success has a non-zero value by adding one. */
37309 return mask + 1;
37310 }
37311
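/* Illustration only: a worked example of the mask reconstruction above.
   For V4DFmode and the selector (1 0 3 2) -- swap within each 128-bit
   lane -- the loops accumulate 1<<0 and (3-2)<<2, giving mask 0x5 and a
   return value of 0x6 (imm8 0x5 plus one).  For V4SFmode the selector
   (2 3 0 1) packs into 2-bit fields as 2 | 3<<2 | 0<<4 | 1<<6 = 0x4e,
   so the function returns 0x4f.  */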
37312 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37313 the expansion functions to turn the parallel back into a mask.
37314 The return value is 0 for no match and the imm8+1 for a match. */
37315
37316 int
37317 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37318 {
37319 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37320 unsigned mask = 0;
37321 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37322
37323 if (XVECLEN (par, 0) != (int) nelt)
37324 return 0;
37325
37326 /* Validate that all of the elements are constants, and not totally
37327 out of range. Copy the data into an integral array to make the
37328 subsequent checks easier. */
37329 for (i = 0; i < nelt; ++i)
37330 {
37331 rtx er = XVECEXP (par, 0, i);
37332 unsigned HOST_WIDE_INT ei;
37333
37334 if (!CONST_INT_P (er))
37335 return 0;
37336 ei = INTVAL (er);
37337 if (ei >= 2 * nelt)
37338 return 0;
37339 ipar[i] = ei;
37340 }
37341
37342 /* Validate that each half of the permute is made up of consecutive elements. */
37343 for (i = 0; i < nelt2 - 1; ++i)
37344 if (ipar[i] + 1 != ipar[i + 1])
37345 return 0;
37346 for (i = nelt2; i < nelt - 1; ++i)
37347 if (ipar[i] + 1 != ipar[i + 1])
37348 return 0;
37349
37350 /* Reconstruct the mask. */
37351 for (i = 0; i < 2; ++i)
37352 {
37353 unsigned e = ipar[i * nelt2];
37354 if (e % nelt2)
37355 return 0;
37356 e /= nelt2;
37357 mask |= e << (i * 4);
37358 }
37359
37360 /* Make sure success has a non-zero value by adding one. */
37361 return mask + 1;
37362 }
37363 \f
37364 /* Return a register priority for hard reg REGNO. */
37365 static int
37366 ix86_register_priority (int hard_regno)
37367 {
37368 /* ebp and r13 as a base always want a displacement, and r12 as a
37369 base always wants an index. So discourage their use in an
37370 address. */
37371 if (hard_regno == R12_REG || hard_regno == R13_REG)
37372 return 0;
37373 if (hard_regno == BP_REG)
37374 return 1;
37375 /* New x86-64 int registers result in bigger code size. Discourage
37376 them. */
37377 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37378 return 2;
37379 /* New x86-64 SSE registers result in bigger code size. Discourage
37380 them. */
37381 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37382 return 2;
37383 /* Usage of AX register results in smaller code. Prefer it. */
37384 if (hard_regno == 0)
37385 return 4;
37386 return 3;
37387 }
37388
37389 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37390
37391 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37392 QImode must go into class Q_REGS.
37393 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37394 movdf to do mem-to-mem moves through integer regs. */
37395
37396 static reg_class_t
37397 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37398 {
37399 enum machine_mode mode = GET_MODE (x);
37400
37401 /* We're only allowed to return a subclass of CLASS. Many of the
37402 following checks fail for NO_REGS, so eliminate that early. */
37403 if (regclass == NO_REGS)
37404 return NO_REGS;
37405
37406 /* All classes can load zeros. */
37407 if (x == CONST0_RTX (mode))
37408 return regclass;
37409
37410 /* Force constants into memory if we are loading a (nonzero) constant into
37411 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37412 instructions to load from a constant. */
37413 if (CONSTANT_P (x)
37414 && (MAYBE_MMX_CLASS_P (regclass)
37415 || MAYBE_SSE_CLASS_P (regclass)
37416 || MAYBE_MASK_CLASS_P (regclass)))
37417 return NO_REGS;
37418
37419 /* Prefer SSE regs only, if we can use them for math. */
37420 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37421 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37422
37423 /* Floating-point constants need more complex checks. */
37424 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37425 {
37426 /* General regs can load everything. */
37427 if (reg_class_subset_p (regclass, GENERAL_REGS))
37428 return regclass;
37429
37430 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37431 zero above. We only want to wind up preferring 80387 registers if
37432 we plan on doing computation with them. */
37433 if (TARGET_80387
37434 && standard_80387_constant_p (x) > 0)
37435 {
37436 /* Limit class to non-sse. */
37437 if (regclass == FLOAT_SSE_REGS)
37438 return FLOAT_REGS;
37439 if (regclass == FP_TOP_SSE_REGS)
37440 return FP_TOP_REG;
37441 if (regclass == FP_SECOND_SSE_REGS)
37442 return FP_SECOND_REG;
37443 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37444 return regclass;
37445 }
37446
37447 return NO_REGS;
37448 }
37449
37450 /* Generally when we see PLUS here, it's the function invariant
37451 (plus soft-fp const_int), which can only be computed into general
37452 regs. */
37453 if (GET_CODE (x) == PLUS)
37454 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37455
37456 /* QImode constants are easy to load, but non-constant QImode data
37457 must go into Q_REGS. */
37458 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37459 {
37460 if (reg_class_subset_p (regclass, Q_REGS))
37461 return regclass;
37462 if (reg_class_subset_p (Q_REGS, regclass))
37463 return Q_REGS;
37464 return NO_REGS;
37465 }
37466
37467 return regclass;
37468 }
37469
37470 /* Discourage putting floating-point values in SSE registers unless
37471 SSE math is being used, and likewise for the 387 registers. */
37472 static reg_class_t
37473 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37474 {
37475 enum machine_mode mode = GET_MODE (x);
37476
37477 /* Restrict the output reload class to the register bank that we are doing
37478 math on. If we would like not to return a subset of CLASS, reject this
37479 alternative: if reload cannot do this, it will still use its choice. */
37480 mode = GET_MODE (x);
37481 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37482 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37483
37484 if (X87_FLOAT_MODE_P (mode))
37485 {
37486 if (regclass == FP_TOP_SSE_REGS)
37487 return FP_TOP_REG;
37488 else if (regclass == FP_SECOND_SSE_REGS)
37489 return FP_SECOND_REG;
37490 else
37491 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37492 }
37493
37494 return regclass;
37495 }
37496
37497 static reg_class_t
37498 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37499 enum machine_mode mode, secondary_reload_info *sri)
37500 {
37501 /* Double-word spills from general registers to non-offsettable memory
37502 references (zero-extended addresses) require special handling. */
37503 if (TARGET_64BIT
37504 && MEM_P (x)
37505 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37506 && INTEGER_CLASS_P (rclass)
37507 && !offsettable_memref_p (x))
37508 {
37509 sri->icode = (in_p
37510 ? CODE_FOR_reload_noff_load
37511 : CODE_FOR_reload_noff_store);
37512 /* Add the cost of moving address to a temporary. */
37513 sri->extra_cost = 1;
37514
37515 return NO_REGS;
37516 }
37517
37518 /* QImode spills from non-QI registers require an
37519 intermediate register on 32-bit targets. */
37520 if (mode == QImode
37521 && (MAYBE_MASK_CLASS_P (rclass)
37522 || (!TARGET_64BIT && !in_p
37523 && INTEGER_CLASS_P (rclass)
37524 && MAYBE_NON_Q_CLASS_P (rclass))))
37525 {
37526 int regno;
37527
37528 if (REG_P (x))
37529 regno = REGNO (x);
37530 else
37531 regno = -1;
37532
37533 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37534 regno = true_regnum (x);
37535
37536 /* Return Q_REGS if the operand is in memory. */
37537 if (regno == -1)
37538 return Q_REGS;
37539 }
37540
37541 /* This condition handles the corner case where an expression involving
37542 pointers gets vectorized. We're trying to use the address of a
37543 stack slot as a vector initializer.
37544
37545 (set (reg:V2DI 74 [ vect_cst_.2 ])
37546 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37547
37548 Eventually frame gets turned into sp+offset like this:
37549
37550 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37551 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37552 (const_int 392 [0x188]))))
37553
37554 That later gets turned into:
37555
37556 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37557 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37558 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37559
37560 We'll have the following reload recorded:
37561
37562 Reload 0: reload_in (DI) =
37563 (plus:DI (reg/f:DI 7 sp)
37564 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37565 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37566 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37567 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37568 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37569 reload_reg_rtx: (reg:V2DI 22 xmm1)
37570
37571 This isn't going to work since SSE instructions can't handle scalar
37572 additions. Returning GENERAL_REGS forces the addition into an integer
37573 register, and reload can handle subsequent reloads without problems. */
37574
37575 if (in_p && GET_CODE (x) == PLUS
37576 && SSE_CLASS_P (rclass)
37577 && SCALAR_INT_MODE_P (mode))
37578 return GENERAL_REGS;
37579
37580 return NO_REGS;
37581 }
37582
37583 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37584
37585 static bool
37586 ix86_class_likely_spilled_p (reg_class_t rclass)
37587 {
37588 switch (rclass)
37589 {
37590 case AREG:
37591 case DREG:
37592 case CREG:
37593 case BREG:
37594 case AD_REGS:
37595 case SIREG:
37596 case DIREG:
37597 case SSE_FIRST_REG:
37598 case FP_TOP_REG:
37599 case FP_SECOND_REG:
37600 return true;
37601
37602 default:
37603 break;
37604 }
37605
37606 return false;
37607 }
37608
37609 /* If we are copying between general and FP registers, we need a memory
37610 location. The same is true for SSE and MMX registers.
37611
37612 To optimize register_move_cost performance, allow inline variant.
37613
37614 The macro can't work reliably when one of the CLASSES is a class containing
37615 registers from multiple units (SSE, MMX, integer). We avoid this by never
37616 combining those units in single alternative in the machine description.
37617 Ensure that this constraint holds to avoid unexpected surprises.
37618
37619 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37620 enforce these sanity checks. */
37621
37622 static inline bool
37623 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37624 enum machine_mode mode, int strict)
37625 {
37626 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37627 return false;
37628 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37629 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37630 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37631 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37632 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37633 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37634 {
37635 gcc_assert (!strict || lra_in_progress);
37636 return true;
37637 }
37638
37639 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37640 return true;
37641
37642 /* Between mask and general, we have moves no larger than word size. */
37643 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37644 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37645 return true;
37646
37647 /* ??? This is a lie. We do have moves between mmx/general and between
37648 mmx/sse2. But by saying we need secondary memory we discourage the
37649 register allocator from using the mmx registers unless needed. */
37650 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37651 return true;
37652
37653 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37654 {
37655 /* SSE1 doesn't have any direct moves from other classes. */
37656 if (!TARGET_SSE2)
37657 return true;
37658
37659 /* If the target says that inter-unit moves are more expensive
37660 than moving through memory, then don't generate them. */
37661 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37662 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37663 return true;
37664
37665 /* Between SSE and general, we have moves no larger than word size. */
37666 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37667 return true;
37668 }
37669
37670 return false;
37671 }
37672
37673 bool
37674 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37675 enum machine_mode mode, int strict)
37676 {
37677 return inline_secondary_memory_needed (class1, class2, mode, strict);
37678 }
37679
37680 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37681
37682 On the 80386, this is the size of MODE in words,
37683 except in the FP regs, where a single reg is always enough. */
37684
37685 static unsigned char
37686 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37687 {
37688 if (MAYBE_INTEGER_CLASS_P (rclass))
37689 {
37690 if (mode == XFmode)
37691 return (TARGET_64BIT ? 2 : 3);
37692 else if (mode == XCmode)
37693 return (TARGET_64BIT ? 4 : 6);
37694 else
37695 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37696 }
37697 else
37698 {
37699 if (COMPLEX_MODE_P (mode))
37700 return 2;
37701 else
37702 return 1;
37703 }
37704 }
37705
37706 /* Return true if the registers in CLASS cannot represent the change from
37707 modes FROM to TO. */
37708
37709 bool
37710 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37711 enum reg_class regclass)
37712 {
37713 if (from == to)
37714 return false;
37715
37716 /* x87 registers can't do subreg at all, as all values are reformatted
37717 to extended precision. */
37718 if (MAYBE_FLOAT_CLASS_P (regclass))
37719 return true;
37720
37721 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37722 {
37723 /* Vector registers do not support QI or HImode loads. If we don't
37724 disallow a change to these modes, reload will assume it's ok to
37725 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37726 the vec_dupv4hi pattern. */
37727 if (GET_MODE_SIZE (from) < 4)
37728 return true;
37729 }
37730
37731 return false;
37732 }
37733
37734 /* Return the cost of moving data of mode M between a
37735 register and memory. A value of 2 is the default; this cost is
37736 relative to those in `REGISTER_MOVE_COST'.
37737
37738    This function is used extensively by register_move_cost, which is used to
37739    build tables at startup, so keep it inline.
37740    When IN is 2, return the maximum of the in and out move costs.
37741
37742 If moving between registers and memory is more expensive than
37743 between two registers, you should define this macro to express the
37744 relative cost.
37745
37746    Also model the increased cost of moving QImode registers in non
37747    Q_REGS classes.
37748 */
37749 static inline int
37750 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37751 int in)
37752 {
37753 int cost;
37754 if (FLOAT_CLASS_P (regclass))
37755 {
37756 int index;
37757 switch (mode)
37758 {
37759 case SFmode:
37760 index = 0;
37761 break;
37762 case DFmode:
37763 index = 1;
37764 break;
37765 case XFmode:
37766 index = 2;
37767 break;
37768 default:
37769 return 100;
37770 }
37771 if (in == 2)
37772 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37773 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37774 }
37775 if (SSE_CLASS_P (regclass))
37776 {
37777 int index;
37778 switch (GET_MODE_SIZE (mode))
37779 {
37780 case 4:
37781 index = 0;
37782 break;
37783 case 8:
37784 index = 1;
37785 break;
37786 case 16:
37787 index = 2;
37788 break;
37789 default:
37790 return 100;
37791 }
37792 if (in == 2)
37793 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37794 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37795 }
37796 if (MMX_CLASS_P (regclass))
37797 {
37798 int index;
37799 switch (GET_MODE_SIZE (mode))
37800 {
37801 case 4:
37802 index = 0;
37803 break;
37804 case 8:
37805 index = 1;
37806 break;
37807 default:
37808 return 100;
37809 }
37810       if (in == 2)
37811 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37812 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37813 }
37814 switch (GET_MODE_SIZE (mode))
37815 {
37816 case 1:
37817 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37818 {
37819 if (!in)
37820 return ix86_cost->int_store[0];
37821 if (TARGET_PARTIAL_REG_DEPENDENCY
37822 && optimize_function_for_speed_p (cfun))
37823 cost = ix86_cost->movzbl_load;
37824 else
37825 cost = ix86_cost->int_load[0];
37826 if (in == 2)
37827 return MAX (cost, ix86_cost->int_store[0]);
37828 return cost;
37829 }
37830 else
37831 {
37832 if (in == 2)
37833 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37834 if (in)
37835 return ix86_cost->movzbl_load;
37836 else
37837 return ix86_cost->int_store[0] + 4;
37838 }
37839 break;
37840 case 2:
37841 if (in == 2)
37842 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37843 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37844 default:
37845 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37846 if (mode == TFmode)
37847 mode = XFmode;
37848 if (in == 2)
37849 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37850 else if (in)
37851 cost = ix86_cost->int_load[2];
37852 else
37853 cost = ix86_cost->int_store[2];
37854 return (cost * (((int) GET_MODE_SIZE (mode)
37855 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37856 }
37857 }
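/* Worked example of the function above (hypothetical cost-table values,
   for illustration only): with int_load[2] == 6 and int_store[2] == 6,
   moving a DImode value to or from GENERAL_REGS on a 32-bit target takes
   two 32-bit pieces, so the result is 6 * ((8 + 3) / 4) = 12.  */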
37858
37859 static int
37860 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37861 bool in)
37862 {
37863 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37864 }
37865
37866
37867 /* Return the cost of moving data from a register in class CLASS1 to
37868 one in class CLASS2.
37869
37870 It is not required that the cost always equal 2 when FROM is the same as TO;
37871 on some machines it is expensive to move between registers if they are not
37872 general registers. */
37873
37874 static int
37875 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37876 reg_class_t class2_i)
37877 {
37878 enum reg_class class1 = (enum reg_class) class1_i;
37879 enum reg_class class2 = (enum reg_class) class2_i;
37880
37881   /* In case we require secondary memory, compute the cost of the store
37882      followed by the load.  In order to avoid bad register allocation choices,
37883      we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
37884
37885 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37886 {
37887 int cost = 1;
37888
37889 cost += inline_memory_move_cost (mode, class1, 2);
37890 cost += inline_memory_move_cost (mode, class2, 2);
37891
37892       /* In the case of copying from a general purpose register we may emit
37893      multiple stores followed by a single load, causing a memory size
37894      mismatch stall.  Count this as an arbitrarily high cost of 20.  */
37895 if (targetm.class_max_nregs (class1, mode)
37896 > targetm.class_max_nregs (class2, mode))
37897 cost += 20;
37898
37899 /* In the case of FP/MMX moves, the registers actually overlap, and we
37900 have to switch modes in order to treat them differently. */
37901 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37902 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37903 cost += 20;
37904
37905 return cost;
37906 }
37907
37908 /* Moves between SSE/MMX and integer unit are expensive. */
37909 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37910 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37911
37912     /* ??? By keeping the returned value relatively high, we limit the number
37913        of moves between integer and MMX/SSE registers for all targets.
37914        Additionally, the high value prevents a problem with x86_modes_tieable_p(),
37915        where integer modes in MMX/SSE registers are not tieable
37916        because of missing QImode and HImode moves to, from or between
37917        MMX/SSE registers.  */
37918 return MAX (8, ix86_cost->mmxsse_to_integer);
37919
37920 if (MAYBE_FLOAT_CLASS_P (class1))
37921 return ix86_cost->fp_move;
37922 if (MAYBE_SSE_CLASS_P (class1))
37923 return ix86_cost->sse_move;
37924 if (MAYBE_MMX_CLASS_P (class1))
37925 return ix86_cost->mmx_move;
37926 return 2;
37927 }
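/* Illustrative example: when a DImode move between GENERAL_REGS and
   SSE_REGS requires secondary memory on a 32-bit target, the cost
   returned above is 1 plus the maximum of the load/store memory move
   cost for each class, plus the optional 20-point penalties; this is
   deliberately at least as high as MEMORY_MOVE_COST so the allocator
   prefers to keep such values within a single register file.  */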
37928
37929 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37930 MODE. */
37931
37932 bool
37933 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37934 {
37935   /* The flags registers, and only the flags registers, can hold CCmode values.  */
37936 if (CC_REGNO_P (regno))
37937 return GET_MODE_CLASS (mode) == MODE_CC;
37938 if (GET_MODE_CLASS (mode) == MODE_CC
37939 || GET_MODE_CLASS (mode) == MODE_RANDOM
37940 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37941 return false;
37942 if (STACK_REGNO_P (regno))
37943 return VALID_FP_MODE_P (mode);
37944 if (MASK_REGNO_P (regno))
37945 return (VALID_MASK_REG_MODE (mode)
37946 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37947 if (SSE_REGNO_P (regno))
37948 {
37949 /* We implement the move patterns for all vector modes into and
37950 out of SSE registers, even when no operation instructions
37951 are available. */
37952
37953       /* For AVX-512 we allow, regardless of regno:
37954           - XImode
37955           - any 512-bit wide vector mode
37956           - any scalar mode.  */
37957 if (TARGET_AVX512F
37958 && (mode == XImode
37959 || VALID_AVX512F_REG_MODE (mode)
37960 || VALID_AVX512F_SCALAR_MODE (mode)))
37961 return true;
37962
37963 /* TODO check for QI/HI scalars. */
37964       /* AVX512VL allows SSE registers 16+ for 128/256-bit modes.  */
37965 if (TARGET_AVX512VL
37966 && (mode == OImode
37967 || mode == TImode
37968 || VALID_AVX256_REG_MODE (mode)
37969 || VALID_AVX512VL_128_REG_MODE (mode)))
37970 return true;
37971
37972 /* xmm16-xmm31 are only available for AVX-512. */
37973 if (EXT_REX_SSE_REGNO_P (regno))
37974 return false;
37975
37976 /* OImode and AVX modes are available only when AVX is enabled. */
37977 return ((TARGET_AVX
37978 && VALID_AVX256_REG_OR_OI_MODE (mode))
37979 || VALID_SSE_REG_MODE (mode)
37980 || VALID_SSE2_REG_MODE (mode)
37981 || VALID_MMX_REG_MODE (mode)
37982 || VALID_MMX_REG_MODE_3DNOW (mode));
37983 }
37984 if (MMX_REGNO_P (regno))
37985 {
37986 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37987 so if the register is available at all, then we can move data of
37988 the given mode into or out of it. */
37989 return (VALID_MMX_REG_MODE (mode)
37990 || VALID_MMX_REG_MODE_3DNOW (mode));
37991 }
37992
37993 if (mode == QImode)
37994 {
37995       /* Take care with QImode values - they can live in non-QI regs,
37996      but then they do cause partial register stalls.  */
37997 if (ANY_QI_REGNO_P (regno))
37998 return true;
37999 if (!TARGET_PARTIAL_REG_STALL)
38000 return true;
38001 /* LRA checks if the hard register is OK for the given mode.
38002 QImode values can live in non-QI regs, so we allow all
38003 registers here. */
38004 if (lra_in_progress)
38005 return true;
38006 return !can_create_pseudo_p ();
38007 }
38008   /* We handle both integers and floats in the general purpose registers.  */
38009 else if (VALID_INT_MODE_P (mode))
38010 return true;
38011 else if (VALID_FP_MODE_P (mode))
38012 return true;
38013 else if (VALID_DFP_MODE_P (mode))
38014 return true;
38015 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
38016 on to use that value in smaller contexts, this can easily force a
38017 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
38018 supporting DImode, allow it. */
38019 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
38020 return true;
38021
38022 return false;
38023 }
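/* Illustrative consequences of the checks above (they depend on the ISA
   flags in effect, so this is a sketch only): xmm16..xmm31 accept XImode,
   512-bit vector modes and AVX-512F scalar modes; with AVX-512VL they
   additionally accept 128- and 256-bit vector modes; without AVX-512
   support they are rejected entirely.  */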
38024
38025 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
38026 tieable integer mode. */
38027
38028 static bool
38029 ix86_tieable_integer_mode_p (enum machine_mode mode)
38030 {
38031 switch (mode)
38032 {
38033 case HImode:
38034 case SImode:
38035 return true;
38036
38037 case QImode:
38038 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
38039
38040 case DImode:
38041 return TARGET_64BIT;
38042
38043 default:
38044 return false;
38045 }
38046 }
38047
38048 /* Return true if MODE1 is accessible in a register that can hold MODE2
38049 without copying. That is, all register classes that can hold MODE2
38050 can also hold MODE1. */
38051
38052 bool
38053 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
38054 {
38055 if (mode1 == mode2)
38056 return true;
38057
38058 if (ix86_tieable_integer_mode_p (mode1)
38059 && ix86_tieable_integer_mode_p (mode2))
38060 return true;
38061
38062 /* MODE2 being XFmode implies fp stack or general regs, which means we
38063 can tie any smaller floating point modes to it. Note that we do not
38064 tie this with TFmode. */
38065 if (mode2 == XFmode)
38066 return mode1 == SFmode || mode1 == DFmode;
38067
38068 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
38069 that we can tie it with SFmode. */
38070 if (mode2 == DFmode)
38071 return mode1 == SFmode;
38072
38073 /* If MODE2 is only appropriate for an SSE register, then tie with
38074 any other mode acceptable to SSE registers. */
38075 if (GET_MODE_SIZE (mode2) == 32
38076 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38077 return (GET_MODE_SIZE (mode1) == 32
38078 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38079 if (GET_MODE_SIZE (mode2) == 16
38080 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
38081 return (GET_MODE_SIZE (mode1) == 16
38082 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
38083
38084 /* If MODE2 is appropriate for an MMX register, then tie
38085 with any other mode acceptable to MMX registers. */
38086 if (GET_MODE_SIZE (mode2) == 8
38087 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
38088 return (GET_MODE_SIZE (mode1) == 8
38089 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
38090
38091 return false;
38092 }
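/* Examples of the rules above (illustrative): SImode and HImode are
   always tieable; DFmode ties with SFmode; a 16-byte vector mode ties
   with any other 16-byte mode an SSE register can hold; but XFmode is
   never tied with TFmode.  */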
38093
38094 /* Return the cost of moving between two registers of mode MODE. */
38095
38096 static int
38097 ix86_set_reg_reg_cost (enum machine_mode mode)
38098 {
38099 unsigned int units = UNITS_PER_WORD;
38100
38101 switch (GET_MODE_CLASS (mode))
38102 {
38103 default:
38104 break;
38105
38106 case MODE_CC:
38107 units = GET_MODE_SIZE (CCmode);
38108 break;
38109
38110 case MODE_FLOAT:
38111 if ((TARGET_SSE && mode == TFmode)
38112 || (TARGET_80387 && mode == XFmode)
38113 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
38114 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
38115 units = GET_MODE_SIZE (mode);
38116 break;
38117
38118 case MODE_COMPLEX_FLOAT:
38119 if ((TARGET_SSE && mode == TCmode)
38120 || (TARGET_80387 && mode == XCmode)
38121 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
38122 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
38123 units = GET_MODE_SIZE (mode);
38124 break;
38125
38126 case MODE_VECTOR_INT:
38127 case MODE_VECTOR_FLOAT:
38128 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
38129 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
38130 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
38131 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
38132 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
38133 units = GET_MODE_SIZE (mode);
38134 }
38135
38136 /* Return the cost of moving between two registers of mode MODE,
38137 assuming that the move will be in pieces of at most UNITS bytes. */
38138 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
38139 }
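/* Worked example of the computation above (illustrative): moving a
   32-byte AVX vector with TARGET_AVX set gives units == 32, so the cost
   is COSTS_N_INSNS ((32 + 31) / 32) == COSTS_N_INSNS (1); without AVX the
   same mode falls back to UNITS_PER_WORD-sized pieces and costs
   proportionally more.  */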
38140
38141 /* Compute a (partial) cost for rtx X. Return true if the complete
38142 cost has been computed, and false if subexpressions should be
38143 scanned. In either case, *TOTAL contains the cost result. */
38144
38145 static bool
38146 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
38147 bool speed)
38148 {
38149 rtx mask;
38150 enum rtx_code code = (enum rtx_code) code_i;
38151 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
38152 enum machine_mode mode = GET_MODE (x);
38153 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
38154
38155 switch (code)
38156 {
38157 case SET:
38158 if (register_operand (SET_DEST (x), VOIDmode)
38159 && reg_or_0_operand (SET_SRC (x), VOIDmode))
38160 {
38161 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
38162 return true;
38163 }
38164 return false;
38165
38166 case CONST_INT:
38167 case CONST:
38168 case LABEL_REF:
38169 case SYMBOL_REF:
38170 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
38171 *total = 3;
38172 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
38173 *total = 2;
38174 else if (flag_pic && SYMBOLIC_CONST (x)
38175 && !(TARGET_64BIT
38176 && (GET_CODE (x) == LABEL_REF
38177 || (GET_CODE (x) == SYMBOL_REF
38178 && SYMBOL_REF_LOCAL_P (x)))))
38179 *total = 1;
38180 else
38181 *total = 0;
38182 return true;
38183
38184 case CONST_DOUBLE:
38185 if (mode == VOIDmode)
38186 {
38187 *total = 0;
38188 return true;
38189 }
38190 switch (standard_80387_constant_p (x))
38191 {
38192 case 1: /* 0.0 */
38193 *total = 1;
38194 return true;
38195 default: /* Other constants */
38196 *total = 2;
38197 return true;
38198 case 0:
38199 case -1:
38200 break;
38201 }
38202 if (SSE_FLOAT_MODE_P (mode))
38203 {
38204 case CONST_VECTOR:
38205 switch (standard_sse_constant_p (x))
38206 {
38207 case 0:
38208 break;
38209 case 1: /* 0: xor eliminates false dependency */
38210 *total = 0;
38211 return true;
38212 default: /* -1: cmp contains false dependency */
38213 *total = 1;
38214 return true;
38215 }
38216 }
38217 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38218 it'll probably end up. Add a penalty for size. */
38219 *total = (COSTS_N_INSNS (1)
38220 + (flag_pic != 0 && !TARGET_64BIT)
38221 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38222 return true;
38223
38224 case ZERO_EXTEND:
38225       /* Zero extension is often completely free on x86_64, so make
38226      it as cheap as possible.  */
38227 if (TARGET_64BIT && mode == DImode
38228 && GET_MODE (XEXP (x, 0)) == SImode)
38229 *total = 1;
38230 else if (TARGET_ZERO_EXTEND_WITH_AND)
38231 *total = cost->add;
38232 else
38233 *total = cost->movzx;
38234 return false;
38235
38236 case SIGN_EXTEND:
38237 *total = cost->movsx;
38238 return false;
38239
38240 case ASHIFT:
38241 if (SCALAR_INT_MODE_P (mode)
38242 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38243 && CONST_INT_P (XEXP (x, 1)))
38244 {
38245 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38246 if (value == 1)
38247 {
38248 *total = cost->add;
38249 return false;
38250 }
38251 if ((value == 2 || value == 3)
38252 && cost->lea <= cost->shift_const)
38253 {
38254 *total = cost->lea;
38255 return false;
38256 }
38257 }
38258 /* FALLTHRU */
38259
38260 case ROTATE:
38261 case ASHIFTRT:
38262 case LSHIFTRT:
38263 case ROTATERT:
38264 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38265 {
38266 /* ??? Should be SSE vector operation cost. */
38267 /* At least for published AMD latencies, this really is the same
38268 as the latency for a simple fpu operation like fabs. */
38269 /* V*QImode is emulated with 1-11 insns. */
38270 if (mode == V16QImode || mode == V32QImode)
38271 {
38272 int count = 11;
38273 if (TARGET_XOP && mode == V16QImode)
38274 {
38275           /* For XOP we use vpshab, which requires a broadcast of the
38276          value to the variable shift insn.  For constants this
38277          means a V16QI const in mem; even when we can perform the
38278          shift with one insn, set the cost so as to prefer paddb.  */
38279 if (CONSTANT_P (XEXP (x, 1)))
38280 {
38281 *total = (cost->fabs
38282 + rtx_cost (XEXP (x, 0), code, 0, speed)
38283 + (speed ? 2 : COSTS_N_BYTES (16)));
38284 return true;
38285 }
38286 count = 3;
38287 }
38288 else if (TARGET_SSSE3)
38289 count = 7;
38290 *total = cost->fabs * count;
38291 }
38292 else
38293 *total = cost->fabs;
38294 }
38295 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38296 {
38297 if (CONST_INT_P (XEXP (x, 1)))
38298 {
38299 if (INTVAL (XEXP (x, 1)) > 32)
38300 *total = cost->shift_const + COSTS_N_INSNS (2);
38301 else
38302 *total = cost->shift_const * 2;
38303 }
38304 else
38305 {
38306 if (GET_CODE (XEXP (x, 1)) == AND)
38307 *total = cost->shift_var * 2;
38308 else
38309 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38310 }
38311 }
38312 else
38313 {
38314 if (CONST_INT_P (XEXP (x, 1)))
38315 *total = cost->shift_const;
38316 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38317 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38318 {
38319 /* Return the cost after shift-and truncation. */
38320 *total = cost->shift_var;
38321 return true;
38322 }
38323 else
38324 *total = cost->shift_var;
38325 }
38326 return false;
38327
38328 case FMA:
38329 {
38330 rtx sub;
38331
38332 gcc_assert (FLOAT_MODE_P (mode));
38333 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38334
38335 /* ??? SSE scalar/vector cost should be used here. */
38336 /* ??? Bald assumption that fma has the same cost as fmul. */
38337 *total = cost->fmul;
38338 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38339
38340         /* Negation in op0 or op2 is free: FMS, FNMA, FNMS.  */
38341 sub = XEXP (x, 0);
38342 if (GET_CODE (sub) == NEG)
38343 sub = XEXP (sub, 0);
38344 *total += rtx_cost (sub, FMA, 0, speed);
38345
38346 sub = XEXP (x, 2);
38347 if (GET_CODE (sub) == NEG)
38348 sub = XEXP (sub, 0);
38349 *total += rtx_cost (sub, FMA, 2, speed);
38350 return true;
38351 }
38352
38353 case MULT:
38354 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38355 {
38356 /* ??? SSE scalar cost should be used here. */
38357 *total = cost->fmul;
38358 return false;
38359 }
38360 else if (X87_FLOAT_MODE_P (mode))
38361 {
38362 *total = cost->fmul;
38363 return false;
38364 }
38365 else if (FLOAT_MODE_P (mode))
38366 {
38367 /* ??? SSE vector cost should be used here. */
38368 *total = cost->fmul;
38369 return false;
38370 }
38371 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38372 {
38373 /* V*QImode is emulated with 7-13 insns. */
38374 if (mode == V16QImode || mode == V32QImode)
38375 {
38376 int extra = 11;
38377 if (TARGET_XOP && mode == V16QImode)
38378 extra = 5;
38379 else if (TARGET_SSSE3)
38380 extra = 6;
38381 *total = cost->fmul * 2 + cost->fabs * extra;
38382 }
38383 /* V*DImode is emulated with 5-8 insns. */
38384 else if (mode == V2DImode || mode == V4DImode)
38385 {
38386 if (TARGET_XOP && mode == V2DImode)
38387 *total = cost->fmul * 2 + cost->fabs * 3;
38388 else
38389 *total = cost->fmul * 3 + cost->fabs * 5;
38390 }
38391 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38392 insns, including two PMULUDQ. */
38393 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38394 *total = cost->fmul * 2 + cost->fabs * 5;
38395 else
38396 *total = cost->fmul;
38397 return false;
38398 }
38399 else
38400 {
38401 rtx op0 = XEXP (x, 0);
38402 rtx op1 = XEXP (x, 1);
38403 int nbits;
38404 if (CONST_INT_P (XEXP (x, 1)))
38405 {
38406 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38407 for (nbits = 0; value != 0; value &= value - 1)
38408 nbits++;
38409 }
38410 else
38411 /* This is arbitrary. */
38412 nbits = 7;
38413
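        /* The constant branch above is a population count: each
           "value &= value - 1" clears the lowest set bit, so e.g. a
           multiply by 10 (binary 1010) yields nbits == 2, while a
           non-constant multiplier is charged an arbitrary 7 bits.
           (Illustrative note, not present in the original sources.)  */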
38414 /* Compute costs correctly for widening multiplication. */
38415 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38416 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38417 == GET_MODE_SIZE (mode))
38418 {
38419 int is_mulwiden = 0;
38420 enum machine_mode inner_mode = GET_MODE (op0);
38421
38422 if (GET_CODE (op0) == GET_CODE (op1))
38423 is_mulwiden = 1, op1 = XEXP (op1, 0);
38424 else if (CONST_INT_P (op1))
38425 {
38426 if (GET_CODE (op0) == SIGN_EXTEND)
38427 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38428 == INTVAL (op1);
38429 else
38430 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38431 }
38432
38433 if (is_mulwiden)
38434 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38435 }
38436
38437 *total = (cost->mult_init[MODE_INDEX (mode)]
38438 + nbits * cost->mult_bit
38439 + rtx_cost (op0, outer_code, opno, speed)
38440 + rtx_cost (op1, outer_code, opno, speed));
38441
38442 return true;
38443 }
38444
38445 case DIV:
38446 case UDIV:
38447 case MOD:
38448 case UMOD:
38449 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38450 /* ??? SSE cost should be used here. */
38451 *total = cost->fdiv;
38452 else if (X87_FLOAT_MODE_P (mode))
38453 *total = cost->fdiv;
38454 else if (FLOAT_MODE_P (mode))
38455 /* ??? SSE vector cost should be used here. */
38456 *total = cost->fdiv;
38457 else
38458 *total = cost->divide[MODE_INDEX (mode)];
38459 return false;
38460
38461 case PLUS:
38462 if (GET_MODE_CLASS (mode) == MODE_INT
38463 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38464 {
38465 if (GET_CODE (XEXP (x, 0)) == PLUS
38466 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38467 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38468 && CONSTANT_P (XEXP (x, 1)))
38469 {
38470 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38471 if (val == 2 || val == 4 || val == 8)
38472 {
38473 *total = cost->lea;
38474 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38475 outer_code, opno, speed);
38476 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38477 outer_code, opno, speed);
38478 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38479 return true;
38480 }
38481 }
38482 else if (GET_CODE (XEXP (x, 0)) == MULT
38483 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38484 {
38485 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38486 if (val == 2 || val == 4 || val == 8)
38487 {
38488 *total = cost->lea;
38489 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38490 outer_code, opno, speed);
38491 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38492 return true;
38493 }
38494 }
38495 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38496 {
38497 *total = cost->lea;
38498 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38499 outer_code, opno, speed);
38500 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38501 outer_code, opno, speed);
38502 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38503 return true;
38504 }
38505 }
38506 /* FALLTHRU */
38507
38508 case MINUS:
38509 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38510 {
38511 /* ??? SSE cost should be used here. */
38512 *total = cost->fadd;
38513 return false;
38514 }
38515 else if (X87_FLOAT_MODE_P (mode))
38516 {
38517 *total = cost->fadd;
38518 return false;
38519 }
38520 else if (FLOAT_MODE_P (mode))
38521 {
38522 /* ??? SSE vector cost should be used here. */
38523 *total = cost->fadd;
38524 return false;
38525 }
38526 /* FALLTHRU */
38527
38528 case AND:
38529 case IOR:
38530 case XOR:
38531 if (GET_MODE_CLASS (mode) == MODE_INT
38532 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38533 {
38534 *total = (cost->add * 2
38535 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38536 << (GET_MODE (XEXP (x, 0)) != DImode))
38537 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38538 << (GET_MODE (XEXP (x, 1)) != DImode)));
38539 return true;
38540 }
38541 /* FALLTHRU */
38542
38543 case NEG:
38544 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38545 {
38546 /* ??? SSE cost should be used here. */
38547 *total = cost->fchs;
38548 return false;
38549 }
38550 else if (X87_FLOAT_MODE_P (mode))
38551 {
38552 *total = cost->fchs;
38553 return false;
38554 }
38555 else if (FLOAT_MODE_P (mode))
38556 {
38557 /* ??? SSE vector cost should be used here. */
38558 *total = cost->fchs;
38559 return false;
38560 }
38561 /* FALLTHRU */
38562
38563 case NOT:
38564 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38565 {
38566 /* ??? Should be SSE vector operation cost. */
38567 /* At least for published AMD latencies, this really is the same
38568 as the latency for a simple fpu operation like fabs. */
38569 *total = cost->fabs;
38570 }
38571 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38572 *total = cost->add * 2;
38573 else
38574 *total = cost->add;
38575 return false;
38576
38577 case COMPARE:
38578 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38579 && XEXP (XEXP (x, 0), 1) == const1_rtx
38580 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38581 && XEXP (x, 1) == const0_rtx)
38582 {
38583 /* This kind of construct is implemented using test[bwl].
38584 Treat it as if we had an AND. */
38585 *total = (cost->add
38586 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38587 + rtx_cost (const1_rtx, outer_code, opno, speed));
38588 return true;
38589 }
38590 return false;
38591
38592 case FLOAT_EXTEND:
38593 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38594 *total = 0;
38595 return false;
38596
38597 case ABS:
38598 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38599 /* ??? SSE cost should be used here. */
38600 *total = cost->fabs;
38601 else if (X87_FLOAT_MODE_P (mode))
38602 *total = cost->fabs;
38603 else if (FLOAT_MODE_P (mode))
38604 /* ??? SSE vector cost should be used here. */
38605 *total = cost->fabs;
38606 return false;
38607
38608 case SQRT:
38609 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38610 /* ??? SSE cost should be used here. */
38611 *total = cost->fsqrt;
38612 else if (X87_FLOAT_MODE_P (mode))
38613 *total = cost->fsqrt;
38614 else if (FLOAT_MODE_P (mode))
38615 /* ??? SSE vector cost should be used here. */
38616 *total = cost->fsqrt;
38617 return false;
38618
38619 case UNSPEC:
38620 if (XINT (x, 1) == UNSPEC_TP)
38621 *total = 0;
38622 return false;
38623
38624 case VEC_SELECT:
38625 case VEC_CONCAT:
38626 case VEC_DUPLICATE:
38627       /* ??? Assume all of these vector manipulation patterns are
38628      recognizable, in which case they all pretty much have the
38629      same cost.  */
38630 *total = cost->fabs;
38631 return true;
38632 case VEC_MERGE:
38633 mask = XEXP (x, 2);
38634       /* This is a masked instruction; assume the same cost
38635      as the nonmasked variant.  */
38636 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38637 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38638 else
38639 *total = cost->fabs;
38640 return true;
38641
38642 default:
38643 return false;
38644 }
38645 }
38646
38647 #if TARGET_MACHO
38648
38649 static int current_machopic_label_num;
38650
38651 /* Given a symbol name and its associated stub, write out the
38652 definition of the stub. */
38653
38654 void
38655 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38656 {
38657 unsigned int length;
38658 char *binder_name, *symbol_name, lazy_ptr_name[32];
38659 int label = ++current_machopic_label_num;
38660
38661 /* For 64-bit we shouldn't get here. */
38662 gcc_assert (!TARGET_64BIT);
38663
38664 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38665 symb = targetm.strip_name_encoding (symb);
38666
38667 length = strlen (stub);
38668 binder_name = XALLOCAVEC (char, length + 32);
38669 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38670
38671 length = strlen (symb);
38672 symbol_name = XALLOCAVEC (char, length + 32);
38673 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38674
38675 sprintf (lazy_ptr_name, "L%d$lz", label);
38676
38677 if (MACHOPIC_ATT_STUB)
38678 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38679 else if (MACHOPIC_PURE)
38680 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38681 else
38682 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38683
38684 fprintf (file, "%s:\n", stub);
38685 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38686
38687 if (MACHOPIC_ATT_STUB)
38688 {
38689 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38690 }
38691 else if (MACHOPIC_PURE)
38692 {
38693 /* PIC stub. */
38694 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38695 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38696 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38697 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38698 label, lazy_ptr_name, label);
38699 fprintf (file, "\tjmp\t*%%ecx\n");
38700 }
38701 else
38702 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38703
38704 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38705 it needs no stub-binding-helper. */
38706 if (MACHOPIC_ATT_STUB)
38707 return;
38708
38709 fprintf (file, "%s:\n", binder_name);
38710
38711 if (MACHOPIC_PURE)
38712 {
38713 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38714 fprintf (file, "\tpushl\t%%ecx\n");
38715 }
38716 else
38717 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38718
38719 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38720
38721 /* N.B. Keep the correspondence of these
38722 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38723 old-pic/new-pic/non-pic stubs; altering this will break
38724 compatibility with existing dylibs. */
38725 if (MACHOPIC_PURE)
38726 {
38727 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38728 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38729 }
38730 else
38731 /* 16-byte -mdynamic-no-pic stub. */
38732 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38733
38734 fprintf (file, "%s:\n", lazy_ptr_name);
38735 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38736 fprintf (file, ASM_LONG "%s\n", binder_name);
38737 }
38738 #endif /* TARGET_MACHO */
38739
38740 /* Order the registers for register allocator. */
38741
38742 void
38743 x86_order_regs_for_local_alloc (void)
38744 {
38745 int pos = 0;
38746 int i;
38747
38748 /* First allocate the local general purpose registers. */
38749 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38750 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38751 reg_alloc_order [pos++] = i;
38752
38753 /* Global general purpose registers. */
38754 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38755 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38756 reg_alloc_order [pos++] = i;
38757
38758 /* x87 registers come first in case we are doing FP math
38759 using them. */
38760 if (!TARGET_SSE_MATH)
38761 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38762 reg_alloc_order [pos++] = i;
38763
38764 /* SSE registers. */
38765 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38766 reg_alloc_order [pos++] = i;
38767 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38768 reg_alloc_order [pos++] = i;
38769
38770 /* Extended REX SSE registers. */
38771 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38772 reg_alloc_order [pos++] = i;
38773
38774 /* Mask register. */
38775 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38776 reg_alloc_order [pos++] = i;
38777
38778 /* x87 registers. */
38779 if (TARGET_SSE_MATH)
38780 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38781 reg_alloc_order [pos++] = i;
38782
38783 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38784 reg_alloc_order [pos++] = i;
38785
38786    /* Initialize the rest of the array, as we do not allocate some registers
38787       at all.  */
38788 while (pos < FIRST_PSEUDO_REGISTER)
38789 reg_alloc_order [pos++] = 0;
38790 }
38791
38792 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38793 in struct attribute_spec handler. */
38794 static tree
38795 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38796 tree args,
38797 int,
38798 bool *no_add_attrs)
38799 {
38800 if (TREE_CODE (*node) != FUNCTION_TYPE
38801 && TREE_CODE (*node) != METHOD_TYPE
38802 && TREE_CODE (*node) != FIELD_DECL
38803 && TREE_CODE (*node) != TYPE_DECL)
38804 {
38805 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38806 name);
38807 *no_add_attrs = true;
38808 return NULL_TREE;
38809 }
38810 if (TARGET_64BIT)
38811 {
38812 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38813 name);
38814 *no_add_attrs = true;
38815 return NULL_TREE;
38816 }
38817 if (is_attribute_p ("callee_pop_aggregate_return", name))
38818 {
38819 tree cst;
38820
38821 cst = TREE_VALUE (args);
38822 if (TREE_CODE (cst) != INTEGER_CST)
38823 {
38824 warning (OPT_Wattributes,
38825 "%qE attribute requires an integer constant argument",
38826 name);
38827 *no_add_attrs = true;
38828 }
38829 else if (compare_tree_int (cst, 0) != 0
38830 && compare_tree_int (cst, 1) != 0)
38831 {
38832 warning (OPT_Wattributes,
38833 "argument to %qE attribute is neither zero, nor one",
38834 name);
38835 *no_add_attrs = true;
38836 }
38837
38838 return NULL_TREE;
38839 }
38840
38841 return NULL_TREE;
38842 }
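/* Usage sketch (illustrative only, 32-bit targets):

     struct big ret_in_mem (void)
       __attribute__ ((callee_pop_aggregate_return (0)));

   With the constant 0 the caller, rather than the callee, pops the hidden
   aggregate-return pointer; the argument must be 0 or 1, as checked in the
   handler above.  */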
38843
38844 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38845    struct attribute_spec.handler.  */
38846 static tree
38847 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38848 bool *no_add_attrs)
38849 {
38850 if (TREE_CODE (*node) != FUNCTION_TYPE
38851 && TREE_CODE (*node) != METHOD_TYPE
38852 && TREE_CODE (*node) != FIELD_DECL
38853 && TREE_CODE (*node) != TYPE_DECL)
38854 {
38855 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38856 name);
38857 *no_add_attrs = true;
38858 return NULL_TREE;
38859 }
38860
38861 /* Can combine regparm with all attributes but fastcall. */
38862 if (is_attribute_p ("ms_abi", name))
38863 {
38864 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38865 {
38866 error ("ms_abi and sysv_abi attributes are not compatible");
38867 }
38868
38869 return NULL_TREE;
38870 }
38871 else if (is_attribute_p ("sysv_abi", name))
38872 {
38873 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38874 {
38875 error ("ms_abi and sysv_abi attributes are not compatible");
38876 }
38877
38878 return NULL_TREE;
38879 }
38880
38881 return NULL_TREE;
38882 }
38883
38884 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38885 struct attribute_spec.handler. */
38886 static tree
38887 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38888 bool *no_add_attrs)
38889 {
38890 tree *type = NULL;
38891 if (DECL_P (*node))
38892 {
38893 if (TREE_CODE (*node) == TYPE_DECL)
38894 type = &TREE_TYPE (*node);
38895 }
38896 else
38897 type = node;
38898
38899 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38900 {
38901 warning (OPT_Wattributes, "%qE attribute ignored",
38902 name);
38903 *no_add_attrs = true;
38904 }
38905
38906 else if ((is_attribute_p ("ms_struct", name)
38907 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38908 || ((is_attribute_p ("gcc_struct", name)
38909 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38910 {
38911 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38912 name);
38913 *no_add_attrs = true;
38914 }
38915
38916 return NULL_TREE;
38917 }
38918
38919 static tree
38920 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38921 bool *no_add_attrs)
38922 {
38923 if (TREE_CODE (*node) != FUNCTION_DECL)
38924 {
38925 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38926 name);
38927 *no_add_attrs = true;
38928 }
38929 return NULL_TREE;
38930 }
38931
38932 static bool
38933 ix86_ms_bitfield_layout_p (const_tree record_type)
38934 {
38935 return ((TARGET_MS_BITFIELD_LAYOUT
38936 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38937 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38938 }
38939
38940 /* Returns an expression indicating where the this parameter is
38941 located on entry to the FUNCTION. */
38942
38943 static rtx
38944 x86_this_parameter (tree function)
38945 {
38946 tree type = TREE_TYPE (function);
38947 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38948 int nregs;
38949
38950 if (TARGET_64BIT)
38951 {
38952 const int *parm_regs;
38953
38954 if (ix86_function_type_abi (type) == MS_ABI)
38955 parm_regs = x86_64_ms_abi_int_parameter_registers;
38956 else
38957 parm_regs = x86_64_int_parameter_registers;
38958 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38959 }
38960
38961 nregs = ix86_function_regparm (type, function);
38962
38963 if (nregs > 0 && !stdarg_p (type))
38964 {
38965 int regno;
38966 unsigned int ccvt = ix86_get_callcvt (type);
38967
38968 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38969 regno = aggr ? DX_REG : CX_REG;
38970 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38971 {
38972 regno = CX_REG;
38973 if (aggr)
38974 return gen_rtx_MEM (SImode,
38975 plus_constant (Pmode, stack_pointer_rtx, 4));
38976 }
38977 else
38978 {
38979 regno = AX_REG;
38980 if (aggr)
38981 {
38982 regno = DX_REG;
38983 if (nregs == 1)
38984 return gen_rtx_MEM (SImode,
38985 plus_constant (Pmode,
38986 stack_pointer_rtx, 4));
38987 }
38988 }
38989 return gen_rtx_REG (SImode, regno);
38990 }
38991
38992 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38993 aggr ? 8 : 4));
38994 }
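/* Illustrative results of the lookup above: for the 64-bit ABIs the `this'
   pointer arrives in the first integer argument register (the second one
   when the return value is an aggregate returned in memory); for 32-bit
   fastcall it is %ecx (%edx for aggregate returns); with no register
   parameters it is found on the stack at 4(%esp), or 8(%esp) for
   aggregate returns.  */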
38995
38996 /* Determine whether x86_output_mi_thunk can succeed. */
38997
38998 static bool
38999 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
39000 const_tree function)
39001 {
39002 /* 64-bit can handle anything. */
39003 if (TARGET_64BIT)
39004 return true;
39005
39006 /* For 32-bit, everything's fine if we have one free register. */
39007 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
39008 return true;
39009
39010 /* Need a free register for vcall_offset. */
39011 if (vcall_offset)
39012 return false;
39013
39014 /* Need a free register for GOT references. */
39015 if (flag_pic && !targetm.binds_local_p (function))
39016 return false;
39017
39018 /* Otherwise ok. */
39019 return true;
39020 }
39021
39022 /* Output the assembler code for a thunk function. THUNK_DECL is the
39023 declaration for the thunk function itself, FUNCTION is the decl for
39024 the target function. DELTA is an immediate constant offset to be
39025 added to THIS. If VCALL_OFFSET is nonzero, the word at
39026 *(*this + vcall_offset) should be added to THIS. */
39027
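/* Roughly, the emitted thunk behaves like this C sketch (illustrative
   only; the function below emits the equivalent RTL directly):

     return_type thunk (void *this, ...)
     {
       this = (char *) this + DELTA;
       if (VCALL_OFFSET)
         this = (char *) this
                + *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
       return function (this, ...);   // emitted as a sibling call
     }
*/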
39028 static void
39029 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
39030 HOST_WIDE_INT vcall_offset, tree function)
39031 {
39032 rtx this_param = x86_this_parameter (function);
39033 rtx this_reg, tmp, fnaddr;
39034 unsigned int tmp_regno;
39035 rtx_insn *insn;
39036
39037 if (TARGET_64BIT)
39038 tmp_regno = R10_REG;
39039 else
39040 {
39041 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
39042 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
39043 tmp_regno = AX_REG;
39044 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
39045 tmp_regno = DX_REG;
39046 else
39047 tmp_regno = CX_REG;
39048 }
39049
39050 emit_note (NOTE_INSN_PROLOGUE_END);
39051
39052 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
39053 pull it in now and let DELTA benefit. */
39054 if (REG_P (this_param))
39055 this_reg = this_param;
39056 else if (vcall_offset)
39057 {
39058 /* Put the this parameter into %eax. */
39059 this_reg = gen_rtx_REG (Pmode, AX_REG);
39060 emit_move_insn (this_reg, this_param);
39061 }
39062 else
39063 this_reg = NULL_RTX;
39064
39065 /* Adjust the this parameter by a fixed constant. */
39066 if (delta)
39067 {
39068 rtx delta_rtx = GEN_INT (delta);
39069 rtx delta_dst = this_reg ? this_reg : this_param;
39070
39071 if (TARGET_64BIT)
39072 {
39073 if (!x86_64_general_operand (delta_rtx, Pmode))
39074 {
39075 tmp = gen_rtx_REG (Pmode, tmp_regno);
39076 emit_move_insn (tmp, delta_rtx);
39077 delta_rtx = tmp;
39078 }
39079 }
39080
39081 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
39082 }
39083
39084 /* Adjust the this parameter by a value stored in the vtable. */
39085 if (vcall_offset)
39086 {
39087 rtx vcall_addr, vcall_mem, this_mem;
39088
39089 tmp = gen_rtx_REG (Pmode, tmp_regno);
39090
39091 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
39092 if (Pmode != ptr_mode)
39093 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
39094 emit_move_insn (tmp, this_mem);
39095
39096 /* Adjust the this parameter. */
39097 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
39098 if (TARGET_64BIT
39099 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
39100 {
39101 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
39102 emit_move_insn (tmp2, GEN_INT (vcall_offset));
39103 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
39104 }
39105
39106 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
39107 if (Pmode != ptr_mode)
39108 emit_insn (gen_addsi_1_zext (this_reg,
39109 gen_rtx_REG (ptr_mode,
39110 REGNO (this_reg)),
39111 vcall_mem));
39112 else
39113 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
39114 }
39115
39116 /* If necessary, drop THIS back to its stack slot. */
39117 if (this_reg && this_reg != this_param)
39118 emit_move_insn (this_param, this_reg);
39119
39120 fnaddr = XEXP (DECL_RTL (function), 0);
39121 if (TARGET_64BIT)
39122 {
39123 if (!flag_pic || targetm.binds_local_p (function)
39124 || TARGET_PECOFF)
39125 ;
39126 else
39127 {
39128 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
39129 tmp = gen_rtx_CONST (Pmode, tmp);
39130 fnaddr = gen_const_mem (Pmode, tmp);
39131 }
39132 }
39133 else
39134 {
39135 if (!flag_pic || targetm.binds_local_p (function))
39136 ;
39137 #if TARGET_MACHO
39138 else if (TARGET_MACHO)
39139 {
39140 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
39141 fnaddr = XEXP (fnaddr, 0);
39142 }
39143 #endif /* TARGET_MACHO */
39144 else
39145 {
39146 tmp = gen_rtx_REG (Pmode, CX_REG);
39147 output_set_got (tmp, NULL_RTX);
39148
39149 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
39150 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
39151 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
39152 fnaddr = gen_const_mem (Pmode, fnaddr);
39153 }
39154 }
39155
39156 /* Our sibling call patterns do not allow memories, because we have no
39157 predicate that can distinguish between frame and non-frame memory.
39158 For our purposes here, we can get away with (ab)using a jump pattern,
39159 because we're going to do no optimization. */
39160 if (MEM_P (fnaddr))
39161 {
39162 if (sibcall_insn_operand (fnaddr, word_mode))
39163 {
39164 fnaddr = XEXP (DECL_RTL (function), 0);
39165 tmp = gen_rtx_MEM (QImode, fnaddr);
39166 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39167 tmp = emit_call_insn (tmp);
39168 SIBLING_CALL_P (tmp) = 1;
39169 }
39170 else
39171 emit_jump_insn (gen_indirect_jump (fnaddr));
39172 }
39173 else
39174 {
39175 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
39176 fnaddr = legitimize_pic_address (fnaddr,
39177 gen_rtx_REG (Pmode, tmp_regno));
39178
39179 if (!sibcall_insn_operand (fnaddr, word_mode))
39180 {
39181 tmp = gen_rtx_REG (word_mode, tmp_regno);
39182 if (GET_MODE (fnaddr) != word_mode)
39183 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39184 emit_move_insn (tmp, fnaddr);
39185 fnaddr = tmp;
39186 }
39187
39188 tmp = gen_rtx_MEM (QImode, fnaddr);
39189 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39190 tmp = emit_call_insn (tmp);
39191 SIBLING_CALL_P (tmp) = 1;
39192 }
39193 emit_barrier ();
39194
39195 /* Emit just enough of rest_of_compilation to get the insns emitted.
39196 Note that use_thunk calls assemble_start_function et al. */
39197 insn = get_insns ();
39198 shorten_branches (insn);
39199 final_start_function (insn, file, 1);
39200 final (insn, file, 1);
39201 final_end_function ();
39202 }
39203
39204 static void
39205 x86_file_start (void)
39206 {
39207 default_file_start ();
39208 if (TARGET_16BIT)
39209 fputs ("\t.code16gcc\n", asm_out_file);
39210 #if TARGET_MACHO
39211 darwin_file_start ();
39212 #endif
39213 if (X86_FILE_START_VERSION_DIRECTIVE)
39214 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39215 if (X86_FILE_START_FLTUSED)
39216 fputs ("\t.global\t__fltused\n", asm_out_file);
39217 if (ix86_asm_dialect == ASM_INTEL)
39218 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39219 }
39220
39221 int
39222 x86_field_alignment (tree field, int computed)
39223 {
39224 enum machine_mode mode;
39225 tree type = TREE_TYPE (field);
39226
39227 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39228 return computed;
39229 mode = TYPE_MODE (strip_array_types (type));
39230 if (mode == DFmode || mode == DCmode
39231 || GET_MODE_CLASS (mode) == MODE_INT
39232 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39233 return MIN (32, computed);
39234 return computed;
39235 }
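/* Illustrative effect of the cap above: on a 32-bit target without
   -malign-double, a double or long long structure field is aligned to
   32 bits rather than its natural 64-bit alignment, matching the
   traditional ix86 ABI.  */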
39236
39237 /* Print call to TARGET to FILE. */
39238
39239 static void
39240 x86_print_call_or_nop (FILE *file, const char *target)
39241 {
39242 if (flag_nop_mcount)
39243 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
39244 else
39245 fprintf (file, "1:\tcall\t%s\n", target);
39246 }
39247
39248 /* Output assembler code to FILE to increment profiler label # LABELNO
39249 for profiling a function entry. */
39250 void
39251 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39252 {
39253 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39254 : MCOUNT_NAME);
39255 if (TARGET_64BIT)
39256 {
39257 #ifndef NO_PROFILE_COUNTERS
39258 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39259 #endif
39260
39261 if (!TARGET_PECOFF && flag_pic)
39262 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39263 else
39264 x86_print_call_or_nop (file, mcount_name);
39265 }
39266 else if (flag_pic)
39267 {
39268 #ifndef NO_PROFILE_COUNTERS
39269 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39270 LPREFIX, labelno);
39271 #endif
39272 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39273 }
39274 else
39275 {
39276 #ifndef NO_PROFILE_COUNTERS
39277 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39278 LPREFIX, labelno);
39279 #endif
39280 x86_print_call_or_nop (file, mcount_name);
39281 }
39282
39283 if (flag_record_mcount)
39284 {
39285 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
39286 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
39287 fprintf (file, "\t.previous\n");
39288 }
39289 }
39290
39291 /* We don't have exact information about the insn sizes, but we may assume
39292 quite safely that we are informed about all 1 byte insns and memory
39293 address sizes. This is enough to eliminate unnecessary padding in
39294 99% of cases. */
39295
39296 static int
39297 min_insn_size (rtx_insn *insn)
39298 {
39299 int l = 0, len;
39300
39301 if (!INSN_P (insn) || !active_insn_p (insn))
39302 return 0;
39303
39304   /* Discard alignments we've emitted and jump instructions.  */
39305 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39306 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39307 return 0;
39308
39309 /* Important case - calls are always 5 bytes.
39310      It is common to have many calls in a row.  */
39311 if (CALL_P (insn)
39312 && symbolic_reference_mentioned_p (PATTERN (insn))
39313 && !SIBLING_CALL_P (insn))
39314 return 5;
39315 len = get_attr_length (insn);
39316 if (len <= 1)
39317 return 1;
39318
39319 /* For normal instructions we rely on get_attr_length being exact,
39320 with a few exceptions. */
39321 if (!JUMP_P (insn))
39322 {
39323 enum attr_type type = get_attr_type (insn);
39324
39325 switch (type)
39326 {
39327 case TYPE_MULTI:
39328 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39329 || asm_noperands (PATTERN (insn)) >= 0)
39330 return 0;
39331 break;
39332 case TYPE_OTHER:
39333 case TYPE_FCMP:
39334 break;
39335 default:
39336 /* Otherwise trust get_attr_length. */
39337 return len;
39338 }
39339
39340 l = get_attr_length_address (insn);
39341 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39342 l = 4;
39343 }
39344 if (l)
39345 return 1+l;
39346 else
39347 return 2;
39348 }
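/* Examples of the estimate above (illustrative): an alignment directive
   contributes 0 bytes, a non-sibling call to a named symbol is counted as
   exactly 5 bytes, and an ordinary insn whose address part mentions a
   symbol is assumed to need at least a 4-byte displacement.  */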
39349
39350 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39351
39352 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
39353    16 byte window.  */
39354
39355 static void
39356 ix86_avoid_jump_mispredicts (void)
39357 {
39358 rtx_insn *insn, *start = get_insns ();
39359 int nbytes = 0, njumps = 0;
39360 int isjump = 0;
39361
39362   /* Look for all minimal intervals of instructions containing 4 jumps.
39363      The intervals are bounded by START and INSN.  NBYTES is the total
39364      size of instructions in the interval including INSN and not including
39365      START.  When NBYTES is smaller than 16 bytes, it is possible
39366      that the end of START and INSN end up in the same 16-byte page.
39367
39368      The smallest offset in the page at which INSN can start is the case
39369      where START ends at offset 0.  The offset of INSN is then
39370      NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
39371      maxskip 15 - NBYTES + sizeof (INSN).
39372
39373      Don't consider an asm goto as a jump; while it can contain a jump, it
39374      doesn't have to, since control transfer to its label(s) can be performed
39375      through other means, and we also estimate the minimum length of all asm
39376      stmts as 0.  */
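  /* Worked example (illustrative): suppose trimming the interval down to
     three jumps just dropped a fourth jump off the front and the remaining
     insns total nbytes == 12 < 16.  The loop below then emits a pad of
     15 - 12 + min_insn_size (insn) bytes before the current jump, so the
     four branches can no longer share one 16-byte window.  */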
39375 for (insn = start; insn; insn = NEXT_INSN (insn))
39376 {
39377 int min_size;
39378
39379 if (LABEL_P (insn))
39380 {
39381 int align = label_to_alignment (insn);
39382 int max_skip = label_to_max_skip (insn);
39383
39384 if (max_skip > 15)
39385 max_skip = 15;
39386 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39387 already in the current 16 byte page, because otherwise
39388 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39389 bytes to reach 16 byte boundary. */
39390 if (align <= 0
39391 || (align <= 3 && max_skip != (1 << align) - 1))
39392 max_skip = 0;
39393 if (dump_file)
39394 fprintf (dump_file, "Label %i with max_skip %i\n",
39395 INSN_UID (insn), max_skip);
39396 if (max_skip)
39397 {
39398 while (nbytes + max_skip >= 16)
39399 {
39400 start = NEXT_INSN (start);
39401 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39402 || CALL_P (start))
39403 njumps--, isjump = 1;
39404 else
39405 isjump = 0;
39406 nbytes -= min_insn_size (start);
39407 }
39408 }
39409 continue;
39410 }
39411
39412 min_size = min_insn_size (insn);
39413 nbytes += min_size;
39414 if (dump_file)
39415 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39416 INSN_UID (insn), min_size);
39417 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39418 || CALL_P (insn))
39419 njumps++;
39420 else
39421 continue;
39422
39423 while (njumps > 3)
39424 {
39425 start = NEXT_INSN (start);
39426 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39427 || CALL_P (start))
39428 njumps--, isjump = 1;
39429 else
39430 isjump = 0;
39431 nbytes -= min_insn_size (start);
39432 }
39433 gcc_assert (njumps >= 0);
39434 if (dump_file)
39435 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39436 INSN_UID (start), INSN_UID (insn), nbytes);
39437
39438 if (njumps == 3 && isjump && nbytes < 16)
39439 {
39440 int padsize = 15 - nbytes + min_insn_size (insn);
39441
39442 if (dump_file)
39443 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39444 INSN_UID (insn), padsize);
39445 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39446 }
39447 }
39448 }
39449 #endif
39450
39451 /* The AMD Athlon works faster
39452    when RET is not the destination of a conditional jump or directly preceded
39453    by another jump instruction.  We avoid the penalty by inserting a NOP just
39454    before the RET instruction in such cases.  */
39455 static void
39456 ix86_pad_returns (void)
39457 {
39458 edge e;
39459 edge_iterator ei;
39460
39461 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39462 {
39463 basic_block bb = e->src;
39464 rtx_insn *ret = BB_END (bb);
39465 rtx_insn *prev;
39466 bool replace = false;
39467
39468 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39469 || optimize_bb_for_size_p (bb))
39470 continue;
39471 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39472 if (active_insn_p (prev) || LABEL_P (prev))
39473 break;
39474 if (prev && LABEL_P (prev))
39475 {
39476 edge e;
39477 edge_iterator ei;
39478
39479 FOR_EACH_EDGE (e, ei, bb->preds)
39480 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39481 && !(e->flags & EDGE_FALLTHRU))
39482 {
39483 replace = true;
39484 break;
39485 }
39486 }
39487 if (!replace)
39488 {
39489 prev = prev_active_insn (ret);
39490 if (prev
39491 && ((JUMP_P (prev) && any_condjump_p (prev))
39492 || CALL_P (prev)))
39493 replace = true;
39494      /* Empty functions get a branch mispredict even when
39495         the jump destination is not visible to us.  */
39496 if (!prev && !optimize_function_for_size_p (cfun))
39497 replace = true;
39498 }
39499 if (replace)
39500 {
39501 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39502 delete_insn (ret);
39503 }
39504 }
39505 }
39506
39507 /* Count the minimum number of instructions in BB. Return 4 if the
39508 number of instructions >= 4. */
39509
39510 static int
39511 ix86_count_insn_bb (basic_block bb)
39512 {
39513 rtx_insn *insn;
39514 int insn_count = 0;
39515
39516 /* Count number of instructions in this block. Return 4 if the number
39517 of instructions >= 4. */
39518 FOR_BB_INSNS (bb, insn)
39519 {
39520       /* This only happens in exit blocks.  */
39521 if (JUMP_P (insn)
39522 && ANY_RETURN_P (PATTERN (insn)))
39523 break;
39524
39525 if (NONDEBUG_INSN_P (insn)
39526 && GET_CODE (PATTERN (insn)) != USE
39527 && GET_CODE (PATTERN (insn)) != CLOBBER)
39528 {
39529 insn_count++;
39530 if (insn_count >= 4)
39531 return insn_count;
39532 }
39533 }
39534
39535 return insn_count;
39536 }
39537
39538
39539 /* Count the minimum number of instructions in code path in BB.
39540 Return 4 if the number of instructions >= 4. */
39541
39542 static int
39543 ix86_count_insn (basic_block bb)
39544 {
39545 edge e;
39546 edge_iterator ei;
39547 int min_prev_count;
39548
39549 /* Only bother counting instructions along paths with no
39550 more than 2 basic blocks between entry and exit. Given
39551 that BB has an edge to exit, determine if a predecessor
39552 of BB has an edge from entry. If so, compute the number
39553 of instructions in the predecessor block. If there
39554 happen to be multiple such blocks, compute the minimum. */
39555 min_prev_count = 4;
39556 FOR_EACH_EDGE (e, ei, bb->preds)
39557 {
39558 edge prev_e;
39559 edge_iterator prev_ei;
39560
39561 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39562 {
39563 min_prev_count = 0;
39564 break;
39565 }
39566 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39567 {
39568 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39569 {
39570 int count = ix86_count_insn_bb (e->src);
39571 if (count < min_prev_count)
39572 min_prev_count = count;
39573 break;
39574 }
39575 }
39576 }
39577
39578 if (min_prev_count < 4)
39579 min_prev_count += ix86_count_insn_bb (bb);
39580
39581 return min_prev_count;
39582 }
39583
39584 /* Pad short function to 4 instructions. */
39585
39586 static void
39587 ix86_pad_short_function (void)
39588 {
39589 edge e;
39590 edge_iterator ei;
39591
39592 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39593 {
39594 rtx_insn *ret = BB_END (e->src);
39595 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39596 {
39597 int insn_count = ix86_count_insn (e->src);
39598
39599 /* Pad short function. */
39600 if (insn_count < 4)
39601 {
39602 rtx_insn *insn = ret;
39603
39604 /* Find epilogue. */
39605 while (insn
39606 && (!NOTE_P (insn)
39607 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39608 insn = PREV_INSN (insn);
39609
39610 if (!insn)
39611 insn = ret;
39612
39613 /* Two NOPs count as one instruction. */
39614 insn_count = 2 * (4 - insn_count);
39615 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39616 }
39617 }
39618 }
39619 }
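/* Worked example of the pass above (illustrative): a function whose single
   path from entry to exit contains only two real insns requests
   2 * (4 - 2) == 4 NOPs before its epilogue; since two NOPs are counted as
   one instruction slot, the function then occupies the four-instruction
   minimum this tuning asks for.  */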
39620
39621 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39622 the epilogue, the Windows system unwinder will apply epilogue logic and
39623 produce incorrect offsets. This can be avoided by adding a nop between
39624 the last insn that can throw and the first insn of the epilogue. */
39625
39626 static void
39627 ix86_seh_fixup_eh_fallthru (void)
39628 {
39629 edge e;
39630 edge_iterator ei;
39631
39632 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39633 {
39634 rtx_insn *insn, *next;
39635
39636 /* Find the beginning of the epilogue. */
39637 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39638 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39639 break;
39640 if (insn == NULL)
39641 continue;
39642
39643 /* We only care about preceding insns that can throw. */
39644 insn = prev_active_insn (insn);
39645 if (insn == NULL || !can_throw_internal (insn))
39646 continue;
39647
39648 /* Do not separate calls from their debug information. */
39649 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39650 if (NOTE_P (next)
39651 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39652 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39653 insn = next;
39654 else
39655 break;
39656
39657 emit_insn_after (gen_nops (const1_rtx), insn);
39658 }
39659 }
39660
39661 /* Implement machine specific optimizations. We implement padding of returns
39662 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
39663 static void
39664 ix86_reorg (void)
39665 {
39666 /* We are freeing block_for_insn in the toplev to keep compatibility
39667 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39668 compute_bb_for_insn ();
39669
39670 if (TARGET_SEH && current_function_has_exception_handlers ())
39671 ix86_seh_fixup_eh_fallthru ();
39672
39673 if (optimize && optimize_function_for_speed_p (cfun))
39674 {
39675 if (TARGET_PAD_SHORT_FUNCTION)
39676 ix86_pad_short_function ();
39677 else if (TARGET_PAD_RETURNS)
39678 ix86_pad_returns ();
39679 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39680 if (TARGET_FOUR_JUMP_LIMIT)
39681 ix86_avoid_jump_mispredicts ();
39682 #endif
39683 }
39684 }
39685
39686 /* Return true when a QImode register that must be represented via a REX
39687 prefix is used. */
39688 bool
39689 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
39690 {
39691 int i;
39692 extract_insn_cached (insn);
39693 for (i = 0; i < recog_data.n_operands; i++)
39694 if (GENERAL_REG_P (recog_data.operand[i])
39695 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39696 return true;
39697 return false;
39698 }
39699
39700 /* Return nonzero when P points to a register encoded via a REX prefix.
39701 Called via for_each_rtx. */
39702 static int
39703 extended_reg_mentioned_1 (rtx *p, void *)
39704 {
39705 unsigned int regno;
39706 if (!REG_P (*p))
39707 return 0;
39708 regno = REGNO (*p);
39709 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39710 }
39711
39712 /* Return true when INSN mentions a register that must be encoded using a
39713 REX prefix. */
39714 bool
39715 x86_extended_reg_mentioned_p (rtx insn)
39716 {
39717 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39718 extended_reg_mentioned_1, NULL);
39719 }
39720
39721 /* If profitable, negate (without causing overflow) integer constant
39722 of mode MODE at location LOC. Return true in this case. */
39723 bool
39724 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39725 {
39726 HOST_WIDE_INT val;
39727
39728 if (!CONST_INT_P (*loc))
39729 return false;
39730
39731 switch (mode)
39732 {
39733 case DImode:
39734 /* DImode x86_64 constants must fit in 32 bits. */
39735 gcc_assert (x86_64_immediate_operand (*loc, mode));
39736
39737 mode = SImode;
39738 break;
39739
39740 case SImode:
39741 case HImode:
39742 case QImode:
39743 break;
39744
39745 default:
39746 gcc_unreachable ();
39747 }
39748
39749 /* Avoid overflows. */
39750 if (mode_signbit_p (mode, *loc))
39751 return false;
39752
39753 val = INTVAL (*loc);
39754
39755 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39756 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39757 if ((val < 0 && val != -128)
39758 || val == 128)
39759 {
39760 *loc = GEN_INT (-val);
39761 return true;
39762 }
39763
39764 return false;
39765 }
39766
39767 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39768 optabs would emit if we didn't have TFmode patterns. */
39769
39770 void
39771 x86_emit_floatuns (rtx operands[2])
39772 {
39773 rtx_code_label *neglab, *donelab;
39774 rtx i0, i1, f0, in, out;
39775 enum machine_mode mode, inmode;
39776
39777 inmode = GET_MODE (operands[1]);
39778 gcc_assert (inmode == SImode || inmode == DImode);
39779
39780 out = operands[0];
39781 in = force_reg (inmode, operands[1]);
39782 mode = GET_MODE (out);
39783 neglab = gen_label_rtx ();
39784 donelab = gen_label_rtx ();
39785 f0 = gen_reg_rtx (mode);
39786
39787 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39788
39789 expand_float (out, in, 0);
39790
39791 emit_jump_insn (gen_jump (donelab));
39792 emit_barrier ();
39793
39794 emit_label (neglab);
39795
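  /* The input has its sign bit set when viewed as a signed value, so it
     cannot be converted directly.  Halve it with a logical shift, OR the
     shifted-out low bit back in so that the final doubling rounds the same
     way the exact value would, convert the halved value, then double the
     result.  */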
39796 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39797 1, OPTAB_DIRECT);
39798 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39799 1, OPTAB_DIRECT);
39800 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39801
39802 expand_float (f0, i0, 0);
39803
39804 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39805
39806 emit_label (donelab);
39807 }
39808 \f
39809 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39810 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39811 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39812 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
39813
39814 /* Get a vector mode of the same size as the original but with elements
39815 twice as wide. This is only guaranteed to apply to integral vectors. */
39816
39817 static inline enum machine_mode
39818 get_mode_wider_vector (enum machine_mode o)
39819 {
39820 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39821 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39822 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39823 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39824 return n;
39825 }
39826
39827 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39828 fill target with val via vec_duplicate. */
39829
39830 static bool
39831 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39832 {
39833 bool ok;
39834 rtx_insn *insn;
39835 rtx dup;
39836
39837 /* First attempt to recognize VAL as-is. */
39838 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39839 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39840 if (recog_memoized (insn) < 0)
39841 {
39842 rtx_insn *seq;
39843 /* If that fails, force VAL into a register. */
39844
39845 start_sequence ();
39846 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39847 seq = get_insns ();
39848 end_sequence ();
39849 if (seq)
39850 emit_insn_before (seq, insn);
39851
39852 ok = recog_memoized (insn) >= 0;
39853 gcc_assert (ok);
39854 }
39855 return true;
39856 }
39857
39858 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39859 with all elements equal to VAR. Return true if successful. */
39860
39861 static bool
39862 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39863 rtx target, rtx val)
39864 {
39865 bool ok;
39866
39867 switch (mode)
39868 {
39869 case V2SImode:
39870 case V2SFmode:
39871 if (!mmx_ok)
39872 return false;
39873 /* FALLTHRU */
39874
39875 case V4DFmode:
39876 case V4DImode:
39877 case V8SFmode:
39878 case V8SImode:
39879 case V2DFmode:
39880 case V2DImode:
39881 case V4SFmode:
39882 case V4SImode:
39883 case V16SImode:
39884 case V8DImode:
39885 case V16SFmode:
39886 case V8DFmode:
39887 return ix86_vector_duplicate_value (mode, target, val);
39888
39889 case V4HImode:
39890 if (!mmx_ok)
39891 return false;
39892 if (TARGET_SSE || TARGET_3DNOW_A)
39893 {
39894 rtx x;
39895
39896 val = gen_lowpart (SImode, val);
39897 x = gen_rtx_TRUNCATE (HImode, val);
39898 x = gen_rtx_VEC_DUPLICATE (mode, x);
39899 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39900 return true;
39901 }
39902 goto widen;
39903
39904 case V8QImode:
39905 if (!mmx_ok)
39906 return false;
39907 goto widen;
39908
39909 case V8HImode:
39910 if (TARGET_AVX2)
39911 return ix86_vector_duplicate_value (mode, target, val);
39912
39913 if (TARGET_SSE2)
39914 {
39915 struct expand_vec_perm_d dperm;
39916 rtx tmp1, tmp2;
39917
39918 permute:
39919 memset (&dperm, 0, sizeof (dperm));
39920 dperm.target = target;
39921 dperm.vmode = mode;
39922 dperm.nelt = GET_MODE_NUNITS (mode);
39923 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39924 dperm.one_operand_p = true;
39925
39926 /* Extend to SImode using a paradoxical SUBREG. */
39927 tmp1 = gen_reg_rtx (SImode);
39928 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39929
39930 /* Insert the SImode value as low element of a V4SImode vector. */
39931 tmp2 = gen_reg_rtx (V4SImode);
39932 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39933 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39934
39935 ok = (expand_vec_perm_1 (&dperm)
39936 || expand_vec_perm_broadcast_1 (&dperm));
39937 gcc_assert (ok);
39938 return ok;
39939 }
39940 goto widen;
39941
39942 case V16QImode:
39943 if (TARGET_AVX2)
39944 return ix86_vector_duplicate_value (mode, target, val);
39945
39946 if (TARGET_SSE2)
39947 goto permute;
39948 goto widen;
39949
39950 widen:
39951 /* Replicate the value once into the next wider mode and recurse. */
39952 {
39953 enum machine_mode smode, wsmode, wvmode;
39954 rtx x;
39955
39956 smode = GET_MODE_INNER (mode);
39957 wvmode = get_mode_wider_vector (mode);
39958 wsmode = GET_MODE_INNER (wvmode);
39959
39960 val = convert_modes (wsmode, smode, val, true);
39961 x = expand_simple_binop (wsmode, ASHIFT, val,
39962 GEN_INT (GET_MODE_BITSIZE (smode)),
39963 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39964 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39965
39966 x = gen_reg_rtx (wvmode);
39967 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39968 gcc_assert (ok);
39969 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39970 return ok;
39971 }
39972
39973 case V16HImode:
39974 case V32QImode:
39975 if (TARGET_AVX2)
39976 return ix86_vector_duplicate_value (mode, target, val);
39977 else
39978 {
39979 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39980 rtx x = gen_reg_rtx (hvmode);
39981
39982 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39983 gcc_assert (ok);
39984
39985 x = gen_rtx_VEC_CONCAT (mode, x, x);
39986 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39987 }
39988 return true;
39989
39990 case V64QImode:
39991 case V32HImode:
39992 if (TARGET_AVX512BW)
39993 return ix86_vector_duplicate_value (mode, target, val);
39994 else
39995 {
39996 enum machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
39997 rtx x = gen_reg_rtx (hvmode);
39998
39999 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
40000 gcc_assert (ok);
40001
40002 x = gen_rtx_VEC_CONCAT (mode, x, x);
40003 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40004 }
40005 return true;
40006
40007 default:
40008 return false;
40009 }
40010 }
40011
40012 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40013 whose ONE_VAR element is VAR, and other elements are zero. Return true
40014 if successful. */
40015
40016 static bool
40017 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
40018 rtx target, rtx var, int one_var)
40019 {
40020 enum machine_mode vsimode;
40021 rtx new_target;
40022 rtx x, tmp;
40023 bool use_vector_set = false;
40024
40025 switch (mode)
40026 {
40027 case V2DImode:
40028 /* For SSE4.1, we normally use vector set. But if the second
40029 element is zero and inter-unit moves are OK, we use movq
40030 instead. */
40031 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
40032 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
40033 && one_var == 0));
40034 break;
40035 case V16QImode:
40036 case V4SImode:
40037 case V4SFmode:
40038 use_vector_set = TARGET_SSE4_1;
40039 break;
40040 case V8HImode:
40041 use_vector_set = TARGET_SSE2;
40042 break;
40043 case V4HImode:
40044 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
40045 break;
40046 case V32QImode:
40047 case V16HImode:
40048 case V8SImode:
40049 case V8SFmode:
40050 case V4DFmode:
40051 use_vector_set = TARGET_AVX;
40052 break;
40053 case V4DImode:
40054 /* Use ix86_expand_vector_set in 64bit mode only. */
40055 use_vector_set = TARGET_AVX && TARGET_64BIT;
40056 break;
40057 default:
40058 break;
40059 }
40060
40061 if (use_vector_set)
40062 {
40063 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
40064 var = force_reg (GET_MODE_INNER (mode), var);
40065 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40066 return true;
40067 }
40068
40069 switch (mode)
40070 {
40071 case V2SFmode:
40072 case V2SImode:
40073 if (!mmx_ok)
40074 return false;
40075 /* FALLTHRU */
40076
40077 case V2DFmode:
40078 case V2DImode:
40079 if (one_var != 0)
40080 return false;
40081 var = force_reg (GET_MODE_INNER (mode), var);
40082 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
40083 emit_insn (gen_rtx_SET (VOIDmode, target, x));
40084 return true;
40085
40086 case V4SFmode:
40087 case V4SImode:
40088 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
40089 new_target = gen_reg_rtx (mode);
40090 else
40091 new_target = target;
40092 var = force_reg (GET_MODE_INNER (mode), var);
40093 x = gen_rtx_VEC_DUPLICATE (mode, var);
40094 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
40095 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
40096 if (one_var != 0)
40097 {
40098 /* We need to shuffle the value to the correct position, so
40099 create a new pseudo to store the intermediate result. */
40100
40101 /* With SSE2, we can use the integer shuffle insns. */
40102 if (mode != V4SFmode && TARGET_SSE2)
40103 {
40104 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
40105 const1_rtx,
40106 GEN_INT (one_var == 1 ? 0 : 1),
40107 GEN_INT (one_var == 2 ? 0 : 1),
40108 GEN_INT (one_var == 3 ? 0 : 1)));
40109 if (target != new_target)
40110 emit_move_insn (target, new_target);
40111 return true;
40112 }
40113
40114 /* Otherwise convert the intermediate result to V4SFmode and
40115 use the SSE1 shuffle instructions. */
40116 if (mode != V4SFmode)
40117 {
40118 tmp = gen_reg_rtx (V4SFmode);
40119 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
40120 }
40121 else
40122 tmp = new_target;
40123
40124 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
40125 const1_rtx,
40126 GEN_INT (one_var == 1 ? 0 : 1),
40127 GEN_INT (one_var == 2 ? 0+4 : 1+4),
40128 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
40129
40130 if (mode != V4SFmode)
40131 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
40132 else if (tmp != target)
40133 emit_move_insn (target, tmp);
40134 }
40135 else if (target != new_target)
40136 emit_move_insn (target, new_target);
40137 return true;
40138
40139 case V8HImode:
40140 case V16QImode:
40141 vsimode = V4SImode;
40142 goto widen;
40143 case V4HImode:
40144 case V8QImode:
40145 if (!mmx_ok)
40146 return false;
40147 vsimode = V2SImode;
40148 goto widen;
40149 widen:
40150 if (one_var != 0)
40151 return false;
40152
40153 /* Zero extend the variable element to SImode and recurse. */
40154 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
40155
40156 x = gen_reg_rtx (vsimode);
40157 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
40158 var, one_var))
40159 gcc_unreachable ();
40160
40161 emit_move_insn (target, gen_lowpart (mode, x));
40162 return true;
40163
40164 default:
40165 return false;
40166 }
40167 }
40168
40169 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40170 consisting of the values in VALS. It is known that all elements
40171 except ONE_VAR are constants. Return true if successful. */
40172
40173 static bool
40174 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
40175 rtx target, rtx vals, int one_var)
40176 {
40177 rtx var = XVECEXP (vals, 0, one_var);
40178 enum machine_mode wmode;
40179 rtx const_vec, x;
40180
40181 const_vec = copy_rtx (vals);
40182 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
40183 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
40184
40185 switch (mode)
40186 {
40187 case V2DFmode:
40188 case V2DImode:
40189 case V2SFmode:
40190 case V2SImode:
40191 /* For the two element vectors, it's just as easy to use
40192 the general case. */
40193 return false;
40194
40195 case V4DImode:
40196 /* Use ix86_expand_vector_set in 64bit mode only. */
40197 if (!TARGET_64BIT)
40198 return false;
40199 case V4DFmode:
40200 case V8SFmode:
40201 case V8SImode:
40202 case V16HImode:
40203 case V32QImode:
40204 case V4SFmode:
40205 case V4SImode:
40206 case V8HImode:
40207 case V4HImode:
40208 break;
40209
40210 case V16QImode:
40211 if (TARGET_SSE4_1)
40212 break;
40213 wmode = V8HImode;
40214 goto widen;
40215 case V8QImode:
40216 wmode = V4HImode;
40217 goto widen;
40218 widen:
40219 /* There's no way to set one QImode entry easily. Combine
40220 the variable value with its adjacent constant value, and
40221 promote to an HImode set. */
40222 x = XVECEXP (vals, 0, one_var ^ 1);
40223 if (one_var & 1)
40224 {
40225 var = convert_modes (HImode, QImode, var, true);
40226 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40227 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40228 x = GEN_INT (INTVAL (x) & 0xff);
40229 }
40230 else
40231 {
40232 var = convert_modes (HImode, QImode, var, true);
40233 x = gen_int_mode (INTVAL (x) << 8, HImode);
40234 }
40235 if (x != const0_rtx)
40236 var = expand_simple_binop (HImode, IOR, var, x, var,
40237 1, OPTAB_LIB_WIDEN);
40238
40239 x = gen_reg_rtx (wmode);
40240 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40241 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40242
40243 emit_move_insn (target, gen_lowpart (mode, x));
40244 return true;
40245
40246 default:
40247 return false;
40248 }
40249
40250 emit_move_insn (target, const_vec);
40251 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40252 return true;
40253 }
40254
40255 /* A subroutine of ix86_expand_vector_init_general. Use vector
40256 concatenate to handle the most general case: all values variable,
40257 and none identical. */
40258
40259 static void
40260 ix86_expand_vector_init_concat (enum machine_mode mode,
40261 rtx target, rtx *ops, int n)
40262 {
40263 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40264 rtx first[16], second[8], third[4];
40265 rtvec v;
40266 int i, j;
40267
40268 switch (n)
40269 {
40270 case 2:
40271 switch (mode)
40272 {
40273 case V16SImode:
40274 cmode = V8SImode;
40275 break;
40276 case V16SFmode:
40277 cmode = V8SFmode;
40278 break;
40279 case V8DImode:
40280 cmode = V4DImode;
40281 break;
40282 case V8DFmode:
40283 cmode = V4DFmode;
40284 break;
40285 case V8SImode:
40286 cmode = V4SImode;
40287 break;
40288 case V8SFmode:
40289 cmode = V4SFmode;
40290 break;
40291 case V4DImode:
40292 cmode = V2DImode;
40293 break;
40294 case V4DFmode:
40295 cmode = V2DFmode;
40296 break;
40297 case V4SImode:
40298 cmode = V2SImode;
40299 break;
40300 case V4SFmode:
40301 cmode = V2SFmode;
40302 break;
40303 case V2DImode:
40304 cmode = DImode;
40305 break;
40306 case V2SImode:
40307 cmode = SImode;
40308 break;
40309 case V2DFmode:
40310 cmode = DFmode;
40311 break;
40312 case V2SFmode:
40313 cmode = SFmode;
40314 break;
40315 default:
40316 gcc_unreachable ();
40317 }
40318
40319 if (!register_operand (ops[1], cmode))
40320 ops[1] = force_reg (cmode, ops[1]);
40321 if (!register_operand (ops[0], cmode))
40322 ops[0] = force_reg (cmode, ops[0]);
40323 emit_insn (gen_rtx_SET (VOIDmode, target,
40324 gen_rtx_VEC_CONCAT (mode, ops[0],
40325 ops[1])));
40326 break;
40327
40328 case 4:
40329 switch (mode)
40330 {
40331 case V4DImode:
40332 cmode = V2DImode;
40333 break;
40334 case V4DFmode:
40335 cmode = V2DFmode;
40336 break;
40337 case V4SImode:
40338 cmode = V2SImode;
40339 break;
40340 case V4SFmode:
40341 cmode = V2SFmode;
40342 break;
40343 default:
40344 gcc_unreachable ();
40345 }
40346 goto half;
40347
40348 case 8:
40349 switch (mode)
40350 {
40351 case V8DImode:
40352 cmode = V2DImode;
40353 hmode = V4DImode;
40354 break;
40355 case V8DFmode:
40356 cmode = V2DFmode;
40357 hmode = V4DFmode;
40358 break;
40359 case V8SImode:
40360 cmode = V2SImode;
40361 hmode = V4SImode;
40362 break;
40363 case V8SFmode:
40364 cmode = V2SFmode;
40365 hmode = V4SFmode;
40366 break;
40367 default:
40368 gcc_unreachable ();
40369 }
40370 goto half;
40371
40372 case 16:
40373 switch (mode)
40374 {
40375 case V16SImode:
40376 cmode = V2SImode;
40377 hmode = V4SImode;
40378 gmode = V8SImode;
40379 break;
40380 case V16SFmode:
40381 cmode = V2SFmode;
40382 hmode = V4SFmode;
40383 gmode = V8SFmode;
40384 break;
40385 default:
40386 gcc_unreachable ();
40387 }
40388 goto half;
40389
40390 half:
40391 /* FIXME: We process inputs backward to help RA. PR 36222. */
40392 i = n - 1;
40393 j = (n >> 1) - 1;
40394 for (; i > 0; i -= 2, j--)
40395 {
40396 first[j] = gen_reg_rtx (cmode);
40397 v = gen_rtvec (2, ops[i - 1], ops[i]);
40398 ix86_expand_vector_init (false, first[j],
40399 gen_rtx_PARALLEL (cmode, v));
40400 }
40401
40402 n >>= 1;
40403 if (n > 4)
40404 {
40405 gcc_assert (hmode != VOIDmode);
40406 gcc_assert (gmode != VOIDmode);
40407 for (i = j = 0; i < n; i += 2, j++)
40408 {
40409 second[j] = gen_reg_rtx (hmode);
40410 ix86_expand_vector_init_concat (hmode, second [j],
40411 &first [i], 2);
40412 }
40413 n >>= 1;
40414 for (i = j = 0; i < n; i += 2, j++)
40415 {
40416 third[j] = gen_reg_rtx (gmode);
40417 ix86_expand_vector_init_concat (gmode, third[j],
40418 &second[i], 2);
40419 }
40420 n >>= 1;
40421 ix86_expand_vector_init_concat (mode, target, third, n);
40422 }
40423 else if (n > 2)
40424 {
40425 gcc_assert (hmode != VOIDmode);
40426 for (i = j = 0; i < n; i += 2, j++)
40427 {
40428 second[j] = gen_reg_rtx (hmode);
40429 ix86_expand_vector_init_concat (hmode, second [j],
40430 &first [i], 2);
40431 }
40432 n >>= 1;
40433 ix86_expand_vector_init_concat (mode, target, second, n);
40434 }
40435 else
40436 ix86_expand_vector_init_concat (mode, target, first, n);
40437 break;
40438
40439 default:
40440 gcc_unreachable ();
40441 }
40442 }
40443
40444 /* A subroutine of ix86_expand_vector_init_general. Use vector
40445 interleave to handle the most general case: all values variable,
40446 and none identical. */
40447
40448 static void
40449 ix86_expand_vector_init_interleave (enum machine_mode mode,
40450 rtx target, rtx *ops, int n)
40451 {
40452 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40453 int i, j;
40454 rtx op0, op1;
40455 rtx (*gen_load_even) (rtx, rtx, rtx);
40456 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40457 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40458
40459 switch (mode)
40460 {
40461 case V8HImode:
40462 gen_load_even = gen_vec_setv8hi;
40463 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40464 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40465 inner_mode = HImode;
40466 first_imode = V4SImode;
40467 second_imode = V2DImode;
40468 third_imode = VOIDmode;
40469 break;
40470 case V16QImode:
40471 gen_load_even = gen_vec_setv16qi;
40472 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40473 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40474 inner_mode = QImode;
40475 first_imode = V8HImode;
40476 second_imode = V4SImode;
40477 third_imode = V2DImode;
40478 break;
40479 default:
40480 gcc_unreachable ();
40481 }
40482
40483 for (i = 0; i < n; i++)
40484 {
40485 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40486 op0 = gen_reg_rtx (SImode);
40487 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40488
40489 /* Insert the SImode value as low element of V4SImode vector. */
40490 op1 = gen_reg_rtx (V4SImode);
40491 op0 = gen_rtx_VEC_MERGE (V4SImode,
40492 gen_rtx_VEC_DUPLICATE (V4SImode,
40493 op0),
40494 CONST0_RTX (V4SImode),
40495 const1_rtx);
40496 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40497
40498 /* Cast the V4SImode vector back to a vector in the original mode. */
40499 op0 = gen_reg_rtx (mode);
40500 emit_move_insn (op0, gen_lowpart (mode, op1));
40501
40502 /* Load even elements into the second position. */
40503 emit_insn (gen_load_even (op0,
40504 force_reg (inner_mode,
40505 ops [i + i + 1]),
40506 const1_rtx));
40507
40508 /* Cast vector to FIRST_IMODE vector. */
40509 ops[i] = gen_reg_rtx (first_imode);
40510 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40511 }
40512
40513 /* Interleave low FIRST_IMODE vectors. */
40514 for (i = j = 0; i < n; i += 2, j++)
40515 {
40516 op0 = gen_reg_rtx (first_imode);
40517 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40518
40519 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40520 ops[j] = gen_reg_rtx (second_imode);
40521 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40522 }
40523
40524 /* Interleave low SECOND_IMODE vectors. */
40525 switch (second_imode)
40526 {
40527 case V4SImode:
40528 for (i = j = 0; i < n / 2; i += 2, j++)
40529 {
40530 op0 = gen_reg_rtx (second_imode);
40531 emit_insn (gen_interleave_second_low (op0, ops[i],
40532 ops[i + 1]));
40533
40534 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40535 vector. */
40536 ops[j] = gen_reg_rtx (third_imode);
40537 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40538 }
40539 second_imode = V2DImode;
40540 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40541 /* FALLTHRU */
40542
40543 case V2DImode:
40544 op0 = gen_reg_rtx (second_imode);
40545 emit_insn (gen_interleave_second_low (op0, ops[0],
40546 ops[1]));
40547
40548 /* Cast the SECOND_IMODE vector back to a vector in the original
40549 mode. */
40550 emit_insn (gen_rtx_SET (VOIDmode, target,
40551 gen_lowpart (mode, op0)));
40552 break;
40553
40554 default:
40555 gcc_unreachable ();
40556 }
40557 }
40558
40559 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40560 all values variable, and none identical. */
40561
40562 static void
40563 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40564 rtx target, rtx vals)
40565 {
40566 rtx ops[64], op0, op1, op2, op3, op4, op5;
40567 enum machine_mode half_mode = VOIDmode;
40568 enum machine_mode quarter_mode = VOIDmode;
40569 int n, i;
40570
40571 switch (mode)
40572 {
40573 case V2SFmode:
40574 case V2SImode:
40575 if (!mmx_ok && !TARGET_SSE)
40576 break;
40577 /* FALLTHRU */
40578
40579 case V16SImode:
40580 case V16SFmode:
40581 case V8DFmode:
40582 case V8DImode:
40583 case V8SFmode:
40584 case V8SImode:
40585 case V4DFmode:
40586 case V4DImode:
40587 case V4SFmode:
40588 case V4SImode:
40589 case V2DFmode:
40590 case V2DImode:
40591 n = GET_MODE_NUNITS (mode);
40592 for (i = 0; i < n; i++)
40593 ops[i] = XVECEXP (vals, 0, i);
40594 ix86_expand_vector_init_concat (mode, target, ops, n);
40595 return;
40596
40597 case V32QImode:
40598 half_mode = V16QImode;
40599 goto half;
40600
40601 case V16HImode:
40602 half_mode = V8HImode;
40603 goto half;
40604
40605 half:
40606 n = GET_MODE_NUNITS (mode);
40607 for (i = 0; i < n; i++)
40608 ops[i] = XVECEXP (vals, 0, i);
40609 op0 = gen_reg_rtx (half_mode);
40610 op1 = gen_reg_rtx (half_mode);
40611 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40612 n >> 2);
40613 ix86_expand_vector_init_interleave (half_mode, op1,
40614 &ops [n >> 1], n >> 2);
40615 emit_insn (gen_rtx_SET (VOIDmode, target,
40616 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40617 return;
40618
40619 case V64QImode:
40620 quarter_mode = V16QImode;
40621 half_mode = V32QImode;
40622 goto quarter;
40623
40624 case V32HImode:
40625 quarter_mode = V8HImode;
40626 half_mode = V16HImode;
40627 goto quarter;
40628
40629 quarter:
40630 n = GET_MODE_NUNITS (mode);
40631 for (i = 0; i < n; i++)
40632 ops[i] = XVECEXP (vals, 0, i);
40633 op0 = gen_reg_rtx (quarter_mode);
40634 op1 = gen_reg_rtx (quarter_mode);
40635 op2 = gen_reg_rtx (quarter_mode);
40636 op3 = gen_reg_rtx (quarter_mode);
40637 op4 = gen_reg_rtx (half_mode);
40638 op5 = gen_reg_rtx (half_mode);
40639 ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
40640 n >> 3);
40641 ix86_expand_vector_init_interleave (quarter_mode, op1,
40642 &ops [n >> 2], n >> 3);
40643 ix86_expand_vector_init_interleave (quarter_mode, op2,
40644 &ops [n >> 1], n >> 3);
40645 ix86_expand_vector_init_interleave (quarter_mode, op3,
40646 &ops [(n >> 1) | (n >> 2)], n >> 3);
40647 emit_insn (gen_rtx_SET (VOIDmode, op4,
40648 gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
40649 emit_insn (gen_rtx_SET (VOIDmode, op5,
40650 gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
40651 emit_insn (gen_rtx_SET (VOIDmode, target,
40652 gen_rtx_VEC_CONCAT (mode, op4, op5)));
40653 return;
40654
40655 case V16QImode:
40656 if (!TARGET_SSE4_1)
40657 break;
40658 /* FALLTHRU */
40659
40660 case V8HImode:
40661 if (!TARGET_SSE2)
40662 break;
40663
40664 /* Don't use ix86_expand_vector_init_interleave if we can't
40665 move from GPR to SSE register directly. */
40666 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40667 break;
40668
40669 n = GET_MODE_NUNITS (mode);
40670 for (i = 0; i < n; i++)
40671 ops[i] = XVECEXP (vals, 0, i);
40672 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40673 return;
40674
40675 case V4HImode:
40676 case V8QImode:
40677 break;
40678
40679 default:
40680 gcc_unreachable ();
40681 }
40682
40683 {
40684 int i, j, n_elts, n_words, n_elt_per_word;
40685 enum machine_mode inner_mode;
40686 rtx words[4], shift;
40687
40688 inner_mode = GET_MODE_INNER (mode);
40689 n_elts = GET_MODE_NUNITS (mode);
40690 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40691 n_elt_per_word = n_elts / n_words;
40692 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40693
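    /* Build each word_mode chunk from its elements.  Elements are taken
       from the highest index in the word down to the lowest, so each
       shift-and-IOR step pushes the earlier elements toward the high bits
       and the last element processed lands in the low bits.  */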
40694 for (i = 0; i < n_words; ++i)
40695 {
40696 rtx word = NULL_RTX;
40697
40698 for (j = 0; j < n_elt_per_word; ++j)
40699 {
40700 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40701 elt = convert_modes (word_mode, inner_mode, elt, true);
40702
40703 if (j == 0)
40704 word = elt;
40705 else
40706 {
40707 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40708 word, 1, OPTAB_LIB_WIDEN);
40709 word = expand_simple_binop (word_mode, IOR, word, elt,
40710 word, 1, OPTAB_LIB_WIDEN);
40711 }
40712 }
40713
40714 words[i] = word;
40715 }
40716
40717 if (n_words == 1)
40718 emit_move_insn (target, gen_lowpart (mode, words[0]));
40719 else if (n_words == 2)
40720 {
40721 rtx tmp = gen_reg_rtx (mode);
40722 emit_clobber (tmp);
40723 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40724 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40725 emit_move_insn (target, tmp);
40726 }
40727 else if (n_words == 4)
40728 {
40729 rtx tmp = gen_reg_rtx (V4SImode);
40730 gcc_assert (word_mode == SImode);
40731 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40732 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40733 emit_move_insn (target, gen_lowpart (mode, tmp));
40734 }
40735 else
40736 gcc_unreachable ();
40737 }
40738 }
40739
40740 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40741 instructions unless MMX_OK is true. */
40742
40743 void
40744 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40745 {
40746 enum machine_mode mode = GET_MODE (target);
40747 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40748 int n_elts = GET_MODE_NUNITS (mode);
40749 int n_var = 0, one_var = -1;
40750 bool all_same = true, all_const_zero = true;
40751 int i;
40752 rtx x;
40753
40754 for (i = 0; i < n_elts; ++i)
40755 {
40756 x = XVECEXP (vals, 0, i);
40757 if (!(CONST_INT_P (x)
40758 || GET_CODE (x) == CONST_DOUBLE
40759 || GET_CODE (x) == CONST_FIXED))
40760 n_var++, one_var = i;
40761 else if (x != CONST0_RTX (inner_mode))
40762 all_const_zero = false;
40763 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40764 all_same = false;
40765 }
40766
40767 /* Constants are best loaded from the constant pool. */
40768 if (n_var == 0)
40769 {
40770 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40771 return;
40772 }
40773
40774 /* If all values are identical, broadcast the value. */
40775 if (all_same
40776 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40777 XVECEXP (vals, 0, 0)))
40778 return;
40779
40780 /* Values where only one field is non-constant are best loaded from
40781 the pool and overwritten via move later. */
40782 if (n_var == 1)
40783 {
40784 if (all_const_zero
40785 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40786 XVECEXP (vals, 0, one_var),
40787 one_var))
40788 return;
40789
40790 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40791 return;
40792 }
40793
40794 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40795 }
40796
40797 void
40798 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40799 {
40800 enum machine_mode mode = GET_MODE (target);
40801 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40802 enum machine_mode half_mode;
40803 bool use_vec_merge = false;
40804 rtx tmp;
40805 static rtx (*gen_extract[6][2]) (rtx, rtx)
40806 = {
40807 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40808 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40809 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40810 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40811 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40812 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40813 };
40814 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40815 = {
40816 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40817 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40818 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40819 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40820 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40821 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40822 };
40823 int i, j, n;
40824
40825 switch (mode)
40826 {
40827 case V2SFmode:
40828 case V2SImode:
40829 if (mmx_ok)
40830 {
40831 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40832 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40833 if (elt == 0)
40834 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40835 else
40836 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40837 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40838 return;
40839 }
40840 break;
40841
40842 case V2DImode:
40843 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40844 if (use_vec_merge)
40845 break;
40846
40847 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40848 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40849 if (elt == 0)
40850 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40851 else
40852 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40853 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40854 return;
40855
40856 case V2DFmode:
40857 {
40858 rtx op0, op1;
40859
40860 /* For the two element vectors, we implement a VEC_CONCAT with
40861 the extraction of the other element. */
40862
40863 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40864 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40865
40866 if (elt == 0)
40867 op0 = val, op1 = tmp;
40868 else
40869 op0 = tmp, op1 = val;
40870
40871 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40872 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40873 }
40874 return;
40875
40876 case V4SFmode:
40877 use_vec_merge = TARGET_SSE4_1;
40878 if (use_vec_merge)
40879 break;
40880
40881 switch (elt)
40882 {
40883 case 0:
40884 use_vec_merge = true;
40885 break;
40886
40887 case 1:
40888 /* tmp = target = A B C D */
40889 tmp = copy_to_reg (target);
40890 /* target = A A B B */
40891 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40892 /* target = X A B B */
40893 ix86_expand_vector_set (false, target, val, 0);
40894 /* target = A X C D */
40895 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40896 const1_rtx, const0_rtx,
40897 GEN_INT (2+4), GEN_INT (3+4)));
40898 return;
40899
40900 case 2:
40901 /* tmp = target = A B C D */
40902 tmp = copy_to_reg (target);
40903 /* tmp = X B C D */
40904 ix86_expand_vector_set (false, tmp, val, 0);
40905 /* target = A B X D */
40906 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40907 const0_rtx, const1_rtx,
40908 GEN_INT (0+4), GEN_INT (3+4)));
40909 return;
40910
40911 case 3:
40912 /* tmp = target = A B C D */
40913 tmp = copy_to_reg (target);
40914 /* tmp = X B C D */
40915 ix86_expand_vector_set (false, tmp, val, 0);
40916 /* target = A B C X */
40917 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40918 const0_rtx, const1_rtx,
40919 GEN_INT (2+4), GEN_INT (0+4)));
40920 return;
40921
40922 default:
40923 gcc_unreachable ();
40924 }
40925 break;
40926
40927 case V4SImode:
40928 use_vec_merge = TARGET_SSE4_1;
40929 if (use_vec_merge)
40930 break;
40931
40932 /* Element 0 handled by vec_merge below. */
40933 if (elt == 0)
40934 {
40935 use_vec_merge = true;
40936 break;
40937 }
40938
40939 if (TARGET_SSE2)
40940 {
40941 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40942 store into element 0, then shuffle them back. */
40943
40944 rtx order[4];
40945
40946 order[0] = GEN_INT (elt);
40947 order[1] = const1_rtx;
40948 order[2] = const2_rtx;
40949 order[3] = GEN_INT (3);
40950 order[elt] = const0_rtx;
40951
40952 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40953 order[1], order[2], order[3]));
40954
40955 ix86_expand_vector_set (false, target, val, 0);
40956
40957 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40958 order[1], order[2], order[3]));
40959 }
40960 else
40961 {
40962 /* For SSE1, we have to reuse the V4SF code. */
40963 rtx t = gen_reg_rtx (V4SFmode);
40964 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40965 emit_move_insn (target, gen_lowpart (mode, t));
40966 }
40967 return;
40968
40969 case V8HImode:
40970 use_vec_merge = TARGET_SSE2;
40971 break;
40972 case V4HImode:
40973 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40974 break;
40975
40976 case V16QImode:
40977 use_vec_merge = TARGET_SSE4_1;
40978 break;
40979
40980 case V8QImode:
40981 break;
40982
40983 case V32QImode:
40984 half_mode = V16QImode;
40985 j = 0;
40986 n = 16;
40987 goto half;
40988
40989 case V16HImode:
40990 half_mode = V8HImode;
40991 j = 1;
40992 n = 8;
40993 goto half;
40994
40995 case V8SImode:
40996 half_mode = V4SImode;
40997 j = 2;
40998 n = 4;
40999 goto half;
41000
41001 case V4DImode:
41002 half_mode = V2DImode;
41003 j = 3;
41004 n = 2;
41005 goto half;
41006
41007 case V8SFmode:
41008 half_mode = V4SFmode;
41009 j = 4;
41010 n = 4;
41011 goto half;
41012
41013 case V4DFmode:
41014 half_mode = V2DFmode;
41015 j = 5;
41016 n = 2;
41017 goto half;
41018
41019 half:
41020 /* Compute offset. */
41021 i = elt / n;
41022 elt %= n;
41023
41024 gcc_assert (i <= 1);
41025
41026 /* Extract the half. */
41027 tmp = gen_reg_rtx (half_mode);
41028 emit_insn (gen_extract[j][i] (tmp, target));
41029
41030 /* Put val in tmp at elt. */
41031 ix86_expand_vector_set (false, tmp, val, elt);
41032
41033 /* Put it back. */
41034 emit_insn (gen_insert[j][i] (target, target, tmp));
41035 return;
41036
41037 case V8DFmode:
41038 if (TARGET_AVX512F)
41039 {
41040 tmp = gen_reg_rtx (mode);
41041 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41042 gen_rtx_VEC_DUPLICATE (mode, val)));
41043 emit_insn (gen_avx512f_blendmv8df (target, tmp, target,
41044 force_reg (QImode, GEN_INT (1 << elt))));
41045 return;
41046 }
41047 else
41048 break;
41049 case V8DImode:
41050 if (TARGET_AVX512F)
41051 {
41052 tmp = gen_reg_rtx (mode);
41053 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41054 gen_rtx_VEC_DUPLICATE (mode, val)));
41055 emit_insn (gen_avx512f_blendmv8di (target, tmp, target,
41056 force_reg (QImode, GEN_INT (1 << elt))));
41057 return;
41058 }
41059 else
41060 break;
41061 case V16SFmode:
41062 if (TARGET_AVX512F)
41063 {
41064 tmp = gen_reg_rtx (mode);
41065 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41066 gen_rtx_VEC_DUPLICATE (mode, val)));
41067 emit_insn (gen_avx512f_blendmv16sf (target, tmp, target,
41068 force_reg (HImode, GEN_INT (1 << elt))));
41069 return;
41070 }
41071 else
41072 break;
41073 case V16SImode:
41074 if (TARGET_AVX512F)
41075 {
41076 tmp = gen_reg_rtx (mode);
41077 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41078 gen_rtx_VEC_DUPLICATE (mode, val)));
41079 emit_insn (gen_avx512f_blendmv16si (target, tmp, target,
41080 force_reg (HImode, GEN_INT (1 << elt))));
41081 return;
41082 }
41083 else
41084 break;
41085 case V32HImode:
41086 if (TARGET_AVX512F && TARGET_AVX512BW)
41087 {
41088 tmp = gen_reg_rtx (mode);
41089 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41090 gen_rtx_VEC_DUPLICATE (mode, val)));
41091 emit_insn (gen_avx512bw_blendmv32hi (target, tmp, target,
41092 force_reg (SImode, GEN_INT (1 << elt))));
41093 return;
41094 }
41095 else
41096 break;
41097 case V64QImode:
41098 if (TARGET_AVX512F && TARGET_AVX512BW)
41099 {
41100 tmp = gen_reg_rtx (mode);
41101 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41102 gen_rtx_VEC_DUPLICATE (mode, val)));
41103 emit_insn (gen_avx512bw_blendmv64qi (target, tmp, target,
41104 force_reg (DImode, GEN_INT (1 << elt))));
41105 return;
41106 }
41107 else
41108 break;
41109
41110 default:
41111 break;
41112 }
41113
41114 if (use_vec_merge)
41115 {
41116 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
41117 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
41118 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41119 }
41120 else
41121 {
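      /* No single-insn way to insert the element; spill the vector to a
	 stack temporary, overwrite the element in memory, and reload.  */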
41122 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41123
41124 emit_move_insn (mem, target);
41125
41126 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41127 emit_move_insn (tmp, val);
41128
41129 emit_move_insn (target, mem);
41130 }
41131 }
41132
41133 void
41134 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
41135 {
41136 enum machine_mode mode = GET_MODE (vec);
41137 enum machine_mode inner_mode = GET_MODE_INNER (mode);
41138 bool use_vec_extr = false;
41139 rtx tmp;
41140
41141 switch (mode)
41142 {
41143 case V2SImode:
41144 case V2SFmode:
41145 if (!mmx_ok)
41146 break;
41147 /* FALLTHRU */
41148
41149 case V2DFmode:
41150 case V2DImode:
41151 use_vec_extr = true;
41152 break;
41153
41154 case V4SFmode:
41155 use_vec_extr = TARGET_SSE4_1;
41156 if (use_vec_extr)
41157 break;
41158
41159 switch (elt)
41160 {
41161 case 0:
41162 tmp = vec;
41163 break;
41164
41165 case 1:
41166 case 3:
41167 tmp = gen_reg_rtx (mode);
41168 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
41169 GEN_INT (elt), GEN_INT (elt),
41170 GEN_INT (elt+4), GEN_INT (elt+4)));
41171 break;
41172
41173 case 2:
41174 tmp = gen_reg_rtx (mode);
41175 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
41176 break;
41177
41178 default:
41179 gcc_unreachable ();
41180 }
41181 vec = tmp;
41182 use_vec_extr = true;
41183 elt = 0;
41184 break;
41185
41186 case V4SImode:
41187 use_vec_extr = TARGET_SSE4_1;
41188 if (use_vec_extr)
41189 break;
41190
41191 if (TARGET_SSE2)
41192 {
41193 switch (elt)
41194 {
41195 case 0:
41196 tmp = vec;
41197 break;
41198
41199 case 1:
41200 case 3:
41201 tmp = gen_reg_rtx (mode);
41202 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
41203 GEN_INT (elt), GEN_INT (elt),
41204 GEN_INT (elt), GEN_INT (elt)));
41205 break;
41206
41207 case 2:
41208 tmp = gen_reg_rtx (mode);
41209 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
41210 break;
41211
41212 default:
41213 gcc_unreachable ();
41214 }
41215 vec = tmp;
41216 use_vec_extr = true;
41217 elt = 0;
41218 }
41219 else
41220 {
41221 /* For SSE1, we have to reuse the V4SF code. */
41222 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
41223 gen_lowpart (V4SFmode, vec), elt);
41224 return;
41225 }
41226 break;
41227
41228 case V8HImode:
41229 use_vec_extr = TARGET_SSE2;
41230 break;
41231 case V4HImode:
41232 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
41233 break;
41234
41235 case V16QImode:
41236 use_vec_extr = TARGET_SSE4_1;
41237 break;
41238
41239 case V8SFmode:
41240 if (TARGET_AVX)
41241 {
41242 tmp = gen_reg_rtx (V4SFmode);
41243 if (elt < 4)
41244 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
41245 else
41246 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
41247 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41248 return;
41249 }
41250 break;
41251
41252 case V4DFmode:
41253 if (TARGET_AVX)
41254 {
41255 tmp = gen_reg_rtx (V2DFmode);
41256 if (elt < 2)
41257 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
41258 else
41259 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
41260 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41261 return;
41262 }
41263 break;
41264
41265 case V32QImode:
41266 if (TARGET_AVX)
41267 {
41268 tmp = gen_reg_rtx (V16QImode);
41269 if (elt < 16)
41270 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
41271 else
41272 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
41273 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41274 return;
41275 }
41276 break;
41277
41278 case V16HImode:
41279 if (TARGET_AVX)
41280 {
41281 tmp = gen_reg_rtx (V8HImode);
41282 if (elt < 8)
41283 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
41284 else
41285 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
41286 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41287 return;
41288 }
41289 break;
41290
41291 case V8SImode:
41292 if (TARGET_AVX)
41293 {
41294 tmp = gen_reg_rtx (V4SImode);
41295 if (elt < 4)
41296 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
41297 else
41298 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
41299 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41300 return;
41301 }
41302 break;
41303
41304 case V4DImode:
41305 if (TARGET_AVX)
41306 {
41307 tmp = gen_reg_rtx (V2DImode);
41308 if (elt < 2)
41309 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
41310 else
41311 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
41312 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41313 return;
41314 }
41315 break;
41316
41317 case V32HImode:
41318 if (TARGET_AVX512BW)
41319 {
41320 tmp = gen_reg_rtx (V16HImode);
41321 if (elt < 16)
41322 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
41323 else
41324 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
41325 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41326 return;
41327 }
41328 break;
41329
41330 case V64QImode:
41331 if (TARGET_AVX512BW)
41332 {
41333 tmp = gen_reg_rtx (V32QImode);
41334 if (elt < 32)
41335 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
41336 else
41337 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
41338 ix86_expand_vector_extract (false, target, tmp, elt & 31);
41339 return;
41340 }
41341 break;
41342
41343 case V16SFmode:
41344 tmp = gen_reg_rtx (V8SFmode);
41345 if (elt < 8)
41346 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41347 else
41348 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41349 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41350 return;
41351
41352 case V8DFmode:
41353 tmp = gen_reg_rtx (V4DFmode);
41354 if (elt < 4)
41355 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41356 else
41357 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41358 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41359 return;
41360
41361 case V16SImode:
41362 tmp = gen_reg_rtx (V8SImode);
41363 if (elt < 8)
41364 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41365 else
41366 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41367 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41368 return;
41369
41370 case V8DImode:
41371 tmp = gen_reg_rtx (V4DImode);
41372 if (elt < 4)
41373 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41374 else
41375 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41376 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41377 return;
41378
41379 case V8QImode:
41380 /* ??? Could extract the appropriate HImode element and shift. */
41381 default:
41382 break;
41383 }
41384
41385 if (use_vec_extr)
41386 {
41387 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41388 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41389
41390 /* Let the rtl optimizers know about the zero extension performed. */
41391 if (inner_mode == QImode || inner_mode == HImode)
41392 {
41393 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41394 target = gen_lowpart (SImode, target);
41395 }
41396
41397 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41398 }
41399 else
41400 {
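      /* No direct extraction pattern; spill the vector to a stack
	 temporary and load the element back from memory.  */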
41401 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41402
41403 emit_move_insn (mem, vec);
41404
41405 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41406 emit_move_insn (target, tmp);
41407 }
41408 }
41409
41410 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41411 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41412 The upper bits of DEST are undefined, though they shouldn't cause
41413 exceptions (some bits from src or all zeros are ok). */
41414
41415 static void
41416 emit_reduc_half (rtx dest, rtx src, int i)
41417 {
41418 rtx tem, d = dest;
41419 switch (GET_MODE (src))
41420 {
41421 case V4SFmode:
41422 if (i == 128)
41423 tem = gen_sse_movhlps (dest, src, src);
41424 else
41425 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41426 GEN_INT (1 + 4), GEN_INT (1 + 4));
41427 break;
41428 case V2DFmode:
41429 tem = gen_vec_interleave_highv2df (dest, src, src);
41430 break;
41431 case V16QImode:
41432 case V8HImode:
41433 case V4SImode:
41434 case V2DImode:
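      /* View the 128-bit vector as a single TImode value and shift it
	 right by I/2 bits, which moves the upper half of the range being
	 reduced into the low element positions.  */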
41435 d = gen_reg_rtx (V1TImode);
41436 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41437 GEN_INT (i / 2));
41438 break;
41439 case V8SFmode:
41440 if (i == 256)
41441 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41442 else
41443 tem = gen_avx_shufps256 (dest, src, src,
41444 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41445 break;
41446 case V4DFmode:
41447 if (i == 256)
41448 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41449 else
41450 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41451 break;
41452 case V32QImode:
41453 case V16HImode:
41454 case V8SImode:
41455 case V4DImode:
41456 if (i == 256)
41457 {
41458 if (GET_MODE (dest) != V4DImode)
41459 d = gen_reg_rtx (V4DImode);
41460 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41461 gen_lowpart (V4DImode, src),
41462 const1_rtx);
41463 }
41464 else
41465 {
41466 d = gen_reg_rtx (V2TImode);
41467 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41468 GEN_INT (i / 2));
41469 }
41470 break;
41471 case V64QImode:
41472 case V32HImode:
41473 case V16SImode:
41474 case V16SFmode:
41475 case V8DImode:
41476 case V8DFmode:
41477 if (i > 128)
41478 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41479 gen_lowpart (V16SImode, src),
41480 gen_lowpart (V16SImode, src),
41481 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41482 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41483 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41484 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41485 GEN_INT (0xC), GEN_INT (0xD),
41486 GEN_INT (0xE), GEN_INT (0xF),
41487 GEN_INT (0x10), GEN_INT (0x11),
41488 GEN_INT (0x12), GEN_INT (0x13),
41489 GEN_INT (0x14), GEN_INT (0x15),
41490 GEN_INT (0x16), GEN_INT (0x17));
41491 else
41492 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41493 gen_lowpart (V16SImode, src),
41494 GEN_INT (i == 128 ? 0x2 : 0x1),
41495 GEN_INT (0x3),
41496 GEN_INT (0x3),
41497 GEN_INT (0x3),
41498 GEN_INT (i == 128 ? 0x6 : 0x5),
41499 GEN_INT (0x7),
41500 GEN_INT (0x7),
41501 GEN_INT (0x7),
41502 GEN_INT (i == 128 ? 0xA : 0x9),
41503 GEN_INT (0xB),
41504 GEN_INT (0xB),
41505 GEN_INT (0xB),
41506 GEN_INT (i == 128 ? 0xE : 0xD),
41507 GEN_INT (0xF),
41508 GEN_INT (0xF),
41509 GEN_INT (0xF));
41510 break;
41511 default:
41512 gcc_unreachable ();
41513 }
41514 emit_insn (tem);
41515 if (d != dest)
41516 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41517 }
41518
41519 /* Expand a vector reduction. FN is the binary pattern to reduce;
41520 DEST is the destination; IN is the input vector. */
41521
41522 void
41523 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41524 {
41525 rtx half, dst, vec = in;
41526 enum machine_mode mode = GET_MODE (in);
41527 int i;
41528
41529 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41530 if (TARGET_SSE4_1
41531 && mode == V8HImode
41532 && fn == gen_uminv8hi3)
41533 {
41534 emit_insn (gen_sse4_1_phminposuw (dest, in));
41535 return;
41536 }
41537
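  /* Fold the vector in half repeatedly: each iteration moves the upper
     half of the remaining width down and combines it with the lower half
     using FN, until the result occupies the low element.  */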
41538 for (i = GET_MODE_BITSIZE (mode);
41539 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41540 i >>= 1)
41541 {
41542 half = gen_reg_rtx (mode);
41543 emit_reduc_half (half, vec, i);
41544 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41545 dst = dest;
41546 else
41547 dst = gen_reg_rtx (mode);
41548 emit_insn (fn (dst, half, vec));
41549 vec = dst;
41550 }
41551 }
41552 \f
41553 /* Target hook for scalar_mode_supported_p. */
41554 static bool
41555 ix86_scalar_mode_supported_p (enum machine_mode mode)
41556 {
41557 if (DECIMAL_FLOAT_MODE_P (mode))
41558 return default_decimal_float_supported_p ();
41559 else if (mode == TFmode)
41560 return true;
41561 else
41562 return default_scalar_mode_supported_p (mode);
41563 }
41564
41565 /* Implements target hook vector_mode_supported_p. */
41566 static bool
41567 ix86_vector_mode_supported_p (enum machine_mode mode)
41568 {
41569 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41570 return true;
41571 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41572 return true;
41573 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41574 return true;
41575 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41576 return true;
41577 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41578 return true;
41579 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41580 return true;
41581 return false;
41582 }
41583
41584 /* Implement target hook libgcc_floating_mode_supported_p. */
41585 static bool
41586 ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
41587 {
41588 switch (mode)
41589 {
41590 case SFmode:
41591 case DFmode:
41592 case XFmode:
41593 return true;
41594
41595 case TFmode:
41596 #ifdef IX86_NO_LIBGCC_TFMODE
41597 return false;
41598 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
41599 return TARGET_LONG_DOUBLE_128;
41600 #else
41601 return true;
41602 #endif
41603
41604 default:
41605 return false;
41606 }
41607 }
41608
41609 /* Target hook for c_mode_for_suffix. */
41610 static enum machine_mode
41611 ix86_c_mode_for_suffix (char suffix)
41612 {
41613 if (suffix == 'q')
41614 return TFmode;
41615 if (suffix == 'w')
41616 return XFmode;
41617
41618 return VOIDmode;
41619 }
41620
41621 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41622
41623 We do this in the new i386 backend to maintain source compatibility
41624 with the old cc0-based compiler. */
41625
41626 static tree
41627 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41628 {
41629 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41630 clobbers);
41631 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41632 clobbers);
41633 return clobbers;
41634 }
41635
41636 /* Implements target vector targetm.asm.encode_section_info. */
41637
41638 static void ATTRIBUTE_UNUSED
41639 ix86_encode_section_info (tree decl, rtx rtl, int first)
41640 {
41641 default_encode_section_info (decl, rtl, first);
41642
41643 if (TREE_CODE (decl) == VAR_DECL
41644 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41645 && ix86_in_large_data_p (decl))
41646 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41647 }
41648
41649 /* Worker function for REVERSE_CONDITION. */
41650
41651 enum rtx_code
41652 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41653 {
41654 return (mode != CCFPmode && mode != CCFPUmode
41655 ? reverse_condition (code)
41656 : reverse_condition_maybe_unordered (code));
41657 }
41658
41659 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41660 to OPERANDS[0]. */
41661
41662 const char *
41663 output_387_reg_move (rtx insn, rtx *operands)
41664 {
41665 if (REG_P (operands[0]))
41666 {
41667 if (REG_P (operands[1])
41668 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41669 {
41670 if (REGNO (operands[0]) == FIRST_STACK_REG)
41671 return output_387_ffreep (operands, 0);
41672 return "fstp\t%y0";
41673 }
41674 if (STACK_TOP_P (operands[0]))
41675 return "fld%Z1\t%y1";
41676 return "fst\t%y0";
41677 }
41678 else if (MEM_P (operands[0]))
41679 {
41680 gcc_assert (REG_P (operands[1]));
41681 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41682 return "fstp%Z0\t%y0";
41683 else
41684 {
41685 /* There is no non-popping store to memory for XFmode.
41686 So if we need one, follow the store with a load. */
41687 if (GET_MODE (operands[0]) == XFmode)
41688 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41689 else
41690 return "fst%Z0\t%y0";
41691 }
41692 }
41693 else
41694 gcc_unreachable ();
41695 }
41696
41697 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41698 FP status register is set. */
41699
41700 void
41701 ix86_emit_fp_unordered_jump (rtx label)
41702 {
41703 rtx reg = gen_reg_rtx (HImode);
41704 rtx temp;
41705
41706 emit_insn (gen_x86_fnstsw_1 (reg));
41707
41708 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41709 {
41710 emit_insn (gen_x86_sahf_1 (reg));
41711
41712 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41713 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41714 }
41715 else
41716 {
41717 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41718
41719 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41720 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41721 }
41722
41723 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41724 gen_rtx_LABEL_REF (VOIDmode, label),
41725 pc_rtx);
41726 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41727
41728 emit_jump_insn (temp);
41729 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41730 }
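/* Background for the two paths above (a sketch of the bit mapping, for
   exposition): FNSTSW copies the x87 status word into AX, so C2 (bit 10
   of the status word) lands in bit 2 of AH, which is exactly what the
   0x04 test checks.  When SAHF is usable instead, AH is loaded into
   EFLAGS, mapping C0 -> CF, C2 -> PF and C3 -> ZF, and the UNORDERED
   condition on the resulting CCmode flags tests that same C2/PF bit.  */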
41731
41732 /* Output code to perform a log1p XFmode calculation. */
41733
41734 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41735 {
41736 rtx_code_label *label1 = gen_label_rtx ();
41737 rtx_code_label *label2 = gen_label_rtx ();
41738
41739 rtx tmp = gen_reg_rtx (XFmode);
41740 rtx tmp2 = gen_reg_rtx (XFmode);
41741 rtx test;
41742
41743 emit_insn (gen_absxf2 (tmp, op1));
41744 test = gen_rtx_GE (VOIDmode, tmp,
41745 CONST_DOUBLE_FROM_REAL_VALUE (
41746 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41747 XFmode));
41748 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41749
41750 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41751 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41752 emit_jump (label2);
41753
41754 emit_label (label1);
41755 emit_move_insn (tmp, CONST1_RTX (XFmode));
41756 emit_insn (gen_addxf3 (tmp, op1, tmp));
41757 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41758 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41759
41760 emit_label (label2);
41761 }
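/* A rough C model of the sequence above (illustrative only; log2p1 is a
   hypothetical helper standing in for fyl2xp1):

     long double log1p_sketch (long double x)
     {
       // fyl2xp1 is only specified for |x| < 1 - sqrt(2)/2 ~= 0.2928932,
       // so larger magnitudes take the plain fyl2x path (label1 above).
       if (fabsl (x) >= 0.29289321881345247561810596348408353L)
         return 0.693147180559945309L * log2l (1.0L + x);   // fldln2; fyl2x
       return 0.693147180559945309L * log2p1 (x);           // fldln2; fyl2xp1
     }

   Both branches use log1p(x) = ln(2) * log2(1 + x); the fyl2xp1 form keeps
   the "+ 1" inside the instruction so precision is not lost for tiny x.  */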
41762
41763 /* Emit code for round calculation. */
41764 void ix86_emit_i387_round (rtx op0, rtx op1)
41765 {
41766 enum machine_mode inmode = GET_MODE (op1);
41767 enum machine_mode outmode = GET_MODE (op0);
41768 rtx e1, e2, res, tmp, tmp1, half;
41769 rtx scratch = gen_reg_rtx (HImode);
41770 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41771 rtx_code_label *jump_label = gen_label_rtx ();
41772 rtx insn;
41773 rtx (*gen_abs) (rtx, rtx);
41774 rtx (*gen_neg) (rtx, rtx);
41775
41776 switch (inmode)
41777 {
41778 case SFmode:
41779 gen_abs = gen_abssf2;
41780 break;
41781 case DFmode:
41782 gen_abs = gen_absdf2;
41783 break;
41784 case XFmode:
41785 gen_abs = gen_absxf2;
41786 break;
41787 default:
41788 gcc_unreachable ();
41789 }
41790
41791 switch (outmode)
41792 {
41793 case SFmode:
41794 gen_neg = gen_negsf2;
41795 break;
41796 case DFmode:
41797 gen_neg = gen_negdf2;
41798 break;
41799 case XFmode:
41800 gen_neg = gen_negxf2;
41801 break;
41802 case HImode:
41803 gen_neg = gen_neghi2;
41804 break;
41805 case SImode:
41806 gen_neg = gen_negsi2;
41807 break;
41808 case DImode:
41809 gen_neg = gen_negdi2;
41810 break;
41811 default:
41812 gcc_unreachable ();
41813 }
41814
41815 e1 = gen_reg_rtx (inmode);
41816 e2 = gen_reg_rtx (inmode);
41817 res = gen_reg_rtx (outmode);
41818
41819 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41820
41821 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
41822
41823 /* scratch = fxam(op1) */
41824 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41825 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41826 UNSPEC_FXAM)));
41827 /* e1 = fabs(op1) */
41828 emit_insn (gen_abs (e1, op1));
41829
41830 /* e2 = e1 + 0.5 */
41831 half = force_reg (inmode, half);
41832 emit_insn (gen_rtx_SET (VOIDmode, e2,
41833 gen_rtx_PLUS (inmode, e1, half)));
41834
41835 /* res = floor(e2) */
41836 if (inmode != XFmode)
41837 {
41838 tmp1 = gen_reg_rtx (XFmode);
41839
41840 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41841 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41842 }
41843 else
41844 tmp1 = e2;
41845
41846 switch (outmode)
41847 {
41848 case SFmode:
41849 case DFmode:
41850 {
41851 rtx tmp0 = gen_reg_rtx (XFmode);
41852
41853 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41854
41855 emit_insn (gen_rtx_SET (VOIDmode, res,
41856 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41857 UNSPEC_TRUNC_NOOP)));
41858 }
41859 break;
41860 case XFmode:
41861 emit_insn (gen_frndintxf2_floor (res, tmp1));
41862 break;
41863 case HImode:
41864 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41865 break;
41866 case SImode:
41867 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41868 break;
41869 case DImode:
41870 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41871 break;
41872 default:
41873 gcc_unreachable ();
41874 }
41875
41876 /* flags = signbit(a) */
41877 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41878
41879 /* if (flags) then res = -res */
41880 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41881 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41882 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41883 pc_rtx);
41884 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41885 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41886 JUMP_LABEL (insn) = jump_label;
41887
41888 emit_insn (gen_neg (res, res));
41889
41890 emit_label (jump_label);
41891 LABEL_NUSES (jump_label) = 1;
41892
41893 emit_move_insn (op0, res);
41894 }
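/* Worked example of the identity round(a) = sgn(a) * floor(fabs(a) + 0.5)
   used above: for a = -2.3, floor (2.3 + 0.5) = 2 and the sign bit of a is
   set, so the result is -2; for a = 2.5, floor (2.5 + 0.5) = 3, i.e.
   halfway cases round away from zero, as round() requires.  */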
41895
41896 /* Output code to perform a Newton-Raphson approximation of a single precision
41897 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41898
41899 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41900 {
41901 rtx x0, x1, e0, e1;
41902
41903 x0 = gen_reg_rtx (mode);
41904 e0 = gen_reg_rtx (mode);
41905 e1 = gen_reg_rtx (mode);
41906 x1 = gen_reg_rtx (mode);
41907
41908 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
41909
41910 b = force_reg (mode, b);
41911
41912 /* x0 = rcp(b) estimate */
41913 if (mode == V16SFmode || mode == V8DFmode)
41914 emit_insn (gen_rtx_SET (VOIDmode, x0,
41915 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41916 UNSPEC_RCP14)));
41917 else
41918 emit_insn (gen_rtx_SET (VOIDmode, x0,
41919 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41920 UNSPEC_RCP)));
41921
41922 /* e0 = x0 * b */
41923 emit_insn (gen_rtx_SET (VOIDmode, e0,
41924 gen_rtx_MULT (mode, x0, b)));
41925
41926 /* e0 = x0 * e0 */
41927 emit_insn (gen_rtx_SET (VOIDmode, e0,
41928 gen_rtx_MULT (mode, x0, e0)));
41929
41930 /* e1 = x0 + x0 */
41931 emit_insn (gen_rtx_SET (VOIDmode, e1,
41932 gen_rtx_PLUS (mode, x0, x0)));
41933
41934 /* x1 = e1 - e0 */
41935 emit_insn (gen_rtx_SET (VOIDmode, x1,
41936 gen_rtx_MINUS (mode, e1, e0)));
41937
41938 /* res = a * x1 */
41939 emit_insn (gen_rtx_SET (VOIDmode, res,
41940 gen_rtx_MULT (mode, a, x1)));
41941 }
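/* Derivation of the sequence above (for exposition): one Newton-Raphson
   step for f(x) = 1/x - b refines a reciprocal estimate x0 into

     x1 = x0 - f(x0)/f'(x0) = x0 * (2 - b * x0) = (x0 + x0) - (b * x0) * x0

   which is exactly e1 - e0, and a / b is then approximated as a * x1.
   A rough scalar model (illustrative; rcp_estimate stands in for the
   RCPSS/RCP14 hardware estimate, accurate to roughly 12 bits):

     float swdiv_sketch (float a, float b)
     {
       float x0 = rcp_estimate (b);             // x0 ~= 1/b
       float x1 = (x0 + x0) - (b * x0) * x0;    // one Newton-Raphson step
       return a * x1;                           // ~= a / b
     }
*/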
41942
41943 /* Output code to perform a Newton-Raphson approximation of a
41944 single precision floating point [reciprocal] square root. */
41945
41946 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41947 bool recip)
41948 {
41949 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41950 REAL_VALUE_TYPE r;
41951 int unspec;
41952
41953 x0 = gen_reg_rtx (mode);
41954 e0 = gen_reg_rtx (mode);
41955 e1 = gen_reg_rtx (mode);
41956 e2 = gen_reg_rtx (mode);
41957 e3 = gen_reg_rtx (mode);
41958
41959 real_from_integer (&r, VOIDmode, -3, SIGNED);
41960 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41961
41962 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41963 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41964 unspec = UNSPEC_RSQRT;
41965
41966 if (VECTOR_MODE_P (mode))
41967 {
41968 mthree = ix86_build_const_vector (mode, true, mthree);
41969 mhalf = ix86_build_const_vector (mode, true, mhalf);
41970 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41971 if (GET_MODE_SIZE (mode) == 64)
41972 unspec = UNSPEC_RSQRT14;
41973 }
41974
41975 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41976 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
41977
41978 a = force_reg (mode, a);
41979
41980 /* x0 = rsqrt(a) estimate */
41981 emit_insn (gen_rtx_SET (VOIDmode, x0,
41982 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41983 unspec)));
41984
41985 /* If a == 0.0, zero out the Inf rsqrt estimate to prevent a NaN result for sqrt (0.0). */
41986 if (!recip)
41987 {
41988 rtx zero, mask;
41989
41990 zero = gen_reg_rtx (mode);
41991 mask = gen_reg_rtx (mode);
41992
41993 zero = force_reg (mode, CONST0_RTX (mode));
41994
41995 /* Handle masked compare. */
41996 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41997 {
41998 mask = gen_reg_rtx (HImode);
41999 /* Imm value 0x4 corresponds to not-equal comparison. */
42000 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
42001 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
42002 }
42003 else
42004 {
42005 emit_insn (gen_rtx_SET (VOIDmode, mask,
42006 gen_rtx_NE (mode, zero, a)));
42007
42008 emit_insn (gen_rtx_SET (VOIDmode, x0,
42009 gen_rtx_AND (mode, x0, mask)));
42010 }
42011 }
42012
42013 /* e0 = x0 * a */
42014 emit_insn (gen_rtx_SET (VOIDmode, e0,
42015 gen_rtx_MULT (mode, x0, a)));
42016 /* e1 = e0 * x0 */
42017 emit_insn (gen_rtx_SET (VOIDmode, e1,
42018 gen_rtx_MULT (mode, e0, x0)));
42019
42020 /* e2 = e1 - 3. */
42021 mthree = force_reg (mode, mthree);
42022 emit_insn (gen_rtx_SET (VOIDmode, e2,
42023 gen_rtx_PLUS (mode, e1, mthree)));
42024
42025 mhalf = force_reg (mode, mhalf);
42026 if (recip)
42027 /* e3 = -.5 * x0 */
42028 emit_insn (gen_rtx_SET (VOIDmode, e3,
42029 gen_rtx_MULT (mode, x0, mhalf)));
42030 else
42031 /* e3 = -.5 * e0 */
42032 emit_insn (gen_rtx_SET (VOIDmode, e3,
42033 gen_rtx_MULT (mode, e0, mhalf)));
42034 /* ret = e2 * e3 */
42035 emit_insn (gen_rtx_SET (VOIDmode, res,
42036 gen_rtx_MULT (mode, e2, e3)));
42037 }
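/* Derivation of the sequence above (for exposition): one Newton-Raphson
   step for f(x) = 1/(x*x) - a refines an rsqrt estimate x0 into

     x1 = x0 * (3 - a * x0 * x0) / 2 = -0.5 * x0 * (a * x0 * x0 - 3)

   and since sqrt(a) = a * rsqrt(a), the non-reciprocal variant simply
   folds the extra factor of a into e0 = x0 * a.  Rough scalar model
   (illustrative; rsqrt_estimate stands in for RSQRTSS/RSQRT14):

     float swsqrt_sketch (float a, int recip)
     {
       float x0 = rsqrt_estimate (a);   // x0 ~= 1/sqrt(a)
       float e0 = x0 * a;
       float e1 = e0 * x0;              // ~= a * x0 * x0
       float e2 = e1 - 3.0f;
       float e3 = (recip ? x0 : e0) * -0.5f;
       return e2 * e3;                  // rsqrt(a) if recip, else sqrt(a)
     }
*/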
42038
42039 #ifdef TARGET_SOLARIS
42040 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
42041
42042 static void
42043 i386_solaris_elf_named_section (const char *name, unsigned int flags,
42044 tree decl)
42045 {
42046 /* With Binutils 2.15, the "@unwind" marker must be specified on
42047 every occurrence of the ".eh_frame" section, not just the first
42048 one. */
42049 if (TARGET_64BIT
42050 && strcmp (name, ".eh_frame") == 0)
42051 {
42052 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
42053 flags & SECTION_WRITE ? "aw" : "a");
42054 return;
42055 }
42056
42057 #ifndef USE_GAS
42058 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
42059 {
42060 solaris_elf_asm_comdat_section (name, flags, decl);
42061 return;
42062 }
42063 #endif
42064
42065 default_elf_asm_named_section (name, flags, decl);
42066 }
42067 #endif /* TARGET_SOLARIS */
42068
42069 /* Return the mangling of TYPE if it is an extended fundamental type. */
42070
42071 static const char *
42072 ix86_mangle_type (const_tree type)
42073 {
42074 type = TYPE_MAIN_VARIANT (type);
42075
42076 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
42077 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
42078 return NULL;
42079
42080 switch (TYPE_MODE (type))
42081 {
42082 case TFmode:
42083 /* __float128 is "g". */
42084 return "g";
42085 case XFmode:
42086 /* "long double" or __float80 is "e". */
42087 return "e";
42088 default:
42089 return NULL;
42090 }
42091 }
42092
42093 /* For 32-bit code we can save PIC register setup by using
42094 __stack_chk_fail_local hidden function instead of calling
42095 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
42096 register, so it is better to call __stack_chk_fail directly. */
42097
42098 static tree ATTRIBUTE_UNUSED
42099 ix86_stack_protect_fail (void)
42100 {
42101 return TARGET_64BIT
42102 ? default_external_stack_protect_fail ()
42103 : default_hidden_stack_protect_fail ();
42104 }
42105
42106 /* Select a format to encode pointers in exception handling data. CODE
42107 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
42108 true if the symbol may be affected by dynamic relocations.
42109
42110 ??? All x86 object file formats are capable of representing this.
42111 After all, the relocation needed is the same as for the call insn.
42112 Whether or not a particular assembler allows us to enter such, I
42113 guess we'll have to see. */
42114 int
42115 asm_preferred_eh_data_format (int code, int global)
42116 {
42117 if (flag_pic)
42118 {
42119 int type = DW_EH_PE_sdata8;
42120 if (!TARGET_64BIT
42121 || ix86_cmodel == CM_SMALL_PIC
42122 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
42123 type = DW_EH_PE_sdata4;
42124 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
42125 }
42126 if (ix86_cmodel == CM_SMALL
42127 || (ix86_cmodel == CM_MEDIUM && code))
42128 return DW_EH_PE_udata4;
42129 return DW_EH_PE_absptr;
42130 }
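/* Example of the encodings chosen above: for 64-bit PIC code with the small
   PIC model, a local symbol gets DW_EH_PE_pcrel | DW_EH_PE_sdata4 and a
   global one additionally gets DW_EH_PE_indirect; non-PIC small-model code
   uses plain DW_EH_PE_udata4.  */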
42131 \f
42132 /* Expand copysign from SIGN to the positive value ABS_VALUE
42133 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
42134 the sign-bit. */
42135 static void
42136 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
42137 {
42138 enum machine_mode mode = GET_MODE (sign);
42139 rtx sgn = gen_reg_rtx (mode);
42140 if (mask == NULL_RTX)
42141 {
42142 enum machine_mode vmode;
42143
42144 if (mode == SFmode)
42145 vmode = V4SFmode;
42146 else if (mode == DFmode)
42147 vmode = V2DFmode;
42148 else
42149 vmode = mode;
42150
42151 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
42152 if (!VECTOR_MODE_P (mode))
42153 {
42154 /* We need to generate a scalar mode mask in this case. */
42155 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42156 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42157 mask = gen_reg_rtx (mode);
42158 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42159 }
42160 }
42161 else
42162 mask = gen_rtx_NOT (mode, mask);
42163 emit_insn (gen_rtx_SET (VOIDmode, sgn,
42164 gen_rtx_AND (mode, mask, sign)));
42165 emit_insn (gen_rtx_SET (VOIDmode, result,
42166 gen_rtx_IOR (mode, abs_value, sgn)));
42167 }
42168
42169 /* Expand fabs (OP0) and return a new rtx that holds the result. The
42170 mask for masking out the sign-bit is stored in *SMASK, if that is
42171 non-null. */
42172 static rtx
42173 ix86_expand_sse_fabs (rtx op0, rtx *smask)
42174 {
42175 enum machine_mode vmode, mode = GET_MODE (op0);
42176 rtx xa, mask;
42177
42178 xa = gen_reg_rtx (mode);
42179 if (mode == SFmode)
42180 vmode = V4SFmode;
42181 else if (mode == DFmode)
42182 vmode = V2DFmode;
42183 else
42184 vmode = mode;
42185 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
42186 if (!VECTOR_MODE_P (mode))
42187 {
42188 /* We need to generate a scalar mode mask in this case. */
42189 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
42190 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
42191 mask = gen_reg_rtx (mode);
42192 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
42193 }
42194 emit_insn (gen_rtx_SET (VOIDmode, xa,
42195 gen_rtx_AND (mode, op0, mask)));
42196
42197 if (smask)
42198 *smask = mask;
42199
42200 return xa;
42201 }
42202
42203 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
42204 swapping the operands if SWAP_OPERANDS is true. The expanded
42205 code is a forward jump to a newly created label in case the
42206 comparison is true. The generated label rtx is returned. */
42207 static rtx_code_label *
42208 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
42209 bool swap_operands)
42210 {
42211 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
42212 rtx_code_label *label;
42213 rtx tmp;
42214
42215 if (swap_operands)
42216 {
42217 tmp = op0;
42218 op0 = op1;
42219 op1 = tmp;
42220 }
42221
42222 label = gen_label_rtx ();
42223 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
42224 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42225 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
42226 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
42227 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
42228 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
42229 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
42230 JUMP_LABEL (tmp) = label;
42231
42232 return label;
42233 }
42234
42235 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
42236 using comparison code CODE. Operands are swapped for the comparison if
42237 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
42238 static rtx
42239 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
42240 bool swap_operands)
42241 {
42242 rtx (*insn)(rtx, rtx, rtx, rtx);
42243 enum machine_mode mode = GET_MODE (op0);
42244 rtx mask = gen_reg_rtx (mode);
42245
42246 if (swap_operands)
42247 {
42248 rtx tmp = op0;
42249 op0 = op1;
42250 op1 = tmp;
42251 }
42252
42253 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
42254
42255 emit_insn (insn (mask, op0, op1,
42256 gen_rtx_fmt_ee (code, mode, op0, op1)));
42257 return mask;
42258 }
42259
42260 /* Generate and return a rtx of mode MODE for 2**n where n is the number
42261 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
42262 static rtx
42263 ix86_gen_TWO52 (enum machine_mode mode)
42264 {
42265 REAL_VALUE_TYPE TWO52r;
42266 rtx TWO52;
42267
42268 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
42269 TWO52 = const_double_from_real_value (TWO52r, mode);
42270 TWO52 = force_reg (mode, TWO52);
42271
42272 return TWO52;
42273 }
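/* Why the callers add and then subtract this constant: once xa + TWO52 is
   at least TWO52, the format has no representable bits below 1.0, so the
   addition itself rounds xa to a nearby integer in the current rounding
   mode and the subsequent subtraction of TWO52 is exact.  For example, in
   DFmode 3.7 + 2**52 rounds to 2**52 + 4.0, and subtracting 2**52 leaves
   4.0.  This only works for fabs (xa) < 2**52, which is why the callers
   first branch away on !isless (xa, TWO52).  */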
42274
42275 /* Expand SSE sequence for computing lround from OP1 storing
42276 into OP0. */
42277 void
42278 ix86_expand_lround (rtx op0, rtx op1)
42279 {
42280 /* C code for the stuff we're doing below:
42281 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
42282 return (long)tmp;
42283 */
42284 enum machine_mode mode = GET_MODE (op1);
42285 const struct real_format *fmt;
42286 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42287 rtx adj;
42288
42289 /* load nextafter (0.5, 0.0) */
42290 fmt = REAL_MODE_FORMAT (mode);
42291 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42292 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42293
42294 /* adj = copysign (0.5, op1) */
42295 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
42296 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
42297
42298 /* adj = op1 + adj */
42299 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
42300
42301 /* op0 = (imode)adj */
42302 expand_fix (op0, adj, 0);
42303 }
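/* Note on the adjustment constant used above: it is nextafter (0.5, 0.0),
   i.e. 0.5 - 2**(-p-1) for a p-bit mantissa, the largest representable
   value strictly below 0.5, rather than 0.5 itself.  With exactly 0.5 the
   addition would round a value such as 0.49999999999999994 (the largest
   double below 0.5) up to 1.0, making lround return 1 instead of 0; with
   the slightly smaller constant such inputs stay below 1.0, while halfway
   cases like 2.5 still reach 3.0 and round away from zero as required.  */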
42304
42305 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
42306 DO_FLOOR) from OP1, storing the result into OP0. */
42307 void
42308 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
42309 {
42310 /* C code for the stuff we're doing below (for do_floor):
42311 xi = (long)op1;
42312 xi -= (double)xi > op1 ? 1 : 0;
42313 return xi;
42314 */
42315 enum machine_mode fmode = GET_MODE (op1);
42316 enum machine_mode imode = GET_MODE (op0);
42317 rtx ireg, freg, tmp;
42318 rtx_code_label *label;
42319
42320 /* reg = (long)op1 */
42321 ireg = gen_reg_rtx (imode);
42322 expand_fix (ireg, op1, 0);
42323
42324 /* freg = (double)reg */
42325 freg = gen_reg_rtx (fmode);
42326 expand_float (freg, ireg, 0);
42327
42328 /* ireg = (freg > op1) ? ireg - 1 : ireg */
42329 label = ix86_expand_sse_compare_and_jump (UNLE,
42330 freg, op1, !do_floor);
42331 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
42332 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
42333 emit_move_insn (ireg, tmp);
42334
42335 emit_label (label);
42336 LABEL_NUSES (label) = 1;
42337
42338 emit_move_insn (op0, ireg);
42339 }
42340
42341 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42342 result in OPERAND0. */
42343 void
42344 ix86_expand_rint (rtx operand0, rtx operand1)
42345 {
42346 /* C code for the stuff we're doing below:
42347 xa = fabs (operand1);
42348 if (!isless (xa, 2**52))
42349 return operand1;
42350 xa = xa + 2**52 - 2**52;
42351 return copysign (xa, operand1);
42352 */
42353 enum machine_mode mode = GET_MODE (operand0);
42354 rtx res, xa, TWO52, mask;
42355 rtx_code_label *label;
42356
42357 res = gen_reg_rtx (mode);
42358 emit_move_insn (res, operand1);
42359
42360 /* xa = abs (operand1) */
42361 xa = ix86_expand_sse_fabs (res, &mask);
42362
42363 /* if (!isless (xa, TWO52)) goto label; */
42364 TWO52 = ix86_gen_TWO52 (mode);
42365 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42366
42367 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42368 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42369
42370 ix86_sse_copysign_to_positive (res, xa, res, mask);
42371
42372 emit_label (label);
42373 LABEL_NUSES (label) = 1;
42374
42375 emit_move_insn (operand0, res);
42376 }
42377
42378 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42379 into OPERAND0. */
42380 void
42381 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42382 {
42383 /* C code for the stuff we expand below.
42384 double xa = fabs (x), x2;
42385 if (!isless (xa, TWO52))
42386 return x;
42387 xa = xa + TWO52 - TWO52;
42388 x2 = copysign (xa, x);
42389 Compensate. Floor:
42390 if (x2 > x)
42391 x2 -= 1;
42392 Compensate. Ceil:
42393 if (x2 < x)
42394 x2 -= -1;
42395 return x2;
42396 */
42397 enum machine_mode mode = GET_MODE (operand0);
42398 rtx xa, TWO52, tmp, one, res, mask;
42399 rtx_code_label *label;
42400
42401 TWO52 = ix86_gen_TWO52 (mode);
42402
42403 /* Temporary for holding the result, initialized to the input
42404 operand to ease control flow. */
42405 res = gen_reg_rtx (mode);
42406 emit_move_insn (res, operand1);
42407
42408 /* xa = abs (operand1) */
42409 xa = ix86_expand_sse_fabs (res, &mask);
42410
42411 /* if (!isless (xa, TWO52)) goto label; */
42412 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42413
42414 /* xa = xa + TWO52 - TWO52; */
42415 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42416 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42417
42418 /* xa = copysign (xa, operand1) */
42419 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42420
42421 /* generate 1.0 or -1.0 */
42422 one = force_reg (mode,
42423 const_double_from_real_value (do_floor
42424 ? dconst1 : dconstm1, mode));
42425
42426 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42427 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42428 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42429 gen_rtx_AND (mode, one, tmp)));
42430 /* We always need to subtract here to preserve signed zero. */
42431 tmp = expand_simple_binop (mode, MINUS,
42432 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42433 emit_move_insn (res, tmp);
42434
42435 emit_label (label);
42436 LABEL_NUSES (label) = 1;
42437
42438 emit_move_insn (operand0, res);
42439 }
42440
42441 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42442 into OPERAND0. */
42443 void
42444 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42445 {
42446 /* C code for the stuff we expand below.
42447 double xa = fabs (x), x2;
42448 if (!isless (xa, TWO52))
42449 return x;
42450 x2 = (double)(long)x;
42451 Compensate. Floor:
42452 if (x2 > x)
42453 x2 -= 1;
42454 Compensate. Ceil:
42455 if (x2 < x)
42456 x2 += 1;
42457 if (HONOR_SIGNED_ZEROS (mode))
42458 return copysign (x2, x);
42459 return x2;
42460 */
42461 enum machine_mode mode = GET_MODE (operand0);
42462 rtx xa, xi, TWO52, tmp, one, res, mask;
42463 rtx_code_label *label;
42464
42465 TWO52 = ix86_gen_TWO52 (mode);
42466
42467 /* Temporary for holding the result, initialized to the input
42468 operand to ease control flow. */
42469 res = gen_reg_rtx (mode);
42470 emit_move_insn (res, operand1);
42471
42472 /* xa = abs (operand1) */
42473 xa = ix86_expand_sse_fabs (res, &mask);
42474
42475 /* if (!isless (xa, TWO52)) goto label; */
42476 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42477
42478 /* xa = (double)(long)x */
42479 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42480 expand_fix (xi, res, 0);
42481 expand_float (xa, xi, 0);
42482
42483 /* generate 1.0 */
42484 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42485
42486 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42487 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42488 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42489 gen_rtx_AND (mode, one, tmp)));
42490 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42491 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42492 emit_move_insn (res, tmp);
42493
42494 if (HONOR_SIGNED_ZEROS (mode))
42495 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42496
42497 emit_label (label);
42498 LABEL_NUSES (label) = 1;
42499
42500 emit_move_insn (operand0, res);
42501 }
42502
42503 /* Expand SSE sequence for computing round from OPERAND1 storing
42504 into OPERAND0. This sequence works without relying on DImode truncation
42505 via cvttsd2siq, which is only available on 64-bit targets. */
42506 void
42507 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42508 {
42509 /* C code for the stuff we expand below.
42510 double xa = fabs (x), xa2, x2;
42511 if (!isless (xa, TWO52))
42512 return x;
42513 Using the absolute value and copying back sign makes
42514 -0.0 -> -0.0 correct.
42515 xa2 = xa + TWO52 - TWO52;
42516 Compensate.
42517 dxa = xa2 - xa;
42518 if (dxa <= -0.5)
42519 xa2 += 1;
42520 else if (dxa > 0.5)
42521 xa2 -= 1;
42522 x2 = copysign (xa2, x);
42523 return x2;
42524 */
42525 enum machine_mode mode = GET_MODE (operand0);
42526 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42527 rtx_code_label *label;
42528
42529 TWO52 = ix86_gen_TWO52 (mode);
42530
42531 /* Temporary for holding the result, initialized to the input
42532 operand to ease control flow. */
42533 res = gen_reg_rtx (mode);
42534 emit_move_insn (res, operand1);
42535
42536 /* xa = abs (operand1) */
42537 xa = ix86_expand_sse_fabs (res, &mask);
42538
42539 /* if (!isless (xa, TWO52)) goto label; */
42540 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42541
42542 /* xa2 = xa + TWO52 - TWO52; */
42543 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42544 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42545
42546 /* dxa = xa2 - xa; */
42547 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42548
42549 /* generate 0.5, 1.0 and -0.5 */
42550 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42551 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42552 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42553 0, OPTAB_DIRECT);
42554
42555 /* Compensate. */
42556 tmp = gen_reg_rtx (mode);
42557 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42558 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42559 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42560 gen_rtx_AND (mode, one, tmp)));
42561 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42562 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42563 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42564 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42565 gen_rtx_AND (mode, one, tmp)));
42566 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42567
42568 /* res = copysign (xa2, operand1) */
42569 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42570
42571 emit_label (label);
42572 LABEL_NUSES (label) = 1;
42573
42574 emit_move_insn (operand0, res);
42575 }
42576
42577 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42578 into OPERAND0. */
42579 void
42580 ix86_expand_trunc (rtx operand0, rtx operand1)
42581 {
42582 /* C code for SSE variant we expand below.
42583 double xa = fabs (x), x2;
42584 if (!isless (xa, TWO52))
42585 return x;
42586 x2 = (double)(long)x;
42587 if (HONOR_SIGNED_ZEROS (mode))
42588 return copysign (x2, x);
42589 return x2;
42590 */
42591 enum machine_mode mode = GET_MODE (operand0);
42592 rtx xa, xi, TWO52, res, mask;
42593 rtx_code_label *label;
42594
42595 TWO52 = ix86_gen_TWO52 (mode);
42596
42597 /* Temporary for holding the result, initialized to the input
42598 operand to ease control flow. */
42599 res = gen_reg_rtx (mode);
42600 emit_move_insn (res, operand1);
42601
42602 /* xa = abs (operand1) */
42603 xa = ix86_expand_sse_fabs (res, &mask);
42604
42605 /* if (!isless (xa, TWO52)) goto label; */
42606 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42607
42608 /* x = (double)(long)x */
42609 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42610 expand_fix (xi, res, 0);
42611 expand_float (res, xi, 0);
42612
42613 if (HONOR_SIGNED_ZEROS (mode))
42614 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42615
42616 emit_label (label);
42617 LABEL_NUSES (label) = 1;
42618
42619 emit_move_insn (operand0, res);
42620 }
42621
42622 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
42623 OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation. */
42624 void
42625 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42626 {
42627 enum machine_mode mode = GET_MODE (operand0);
42628 rtx xa, mask, TWO52, one, res, smask, tmp;
42629 rtx_code_label *label;
42630
42631 /* C code for SSE variant we expand below.
42632 double xa = fabs (x), x2;
42633 if (!isless (xa, TWO52))
42634 return x;
42635 xa2 = xa + TWO52 - TWO52;
42636 Compensate:
42637 if (xa2 > xa)
42638 xa2 -= 1.0;
42639 x2 = copysign (xa2, x);
42640 return x2;
42641 */
42642
42643 TWO52 = ix86_gen_TWO52 (mode);
42644
42645 /* Temporary for holding the result, initialized to the input
42646 operand to ease control flow. */
42647 res = gen_reg_rtx (mode);
42648 emit_move_insn (res, operand1);
42649
42650 /* xa = abs (operand1) */
42651 xa = ix86_expand_sse_fabs (res, &smask);
42652
42653 /* if (!isless (xa, TWO52)) goto label; */
42654 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42655
42656 /* res = xa + TWO52 - TWO52; */
42657 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42658 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42659 emit_move_insn (res, tmp);
42660
42661 /* generate 1.0 */
42662 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42663
42664 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42665 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42666 emit_insn (gen_rtx_SET (VOIDmode, mask,
42667 gen_rtx_AND (mode, mask, one)));
42668 tmp = expand_simple_binop (mode, MINUS,
42669 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42670 emit_move_insn (res, tmp);
42671
42672 /* res = copysign (res, operand1) */
42673 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42674
42675 emit_label (label);
42676 LABEL_NUSES (label) = 1;
42677
42678 emit_move_insn (operand0, res);
42679 }
42680
42681 /* Expand SSE sequence for computing round from OPERAND1 storing
42682 into OPERAND0. */
42683 void
42684 ix86_expand_round (rtx operand0, rtx operand1)
42685 {
42686 /* C code for the stuff we're doing below:
42687 double xa = fabs (x);
42688 if (!isless (xa, TWO52))
42689 return x;
42690 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42691 return copysign (xa, x);
42692 */
42693 enum machine_mode mode = GET_MODE (operand0);
42694 rtx res, TWO52, xa, xi, half, mask;
42695 rtx_code_label *label;
42696 const struct real_format *fmt;
42697 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42698
42699 /* Temporary for holding the result, initialized to the input
42700 operand to ease control flow. */
42701 res = gen_reg_rtx (mode);
42702 emit_move_insn (res, operand1);
42703
42704 TWO52 = ix86_gen_TWO52 (mode);
42705 xa = ix86_expand_sse_fabs (res, &mask);
42706 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42707
42708 /* load nextafter (0.5, 0.0) */
42709 fmt = REAL_MODE_FORMAT (mode);
42710 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42711 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42712
42713 /* xa = xa + 0.5 */
42714 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42715 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42716
42717 /* xa = (double)(int64_t)xa */
42718 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42719 expand_fix (xi, xa, 0);
42720 expand_float (xa, xi, 0);
42721
42722 /* res = copysign (xa, operand1) */
42723 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42724
42725 emit_label (label);
42726 LABEL_NUSES (label) = 1;
42727
42728 emit_move_insn (operand0, res);
42729 }
42730
42731 /* Expand SSE sequence for computing round
42732 from OP1 storing into OP0 using sse4 round insn. */
42733 void
42734 ix86_expand_round_sse4 (rtx op0, rtx op1)
42735 {
42736 enum machine_mode mode = GET_MODE (op0);
42737 rtx e1, e2, res, half;
42738 const struct real_format *fmt;
42739 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42740 rtx (*gen_copysign) (rtx, rtx, rtx);
42741 rtx (*gen_round) (rtx, rtx, rtx);
42742
42743 switch (mode)
42744 {
42745 case SFmode:
42746 gen_copysign = gen_copysignsf3;
42747 gen_round = gen_sse4_1_roundsf2;
42748 break;
42749 case DFmode:
42750 gen_copysign = gen_copysigndf3;
42751 gen_round = gen_sse4_1_rounddf2;
42752 break;
42753 default:
42754 gcc_unreachable ();
42755 }
42756
42757 /* round (a) = trunc (a + copysign (0.5, a)) */
42758
42759 /* load nextafter (0.5, 0.0) */
42760 fmt = REAL_MODE_FORMAT (mode);
42761 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42762 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42763 half = const_double_from_real_value (pred_half, mode);
42764
42765 /* e1 = copysign (0.5, op1) */
42766 e1 = gen_reg_rtx (mode);
42767 emit_insn (gen_copysign (e1, half, op1));
42768
42769 /* e2 = op1 + e1 */
42770 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42771
42772 /* res = trunc (e2) */
42773 res = gen_reg_rtx (mode);
42774 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42775
42776 emit_move_insn (op0, res);
42777 }
42778 \f
42779
42780 /* Table of valid machine attributes. */
42781 static const struct attribute_spec ix86_attribute_table[] =
42782 {
42783 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42784 affects_type_identity } */
42785 /* Stdcall attribute says callee is responsible for popping arguments
42786 if they are not variable. */
42787 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42788 true },
42789 /* Fastcall attribute says callee is responsible for popping arguments
42790 if they are not variable. */
42791 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42792 true },
42793 /* Thiscall attribute says callee is responsible for popping arguments
42794 if they are not variable. */
42795 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42796 true },
42797 /* Cdecl attribute says the callee is a normal C declaration */
42798 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42799 true },
42800 /* Regparm attribute specifies how many integer arguments are to be
42801 passed in registers. */
42802 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42803 true },
42804 /* Sseregparm attribute says we are using x86_64 calling conventions
42805 for FP arguments. */
42806 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42807 true },
42808 /* The transactional memory builtins are implicitly regparm or fastcall
42809 depending on the ABI. Override the generic do-nothing attribute that
42810 these builtins were declared with. */
42811 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42812 true },
42813 /* force_align_arg_pointer says this function realigns the stack at entry. */
42814 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42815 false, true, true, ix86_handle_cconv_attribute, false },
42816 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42817 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42818 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42819 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42820 false },
42821 #endif
42822 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42823 false },
42824 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42825 false },
42826 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42827 SUBTARGET_ATTRIBUTE_TABLE,
42828 #endif
42829 /* ms_abi and sysv_abi calling convention function attributes. */
42830 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42831 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42832 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42833 false },
42834 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42835 ix86_handle_callee_pop_aggregate_return, true },
42836 /* End element. */
42837 { NULL, 0, 0, false, false, false, NULL, false }
42838 };
42839
42840 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42841 static int
42842 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42843 tree vectype, int)
42844 {
42845 unsigned elements;
42846
42847 switch (type_of_cost)
42848 {
42849 case scalar_stmt:
42850 return ix86_cost->scalar_stmt_cost;
42851
42852 case scalar_load:
42853 return ix86_cost->scalar_load_cost;
42854
42855 case scalar_store:
42856 return ix86_cost->scalar_store_cost;
42857
42858 case vector_stmt:
42859 return ix86_cost->vec_stmt_cost;
42860
42861 case vector_load:
42862 return ix86_cost->vec_align_load_cost;
42863
42864 case vector_store:
42865 return ix86_cost->vec_store_cost;
42866
42867 case vec_to_scalar:
42868 return ix86_cost->vec_to_scalar_cost;
42869
42870 case scalar_to_vec:
42871 return ix86_cost->scalar_to_vec_cost;
42872
42873 case unaligned_load:
42874 case unaligned_store:
42875 return ix86_cost->vec_unalign_load_cost;
42876
42877 case cond_branch_taken:
42878 return ix86_cost->cond_taken_branch_cost;
42879
42880 case cond_branch_not_taken:
42881 return ix86_cost->cond_not_taken_branch_cost;
42882
42883 case vec_perm:
42884 case vec_promote_demote:
42885 return ix86_cost->vec_stmt_cost;
42886
42887 case vec_construct:
42888 elements = TYPE_VECTOR_SUBPARTS (vectype);
42889 return elements / 2 + 1;
42890
42891 default:
42892 gcc_unreachable ();
42893 }
42894 }
42895
42896 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42897 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42898 insn every time. */
42899
42900 static GTY(()) rtx_insn *vselect_insn;
42901
42902 /* Initialize vselect_insn. */
42903
42904 static void
42905 init_vselect_insn (void)
42906 {
42907 unsigned i;
42908 rtx x;
42909
42910 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42911 for (i = 0; i < MAX_VECT_LEN; ++i)
42912 XVECEXP (x, 0, i) = const0_rtx;
42913 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42914 const0_rtx), x);
42915 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42916 start_sequence ();
42917 vselect_insn = emit_insn (x);
42918 end_sequence ();
42919 }
42920
42921 /* Construct (set target (vec_select op0 (parallel perm))) and
42922 return true if that's a valid instruction in the active ISA. */
42923
42924 static bool
42925 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42926 unsigned nelt, bool testing_p)
42927 {
42928 unsigned int i;
42929 rtx x, save_vconcat;
42930 int icode;
42931
42932 if (vselect_insn == NULL_RTX)
42933 init_vselect_insn ();
42934
42935 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42936 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42937 for (i = 0; i < nelt; ++i)
42938 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42939 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42940 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42941 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42942 SET_DEST (PATTERN (vselect_insn)) = target;
42943 icode = recog_memoized (vselect_insn);
42944
42945 if (icode >= 0 && !testing_p)
42946 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42947
42948 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42949 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42950 INSN_CODE (vselect_insn) = -1;
42951
42952 return icode >= 0;
42953 }
42954
42955 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42956
42957 static bool
42958 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42959 const unsigned char *perm, unsigned nelt,
42960 bool testing_p)
42961 {
42962 enum machine_mode v2mode;
42963 rtx x;
42964 bool ok;
42965
42966 if (vselect_insn == NULL_RTX)
42967 init_vselect_insn ();
42968
42969 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42970 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42971 PUT_MODE (x, v2mode);
42972 XEXP (x, 0) = op0;
42973 XEXP (x, 1) = op1;
42974 ok = expand_vselect (target, x, perm, nelt, testing_p);
42975 XEXP (x, 0) = const0_rtx;
42976 XEXP (x, 1) = const0_rtx;
42977 return ok;
42978 }
42979
42980 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42981 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42982
42983 static bool
42984 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42985 {
42986 enum machine_mode vmode = d->vmode;
42987 unsigned i, mask, nelt = d->nelt;
42988 rtx target, op0, op1, x;
42989 rtx rperm[32], vperm;
42990
42991 if (d->one_operand_p)
42992 return false;
42993 if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
42994 && GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4)
42995 ;
42996 else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42997 ;
42998 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42999 ;
43000 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43001 ;
43002 else
43003 return false;
43004
43005 /* This is a blend, not a permute. Elements must stay in their
43006 respective lanes. */
43007 for (i = 0; i < nelt; ++i)
43008 {
43009 unsigned e = d->perm[i];
43010 if (!(e == i || e == i + nelt))
43011 return false;
43012 }
43013
43014 if (d->testing_p)
43015 return true;
43016
43017 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
43018 decision should be extracted elsewhere, so that we only try that
43019 sequence once all budget==3 options have been tried. */
43020 target = d->target;
43021 op0 = d->op0;
43022 op1 = d->op1;
43023 mask = 0;
43024
43025 switch (vmode)
43026 {
43027 case V8DFmode:
43028 case V16SFmode:
43029 case V4DFmode:
43030 case V8SFmode:
43031 case V2DFmode:
43032 case V4SFmode:
43033 case V8HImode:
43034 case V8SImode:
43035 case V32HImode:
43036 case V64QImode:
43037 case V16SImode:
43038 case V8DImode:
43039 for (i = 0; i < nelt; ++i)
43040 mask |= (d->perm[i] >= nelt) << i;
43041 break;
43042
43043 case V2DImode:
43044 for (i = 0; i < 2; ++i)
43045 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
43046 vmode = V8HImode;
43047 goto do_subreg;
43048
43049 case V4SImode:
43050 for (i = 0; i < 4; ++i)
43051 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43052 vmode = V8HImode;
43053 goto do_subreg;
43054
43055 case V16QImode:
43056 /* See if bytes move in pairs so we can use pblendw with
43057 an immediate argument, rather than pblendvb with a vector
43058 argument. */
43059 for (i = 0; i < 16; i += 2)
43060 if (d->perm[i] + 1 != d->perm[i + 1])
43061 {
43062 use_pblendvb:
43063 for (i = 0; i < nelt; ++i)
43064 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
43065
43066 finish_pblendvb:
43067 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
43068 vperm = force_reg (vmode, vperm);
43069
43070 if (GET_MODE_SIZE (vmode) == 16)
43071 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
43072 else
43073 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
43074 if (target != d->target)
43075 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43076 return true;
43077 }
43078
43079 for (i = 0; i < 8; ++i)
43080 mask |= (d->perm[i * 2] >= 16) << i;
43081 vmode = V8HImode;
43082 /* FALLTHRU */
43083
43084 do_subreg:
43085 target = gen_reg_rtx (vmode);
43086 op0 = gen_lowpart (vmode, op0);
43087 op1 = gen_lowpart (vmode, op1);
43088 break;
43089
43090 case V32QImode:
43091 /* See if bytes move in pairs. If not, vpblendvb must be used. */
43092 for (i = 0; i < 32; i += 2)
43093 if (d->perm[i] + 1 != d->perm[i + 1])
43094 goto use_pblendvb;
43095 /* See if bytes move in quadruplets. If yes, vpblendd
43096 with immediate can be used. */
43097 for (i = 0; i < 32; i += 4)
43098 if (d->perm[i] + 2 != d->perm[i + 2])
43099 break;
43100 if (i < 32)
43101 {
43102 /* See if bytes move the same in both lanes. If yes,
43103 vpblendw with immediate can be used. */
43104 for (i = 0; i < 16; i += 2)
43105 if (d->perm[i] + 16 != d->perm[i + 16])
43106 goto use_pblendvb;
43107
43108 /* Use vpblendw. */
43109 for (i = 0; i < 16; ++i)
43110 mask |= (d->perm[i * 2] >= 32) << i;
43111 vmode = V16HImode;
43112 goto do_subreg;
43113 }
43114
43115 /* Use vpblendd. */
43116 for (i = 0; i < 8; ++i)
43117 mask |= (d->perm[i * 4] >= 32) << i;
43118 vmode = V8SImode;
43119 goto do_subreg;
43120
43121 case V16HImode:
43122 /* See if words move in pairs. If yes, vpblendd can be used. */
43123 for (i = 0; i < 16; i += 2)
43124 if (d->perm[i] + 1 != d->perm[i + 1])
43125 break;
43126 if (i < 16)
43127 {
43128 /* See if words move the same in both lanes. If not,
43129 vpblendvb must be used. */
43130 for (i = 0; i < 8; i++)
43131 if (d->perm[i] + 8 != d->perm[i + 8])
43132 {
43133 /* Use vpblendvb. */
43134 for (i = 0; i < 32; ++i)
43135 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
43136
43137 vmode = V32QImode;
43138 nelt = 32;
43139 target = gen_reg_rtx (vmode);
43140 op0 = gen_lowpart (vmode, op0);
43141 op1 = gen_lowpart (vmode, op1);
43142 goto finish_pblendvb;
43143 }
43144
43145 /* Use vpblendw. */
43146 for (i = 0; i < 16; ++i)
43147 mask |= (d->perm[i] >= 16) << i;
43148 break;
43149 }
43150
43151 /* Use vpblendd. */
43152 for (i = 0; i < 8; ++i)
43153 mask |= (d->perm[i * 2] >= 16) << i;
43154 vmode = V8SImode;
43155 goto do_subreg;
43156
43157 case V4DImode:
43158 /* Use vpblendd. */
43159 for (i = 0; i < 4; ++i)
43160 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
43161 vmode = V8SImode;
43162 goto do_subreg;
43163
43164 default:
43165 gcc_unreachable ();
43166 }
43167
43168 /* This matches five different patterns with the different modes. */
43169 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
43170 x = gen_rtx_SET (VOIDmode, target, x);
43171 emit_insn (x);
43172 if (target != d->target)
43173 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43174
43175 return true;
43176 }
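/* Worked example of the immediate mask constructed above: for a V4SFmode
   blend with d->perm = { 0, 5, 2, 7 } (nelt == 4), elements 1 and 3 come
   from op1, so mask == 0b1010; the vec_merge then takes an element from
   op1 wherever the corresponding mask bit is set and from op0 otherwise.  */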
43177
43178 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43179 in terms of the variable form of vpermilps.
43180
43181 Note that we will have already failed the immediate input vpermilps,
43182 which requires that the high and low part shuffle be identical; the
43183 variable form doesn't require that. */
43184
43185 static bool
43186 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
43187 {
43188 rtx rperm[8], vperm;
43189 unsigned i;
43190
43191 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
43192 return false;
43193
43194 /* We can only permute within the 128-bit lane. */
43195 for (i = 0; i < 8; ++i)
43196 {
43197 unsigned e = d->perm[i];
43198 if (i < 4 ? e >= 4 : e < 4)
43199 return false;
43200 }
43201
43202 if (d->testing_p)
43203 return true;
43204
43205 for (i = 0; i < 8; ++i)
43206 {
43207 unsigned e = d->perm[i];
43208
43209 /* Within each 128-bit lane, the elements of op0 are numbered
43210 from 0 and the elements of op1 are numbered from 4. */
43211 if (e >= 8 + 4)
43212 e -= 8;
43213 else if (e >= 4)
43214 e -= 4;
43215
43216 rperm[i] = GEN_INT (e);
43217 }
43218
43219 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
43220 vperm = force_reg (V8SImode, vperm);
43221 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
43222
43223 return true;
43224 }
43225
43226 /* Return true if permutation D can be performed as VMODE permutation
43227 instead. */
43228
43229 static bool
43230 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
43231 {
43232 unsigned int i, j, chunk;
43233
43234 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
43235 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
43236 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
43237 return false;
43238
43239 if (GET_MODE_NUNITS (vmode) >= d->nelt)
43240 return true;
43241
43242 chunk = d->nelt / GET_MODE_NUNITS (vmode);
43243 for (i = 0; i < d->nelt; i += chunk)
43244 if (d->perm[i] & (chunk - 1))
43245 return false;
43246 else
43247 for (j = 1; j < chunk; ++j)
43248 if (d->perm[i] + j != d->perm[i + j])
43249 return false;
43250
43251 return true;
43252 }
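/* Example: the V16QImode permutation { 4,5,6,7, 0,1,2,3, 12,13,14,15,
   8,9,10,11 } moves bytes in aligned groups of four (chunk == 4), so the
   function accepts VMODE == V4SImode and the shuffle can instead be done
   as the dword permutation { 1, 0, 3, 2 }.  */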
43253
43254 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43255 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
43256
43257 static bool
43258 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
43259 {
43260 unsigned i, nelt, eltsz, mask;
43261 unsigned char perm[64];
43262 enum machine_mode vmode = V16QImode;
43263 rtx rperm[64], vperm, target, op0, op1;
43264
43265 nelt = d->nelt;
43266
43267 if (!d->one_operand_p)
43268 {
43269 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
43270 {
43271 if (TARGET_AVX2
43272 && valid_perm_using_mode_p (V2TImode, d))
43273 {
43274 if (d->testing_p)
43275 return true;
43276
43277 /* Use vperm2i128 insn. The pattern uses
43278 V4DImode instead of V2TImode. */
43279 target = d->target;
43280 if (d->vmode != V4DImode)
43281 target = gen_reg_rtx (V4DImode);
43282 op0 = gen_lowpart (V4DImode, d->op0);
43283 op1 = gen_lowpart (V4DImode, d->op1);
43284 rperm[0]
43285 = GEN_INT ((d->perm[0] / (nelt / 2))
43286 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
43287 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
43288 if (target != d->target)
43289 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43290 return true;
43291 }
43292 return false;
43293 }
43294 }
43295 else
43296 {
43297 if (GET_MODE_SIZE (d->vmode) == 16)
43298 {
43299 if (!TARGET_SSSE3)
43300 return false;
43301 }
43302 else if (GET_MODE_SIZE (d->vmode) == 32)
43303 {
43304 if (!TARGET_AVX2)
43305 return false;
43306
43307 /* V4DImode should be already handled through
43308 expand_vselect by vpermq instruction. */
43309 gcc_assert (d->vmode != V4DImode);
43310
43311 vmode = V32QImode;
43312 if (d->vmode == V8SImode
43313 || d->vmode == V16HImode
43314 || d->vmode == V32QImode)
43315 {
43316 /* First see if vpermq can be used for
43317 V8SImode/V16HImode/V32QImode. */
43318 if (valid_perm_using_mode_p (V4DImode, d))
43319 {
43320 for (i = 0; i < 4; i++)
43321 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
43322 if (d->testing_p)
43323 return true;
43324 target = gen_reg_rtx (V4DImode);
43325 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
43326 perm, 4, false))
43327 {
43328 emit_move_insn (d->target,
43329 gen_lowpart (d->vmode, target));
43330 return true;
43331 }
43332 return false;
43333 }
43334
43335 /* Next see if vpermd can be used. */
43336 if (valid_perm_using_mode_p (V8SImode, d))
43337 vmode = V8SImode;
43338 }
43339 /* Or if vpermps can be used. */
43340 else if (d->vmode == V8SFmode)
43341 vmode = V8SImode;
43342
43343 if (vmode == V32QImode)
43344 {
43345 /* vpshufb only works intra lanes; it is not
43346 possible to shuffle bytes between the lanes. */
43347 for (i = 0; i < nelt; ++i)
43348 if ((d->perm[i] ^ i) & (nelt / 2))
43349 return false;
43350 }
43351 }
43352 else if (GET_MODE_SIZE (d->vmode) == 64)
43353 {
43354 if (!TARGET_AVX512BW)
43355 return false;
vmode = V64QImode;
43356 if (vmode == V64QImode)
43357 {
43358 /* vpshufb only works intra lanes; it is not
43359 possible to shuffle bytes between the lanes. */
43360 for (i = 0; i < nelt; ++i)
43361 if ((d->perm[i] ^ i) & (nelt / 4))
43362 return false;
43363 }
43364 }
43365 else
43366 return false;
43367 }
43368
43369 if (d->testing_p)
43370 return true;
43371
43372 if (vmode == V8SImode)
43373 for (i = 0; i < 8; ++i)
43374 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43375 else
43376 {
43377 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43378 if (!d->one_operand_p)
43379 mask = 2 * nelt - 1;
43380 else if (vmode == V16QImode)
43381 mask = nelt - 1;
43382 else if (vmode == V64QImode)
43383 mask = nelt / 4 - 1;
43384 else
43385 mask = nelt / 2 - 1;
43386
43387 for (i = 0; i < nelt; ++i)
43388 {
43389 unsigned j, e = d->perm[i] & mask;
43390 for (j = 0; j < eltsz; ++j)
43391 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43392 }
43393 }
43394
43395 vperm = gen_rtx_CONST_VECTOR (vmode,
43396 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43397 vperm = force_reg (vmode, vperm);
43398
43399 target = d->target;
43400 if (d->vmode != vmode)
43401 target = gen_reg_rtx (vmode);
43402 op0 = gen_lowpart (vmode, d->op0);
43403 if (d->one_operand_p)
43404 {
43405 if (vmode == V16QImode)
43406 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43407 else if (vmode == V32QImode)
43408 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43409 else if (vmode == V64QImode)
43410 emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
43411 else if (vmode == V8SFmode)
43412 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43413 else
43414 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43415 }
43416 else
43417 {
43418 op1 = gen_lowpart (vmode, d->op1);
43419 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43420 }
43421 if (target != d->target)
43422 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43423
43424 return true;
43425 }
43426
43427 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43428 in a single instruction. */
43429
43430 static bool
43431 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43432 {
43433 unsigned i, nelt = d->nelt;
43434 unsigned char perm2[MAX_VECT_LEN];
43435
43436 /* Check plain VEC_SELECT first, because AVX has instructions that could
43437 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43438 input where SEL+CONCAT may not. */
43439 if (d->one_operand_p)
43440 {
43441 int mask = nelt - 1;
43442 bool identity_perm = true;
43443 bool broadcast_perm = true;
43444
43445 for (i = 0; i < nelt; i++)
43446 {
43447 perm2[i] = d->perm[i] & mask;
43448 if (perm2[i] != i)
43449 identity_perm = false;
43450 if (perm2[i])
43451 broadcast_perm = false;
43452 }
43453
43454 if (identity_perm)
43455 {
43456 if (!d->testing_p)
43457 emit_move_insn (d->target, d->op0);
43458 return true;
43459 }
43460 else if (broadcast_perm && TARGET_AVX2)
43461 {
43462 /* Use vpbroadcast{b,w,d}. */
43463 rtx (*gen) (rtx, rtx) = NULL;
43464 switch (d->vmode)
43465 {
43466 case V64QImode:
43467 if (TARGET_AVX512BW)
43468 gen = gen_avx512bw_vec_dupv64qi;
43469 break;
43470 case V32QImode:
43471 gen = gen_avx2_pbroadcastv32qi_1;
43472 break;
43473 case V32HImode:
43474 if (TARGET_AVX512BW)
43475 gen = gen_avx512bw_vec_dupv32hi;
43476 break;
43477 case V16HImode:
43478 gen = gen_avx2_pbroadcastv16hi_1;
43479 break;
43480 case V16SImode:
43481 if (TARGET_AVX512F)
43482 gen = gen_avx512f_vec_dupv16si;
43483 break;
43484 case V8SImode:
43485 gen = gen_avx2_pbroadcastv8si_1;
43486 break;
43487 case V16QImode:
43488 gen = gen_avx2_pbroadcastv16qi;
43489 break;
43490 case V8HImode:
43491 gen = gen_avx2_pbroadcastv8hi;
43492 break;
43493 case V16SFmode:
43494 if (TARGET_AVX512F)
43495 gen = gen_avx512f_vec_dupv16sf;
43496 break;
43497 case V8SFmode:
43498 gen = gen_avx2_vec_dupv8sf_1;
43499 break;
43500 case V8DFmode:
43501 if (TARGET_AVX512F)
43502 gen = gen_avx512f_vec_dupv8df;
43503 break;
43504 case V8DImode:
43505 if (TARGET_AVX512F)
43506 gen = gen_avx512f_vec_dupv8di;
43507 break;
43508 /* For other modes prefer other shuffles this function creates. */
43509 default: break;
43510 }
43511 if (gen != NULL)
43512 {
43513 if (!d->testing_p)
43514 emit_insn (gen (d->target, d->op0));
43515 return true;
43516 }
43517 }
43518
43519 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43520 return true;
43521
43522 /* There are plenty of patterns in sse.md that are written for
43523 SEL+CONCAT and are not replicated for a single op. Perhaps
43524 that should be changed, to avoid the nastiness here. */
43525
43526 /* Recognize interleave style patterns, which means incrementing
43527 every other permutation operand. */
43528 for (i = 0; i < nelt; i += 2)
43529 {
43530 perm2[i] = d->perm[i] & mask;
43531 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43532 }
43533 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43534 d->testing_p))
43535 return true;
43536
43537 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43538 if (nelt >= 4)
43539 {
43540 for (i = 0; i < nelt; i += 4)
43541 {
43542 perm2[i + 0] = d->perm[i + 0] & mask;
43543 perm2[i + 1] = d->perm[i + 1] & mask;
43544 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43545 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43546 }
43547
43548 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43549 d->testing_p))
43550 return true;
43551 }
43552 }
43553
43554 /* Finally, try the fully general two operand permute. */
43555 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43556 d->testing_p))
43557 return true;
43558
43559 /* Recognize interleave style patterns with reversed operands. */
43560 if (!d->one_operand_p)
43561 {
43562 for (i = 0; i < nelt; ++i)
43563 {
43564 unsigned e = d->perm[i];
43565 if (e >= nelt)
43566 e -= nelt;
43567 else
43568 e += nelt;
43569 perm2[i] = e;
43570 }
43571
43572 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43573 d->testing_p))
43574 return true;
43575 }
43576
43577 /* Try the SSE4.1 blend variable merge instructions. */
43578 if (expand_vec_perm_blend (d))
43579 return true;
43580
43581 /* Try one of the AVX vpermil variable permutations. */
43582 if (expand_vec_perm_vpermil (d))
43583 return true;
43584
43585 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43586 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43587 if (expand_vec_perm_pshufb (d))
43588 return true;
43589
43590 /* Try the AVX2 vpalignr instruction. */
43591 if (expand_vec_perm_palignr (d, true))
43592 return true;
43593
43594 /* Try the AVX512F vpermi2 instructions. */
43595 if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
43596 return true;
43597
43598 return false;
43599 }
43600
43601 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement D
43602 in terms of a pair of pshuflw + pshufhw instructions. */
43603
43604 static bool
43605 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43606 {
43607 unsigned char perm2[MAX_VECT_LEN];
43608 unsigned i;
43609 bool ok;
43610
43611 if (d->vmode != V8HImode || !d->one_operand_p)
43612 return false;
43613
43614 /* The two permutations only operate in 64-bit lanes. */
43615 for (i = 0; i < 4; ++i)
43616 if (d->perm[i] >= 4)
43617 return false;
43618 for (i = 4; i < 8; ++i)
43619 if (d->perm[i] < 4)
43620 return false;
43621
43622 if (d->testing_p)
43623 return true;
43624
43625 /* Emit the pshuflw. */
43626 memcpy (perm2, d->perm, 4);
43627 for (i = 4; i < 8; ++i)
43628 perm2[i] = i;
43629 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43630 gcc_assert (ok);
43631
43632 /* Emit the pshufhw. */
43633 memcpy (perm2 + 4, d->perm + 4, 4);
43634 for (i = 0; i < 4; ++i)
43635 perm2[i] = i;
43636 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43637 gcc_assert (ok);
43638
43639 return true;
43640 }
43641
43642 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43643 the permutation using the SSSE3 palignr instruction. This succeeds
43644 when all of the elements in PERM fit within one vector and we merely
43645 need to shift them down so that a single vector permutation has a
43646 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
43647 the vpalignr instruction itself can perform the requested permutation. */
43648
43649 static bool
43650 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
43651 {
43652 unsigned i, nelt = d->nelt;
43653 unsigned min, max, minswap, maxswap;
43654 bool in_order, ok, swap = false;
43655 rtx shift, target;
43656 struct expand_vec_perm_d dcopy;
43657
43658 /* Even with AVX, palignr only operates on 128-bit vectors;
43659 with AVX2, vpalignr operates on both 128-bit lanes independently. */
43660 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43661 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
43662 return false;
43663
43664 min = 2 * nelt;
43665 max = 0;
43666 minswap = 2 * nelt;
43667 maxswap = 0;
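/* Find the smallest and largest element referenced, both as given and with
the two operands swapped; [v]palignr only helps when all referenced elements
fit in one window starting at a nonzero MIN.  */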
43668 for (i = 0; i < nelt; ++i)
43669 {
43670 unsigned e = d->perm[i];
43671 unsigned eswap = d->perm[i] ^ nelt;
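/* For 32-byte modes [v]palignr shifts within each 128-bit lane, so fold the
index down to its position inside a lane plus one bit selecting the operand.  */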
43672 if (GET_MODE_SIZE (d->vmode) == 32)
43673 {
43674 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
43675 eswap = e ^ (nelt / 2);
43676 }
43677 if (e < min)
43678 min = e;
43679 if (e > max)
43680 max = e;
43681 if (eswap < minswap)
43682 minswap = eswap;
43683 if (eswap > maxswap)
43684 maxswap = eswap;
43685 }
43686 if (min == 0
43687 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
43688 {
43689 if (d->one_operand_p
43690 || minswap == 0
43691 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
43692 ? nelt / 2 : nelt))
43693 return false;
43694 swap = true;
43695 min = minswap;
43696 max = maxswap;
43697 }
43698
43699 /* Given that we have SSSE3, we know we'll be able to implement the
43700 single operand permutation after the palignr with pshufb for
43701 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
43702 first. */
43703 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
43704 return true;
43705
43706 dcopy = *d;
43707 if (swap)
43708 {
43709 dcopy.op0 = d->op1;
43710 dcopy.op1 = d->op0;
43711 for (i = 0; i < nelt; ++i)
43712 dcopy.perm[i] ^= nelt;
43713 }
43714
43715 in_order = true;
43716 for (i = 0; i < nelt; ++i)
43717 {
43718 unsigned e = dcopy.perm[i];
43719 if (GET_MODE_SIZE (d->vmode) == 32
43720 && e >= nelt
43721 && (e & (nelt / 2 - 1)) < min)
43722 e = e - min - (nelt / 2);
43723 else
43724 e = e - min;
43725 if (e != i)
43726 in_order = false;
43727 dcopy.perm[i] = e;
43728 }
43729 dcopy.one_operand_p = true;
43730
43731 if (single_insn_only_p && !in_order)
43732 return false;
43733
43734 /* For AVX2, test whether we can permute the result in one instruction. */
43735 if (d->testing_p)
43736 {
43737 if (in_order)
43738 return true;
43739 dcopy.op1 = dcopy.op0;
43740 return expand_vec_perm_1 (&dcopy);
43741 }
43742
43743 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43744 if (GET_MODE_SIZE (d->vmode) == 16)
43745 {
43746 target = gen_reg_rtx (TImode);
43747 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
43748 gen_lowpart (TImode, dcopy.op0), shift));
43749 }
43750 else
43751 {
43752 target = gen_reg_rtx (V2TImode);
43753 emit_insn (gen_avx2_palignrv2ti (target,
43754 gen_lowpart (V2TImode, dcopy.op1),
43755 gen_lowpart (V2TImode, dcopy.op0),
43756 shift));
43757 }
43758
43759 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43760
43761 /* Test for the degenerate case where the alignment by itself
43762 produces the desired permutation. */
43763 if (in_order)
43764 {
43765 emit_move_insn (d->target, dcopy.op0);
43766 return true;
43767 }
43768
43769 ok = expand_vec_perm_1 (&dcopy);
43770 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
43771
43772 return ok;
43773 }
43774
43775 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43776 the permutation using the SSE4_1 pblendv instruction. Potentially
43777 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
43778
43779 static bool
43780 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43781 {
43782 unsigned i, which, nelt = d->nelt;
43783 struct expand_vec_perm_d dcopy, dcopy1;
43784 enum machine_mode vmode = d->vmode;
43785 bool ok;
43786
43787 /* Use the same checks as in expand_vec_perm_blend. */
43788 if (d->one_operand_p)
43789 return false;
43790 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
43791 ;
43792 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
43793 ;
43794 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43795 ;
43796 else
43797 return false;
43798
43799 /* Figure out which permutation elements do not stay in their
43800 original positions, and whether those come from op0 or op1. */
43801 for (i = 0, which = 0; i < nelt; ++i)
43802 {
43803 unsigned e = d->perm[i];
43804 if (e != i)
43805 which |= (e < nelt ? 1 : 2);
43806 }
43807 /* We can pblend the out-of-place elements only when they all
43808 come from the same operand.
43809 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are out of place, but both
43810 are >= 8, i.e. from the second operand.
43811 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are out of place, but
43812 8 >= 8 while 2 is not, so they come from different operands. */
43814 if (which != 1 && which != 2)
43815 return false;
43816 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
43817 return true;
43818
43819 /* First apply a one-operand permutation to the operand that
43820 supplies the out-of-place elements. */
43821 dcopy = *d;
43822 if (which == 2)
43823 dcopy.op0 = dcopy.op1 = d->op1;
43824 else
43825 dcopy.op0 = dcopy.op1 = d->op0;
43826 dcopy.one_operand_p = true;
43827
43828 for (i = 0; i < nelt; ++i)
43829 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43830
43831 ok = expand_vec_perm_1 (&dcopy);
43832 if (GET_MODE_SIZE (vmode) != 16 && !ok)
43833 return false;
43834 else
43835 gcc_assert (ok);
43836 if (d->testing_p)
43837 return true;
43838
43839 /* Next, blend the permuted elements into their final positions. */
43840 dcopy1 = *d;
43841 if (which == 2)
43842 dcopy1.op1 = dcopy.target;
43843 else
43844 dcopy1.op0 = dcopy.target;
43845
43846 for (i = 0; i < nelt; ++i)
43847 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43848
43849 ok = expand_vec_perm_blend (&dcopy1);
43850 gcc_assert (ok);
43851
43852 return true;
43853 }
43854
43855 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43856
43857 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43858 a two vector permutation into a single vector permutation by using
43859 an interleave operation to merge the vectors. */
43860
43861 static bool
43862 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43863 {
43864 struct expand_vec_perm_d dremap, dfinal;
43865 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43866 unsigned HOST_WIDE_INT contents;
43867 unsigned char remap[2 * MAX_VECT_LEN];
43868 rtx_insn *seq;
43869 bool ok, same_halves = false;
43870
43871 if (GET_MODE_SIZE (d->vmode) == 16)
43872 {
43873 if (d->one_operand_p)
43874 return false;
43875 }
43876 else if (GET_MODE_SIZE (d->vmode) == 32)
43877 {
43878 if (!TARGET_AVX)
43879 return false;
43880 /* For 32-byte modes allow even d->one_operand_p.
43881 The lack of cross-lane shuffling in some instructions
43882 might prevent a single insn shuffle. */
43883 dfinal = *d;
43884 dfinal.testing_p = true;
43885 /* If expand_vec_perm_interleave3 can expand this into
43886 a 3 insn sequence, give up and let it be expanded that
43887 way instead. While that is one insn longer, it doesn't
43888 need a memory operand, and in the common case where the
43889 interleave low and interleave high permutations of the
43890 same operands appear adjacent, both together need only
43891 4 insns after CSE. */
43892 if (expand_vec_perm_interleave3 (&dfinal))
43893 return false;
43894 }
43895 else
43896 return false;
43897
43898 /* Examine from whence the elements come. */
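/* CONTENTS gets bit N set for each element N of the concatenated
{ op0, op1 } input that the permutation references.  */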
43899 contents = 0;
43900 for (i = 0; i < nelt; ++i)
43901 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43902
43903 memset (remap, 0xff, sizeof (remap));
43904 dremap = *d;
43905
43906 if (GET_MODE_SIZE (d->vmode) == 16)
43907 {
43908 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43909
43910 /* Split the two input vectors into 4 halves. */
43911 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43912 h2 = h1 << nelt2;
43913 h3 = h2 << nelt2;
43914 h4 = h3 << nelt2;
43915
43916 /* If the elements all come from the low halves, use interleave low, and
43917 similarly interleave high for the high halves. If the elements come from
43918 mismatched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43919 if ((contents & (h1 | h3)) == contents)
43920 {
43921 /* punpckl* */
43922 for (i = 0; i < nelt2; ++i)
43923 {
43924 remap[i] = i * 2;
43925 remap[i + nelt] = i * 2 + 1;
43926 dremap.perm[i * 2] = i;
43927 dremap.perm[i * 2 + 1] = i + nelt;
43928 }
43929 if (!TARGET_SSE2 && d->vmode == V4SImode)
43930 dremap.vmode = V4SFmode;
43931 }
43932 else if ((contents & (h2 | h4)) == contents)
43933 {
43934 /* punpckh* */
43935 for (i = 0; i < nelt2; ++i)
43936 {
43937 remap[i + nelt2] = i * 2;
43938 remap[i + nelt + nelt2] = i * 2 + 1;
43939 dremap.perm[i * 2] = i + nelt2;
43940 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43941 }
43942 if (!TARGET_SSE2 && d->vmode == V4SImode)
43943 dremap.vmode = V4SFmode;
43944 }
43945 else if ((contents & (h1 | h4)) == contents)
43946 {
43947 /* shufps */
43948 for (i = 0; i < nelt2; ++i)
43949 {
43950 remap[i] = i;
43951 remap[i + nelt + nelt2] = i + nelt2;
43952 dremap.perm[i] = i;
43953 dremap.perm[i + nelt2] = i + nelt + nelt2;
43954 }
43955 if (nelt != 4)
43956 {
43957 /* shufpd */
43958 dremap.vmode = V2DImode;
43959 dremap.nelt = 2;
43960 dremap.perm[0] = 0;
43961 dremap.perm[1] = 3;
43962 }
43963 }
43964 else if ((contents & (h2 | h3)) == contents)
43965 {
43966 /* shufps */
43967 for (i = 0; i < nelt2; ++i)
43968 {
43969 remap[i + nelt2] = i;
43970 remap[i + nelt] = i + nelt2;
43971 dremap.perm[i] = i + nelt2;
43972 dremap.perm[i + nelt2] = i + nelt;
43973 }
43974 if (nelt != 4)
43975 {
43976 /* shufpd */
43977 dremap.vmode = V2DImode;
43978 dremap.nelt = 2;
43979 dremap.perm[0] = 1;
43980 dremap.perm[1] = 2;
43981 }
43982 }
43983 else
43984 return false;
43985 }
43986 else
43987 {
43988 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43989 unsigned HOST_WIDE_INT q[8];
43990 unsigned int nonzero_halves[4];
43991
43992 /* Split the two input vectors into 8 quarters. */
43993 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43994 for (i = 1; i < 8; ++i)
43995 q[i] = q[0] << (nelt4 * i);
43996 for (i = 0; i < 4; ++i)
43997 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43998 {
43999 nonzero_halves[nzcnt] = i;
44000 ++nzcnt;
44001 }
44002
44003 if (nzcnt == 1)
44004 {
44005 gcc_assert (d->one_operand_p);
44006 nonzero_halves[1] = nonzero_halves[0];
44007 same_halves = true;
44008 }
44009 else if (d->one_operand_p)
44010 {
44011 gcc_assert (nonzero_halves[0] == 0);
44012 gcc_assert (nonzero_halves[1] == 1);
44013 }
44014
44015 if (nzcnt <= 2)
44016 {
44017 if (d->perm[0] / nelt2 == nonzero_halves[1])
44018 {
44019 /* Attempt to increase the likelihood that dfinal
44020 shuffle will be intra-lane. */
44021 char tmph = nonzero_halves[0];
44022 nonzero_halves[0] = nonzero_halves[1];
44023 nonzero_halves[1] = tmph;
44024 }
44025
44026 /* vperm2f128 or vperm2i128. */
44027 for (i = 0; i < nelt2; ++i)
44028 {
44029 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
44030 remap[i + nonzero_halves[0] * nelt2] = i;
44031 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
44032 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
44033 }
44034
44035 if (d->vmode != V8SFmode
44036 && d->vmode != V4DFmode
44037 && d->vmode != V8SImode)
44038 {
44039 dremap.vmode = V8SImode;
44040 dremap.nelt = 8;
44041 for (i = 0; i < 4; ++i)
44042 {
44043 dremap.perm[i] = i + nonzero_halves[0] * 4;
44044 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
44045 }
44046 }
44047 }
44048 else if (d->one_operand_p)
44049 return false;
44050 else if (TARGET_AVX2
44051 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
44052 {
44053 /* vpunpckl* */
44054 for (i = 0; i < nelt4; ++i)
44055 {
44056 remap[i] = i * 2;
44057 remap[i + nelt] = i * 2 + 1;
44058 remap[i + nelt2] = i * 2 + nelt2;
44059 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
44060 dremap.perm[i * 2] = i;
44061 dremap.perm[i * 2 + 1] = i + nelt;
44062 dremap.perm[i * 2 + nelt2] = i + nelt2;
44063 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
44064 }
44065 }
44066 else if (TARGET_AVX2
44067 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
44068 {
44069 /* vpunpckh* */
44070 for (i = 0; i < nelt4; ++i)
44071 {
44072 remap[i + nelt4] = i * 2;
44073 remap[i + nelt + nelt4] = i * 2 + 1;
44074 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
44075 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
44076 dremap.perm[i * 2] = i + nelt4;
44077 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
44078 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
44079 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
44080 }
44081 }
44082 else
44083 return false;
44084 }
44085
44086 /* Use the remapping array set up above to move the elements from their
44087 swizzled locations into their final destinations. */
44088 dfinal = *d;
44089 for (i = 0; i < nelt; ++i)
44090 {
44091 unsigned e = remap[d->perm[i]];
44092 gcc_assert (e < nelt);
44093 /* If same_halves is true, both halves of the remapped vector are the
44094 same. Avoid cross-lane accesses if possible. */
44095 if (same_halves && i >= nelt2)
44096 {
44097 gcc_assert (e < nelt2);
44098 dfinal.perm[i] = e + nelt2;
44099 }
44100 else
44101 dfinal.perm[i] = e;
44102 }
44103 if (!d->testing_p)
44104 {
44105 dremap.target = gen_reg_rtx (dremap.vmode);
44106 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44107 }
44108 dfinal.op1 = dfinal.op0;
44109 dfinal.one_operand_p = true;
44110
44111 /* Test if the final remap can be done with a single insn. For V4SFmode or
44112 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
44113 start_sequence ();
44114 ok = expand_vec_perm_1 (&dfinal);
44115 seq = get_insns ();
44116 end_sequence ();
44117
44118 if (!ok)
44119 return false;
44120
44121 if (d->testing_p)
44122 return true;
44123
44124 if (dremap.vmode != dfinal.vmode)
44125 {
44126 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
44127 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
44128 }
44129
44130 ok = expand_vec_perm_1 (&dremap);
44131 gcc_assert (ok);
44132
44133 emit_insn (seq);
44134 return true;
44135 }
44136
44137 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
44138 a single vector cross-lane permutation into vpermq followed
44139 by any of the single insn permutations. */
44140
44141 static bool
44142 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
44143 {
44144 struct expand_vec_perm_d dremap, dfinal;
44145 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
44146 unsigned contents[2];
44147 bool ok;
44148
44149 if (!(TARGET_AVX2
44150 && (d->vmode == V32QImode || d->vmode == V16HImode)
44151 && d->one_operand_p))
44152 return false;
44153
44154 contents[0] = 0;
44155 contents[1] = 0;
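/* For each half of the result, record which 64-bit quarters of the input it
draws from; the vpermq below can supply at most two quarters per half.  */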
44156 for (i = 0; i < nelt2; ++i)
44157 {
44158 contents[0] |= 1u << (d->perm[i] / nelt4);
44159 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
44160 }
44161
44162 for (i = 0; i < 2; ++i)
44163 {
44164 unsigned int cnt = 0;
44165 for (j = 0; j < 4; ++j)
44166 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
44167 return false;
44168 }
44169
44170 if (d->testing_p)
44171 return true;
44172
44173 dremap = *d;
44174 dremap.vmode = V4DImode;
44175 dremap.nelt = 4;
44176 dremap.target = gen_reg_rtx (V4DImode);
44177 dremap.op0 = gen_lowpart (V4DImode, d->op0);
44178 dremap.op1 = dremap.op0;
44179 dremap.one_operand_p = true;
44180 for (i = 0; i < 2; ++i)
44181 {
44182 unsigned int cnt = 0;
44183 for (j = 0; j < 4; ++j)
44184 if ((contents[i] & (1u << j)) != 0)
44185 dremap.perm[2 * i + cnt++] = j;
44186 for (; cnt < 2; ++cnt)
44187 dremap.perm[2 * i + cnt] = 0;
44188 }
44189
44190 dfinal = *d;
44191 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
44192 dfinal.op1 = dfinal.op0;
44193 dfinal.one_operand_p = true;
44194 for (i = 0, j = 0; i < nelt; ++i)
44195 {
44196 if (i == nelt2)
44197 j = 2;
44198 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
44199 if ((d->perm[i] / nelt4) == dremap.perm[j])
44200 ;
44201 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
44202 dfinal.perm[i] |= nelt4;
44203 else
44204 gcc_unreachable ();
44205 }
44206
44207 ok = expand_vec_perm_1 (&dremap);
44208 gcc_assert (ok);
44209
44210 ok = expand_vec_perm_1 (&dfinal);
44211 gcc_assert (ok);
44212
44213 return true;
44214 }
44215
44216 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
44217 a vector permutation using two instructions: vperm2f128 (or
44218 vperm2i128) followed by any single in-lane permutation. */
44219
44220 static bool
44221 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
44222 {
44223 struct expand_vec_perm_d dfirst, dsecond;
44224 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
44225 bool ok;
44226
44227 if (!TARGET_AVX
44228 || GET_MODE_SIZE (d->vmode) != 32
44229 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
44230 return false;
44231
44232 dsecond = *d;
44233 dsecond.one_operand_p = false;
44234 dsecond.testing_p = true;
44235
44236 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
44237 immediate. For perm < 16 the second permutation uses
44238 d->op0 as first operand, for perm >= 16 it uses d->op1
44239 as first operand. The second operand is the result of
44240 vperm2[fi]128. */
44241 for (perm = 0; perm < 32; perm++)
44242 {
44243 /* Ignore permutations which do not move anything cross-lane. */
44244 if (perm < 16)
44245 {
44246 /* The second shuffle for e.g. V4DFmode has
44247 0123 and ABCD operands.
44248 Ignore AB23, as 23 is already in the second lane
44249 of the first operand. */
44250 if ((perm & 0xc) == (1 << 2)) continue;
44251 /* And 01CD, as 01 is in the first lane of the first
44252 operand. */
44253 if ((perm & 3) == 0) continue;
44254 /* And 4567, as then the vperm2[fi]128 doesn't change
44255 anything on the original 4567 second operand. */
44256 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
44257 }
44258 else
44259 {
44260 /* The second shuffle for e.g. V4DFmode has
44261 4567 and ABCD operands.
44262 Ignore AB67, as 67 is already in the second lane
44263 of the first operand. */
44264 if ((perm & 0xc) == (3 << 2)) continue;
44265 /* And 45CD, as 45 is in the first lane of the first
44266 operand. */
44267 if ((perm & 3) == 2) continue;
44268 /* And 0123, as then the vperm2[fi]128 doesn't change
44269 anything on the original 0123 first operand. */
44270 if ((perm & 0xf) == (1 << 2)) continue;
44271 }
44272
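/* See whether, with this lane selection PERM, every requested element is
available either from the matching lane of the original operand or from the
vperm2[fi]128 result, making DSECOND a single-insn shuffle.  */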
44273 for (i = 0; i < nelt; i++)
44274 {
44275 j = d->perm[i] / nelt2;
44276 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
44277 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
44278 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
44279 dsecond.perm[i] = d->perm[i] & (nelt - 1);
44280 else
44281 break;
44282 }
44283
44284 if (i == nelt)
44285 {
44286 start_sequence ();
44287 ok = expand_vec_perm_1 (&dsecond);
44288 end_sequence ();
44289 }
44290 else
44291 ok = false;
44292
44293 if (ok)
44294 {
44295 if (d->testing_p)
44296 return true;
44297
44298 /* Found a usable second shuffle. dfirst will be
44299 vperm2f128 on d->op0 and d->op1. */
44300 dsecond.testing_p = false;
44301 dfirst = *d;
44302 dfirst.target = gen_reg_rtx (d->vmode);
44303 for (i = 0; i < nelt; i++)
44304 dfirst.perm[i] = (i & (nelt2 - 1))
44305 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
44306
44307 canonicalize_perm (&dfirst);
44308 ok = expand_vec_perm_1 (&dfirst);
44309 gcc_assert (ok);
44310
44311 /* And dsecond is some single insn shuffle, taking
44312 d->op0 and result of vperm2f128 (if perm < 16) or
44313 d->op1 and result of vperm2f128 (otherwise). */
44314 if (perm >= 16)
44315 dsecond.op0 = dsecond.op1;
44316 dsecond.op1 = dfirst.target;
44317
44318 ok = expand_vec_perm_1 (&dsecond);
44319 gcc_assert (ok);
44320
44321 return true;
44322 }
44323
44324 /* For one operand, the only useful vperm2f128 permutation is 0x01
44325 aka lanes swap. */
44326 if (d->one_operand_p)
44327 return false;
44328 }
44329
44330 return false;
44331 }
44332
44333 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
44334 a two vector permutation using 2 intra-lane interleave insns
44335 and cross-lane shuffle for 32-byte vectors. */
44336
44337 static bool
44338 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
44339 {
44340 unsigned i, nelt;
44341 rtx (*gen) (rtx, rtx, rtx);
44342
44343 if (d->one_operand_p)
44344 return false;
44345 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
44346 ;
44347 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
44348 ;
44349 else
44350 return false;
44351
44352 nelt = d->nelt;
44353 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
44354 return false;
44355 for (i = 0; i < nelt; i += 2)
44356 if (d->perm[i] != d->perm[0] + i / 2
44357 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
44358 return false;
44359
44360 if (d->testing_p)
44361 return true;
44362
44363 switch (d->vmode)
44364 {
44365 case V32QImode:
44366 if (d->perm[0])
44367 gen = gen_vec_interleave_highv32qi;
44368 else
44369 gen = gen_vec_interleave_lowv32qi;
44370 break;
44371 case V16HImode:
44372 if (d->perm[0])
44373 gen = gen_vec_interleave_highv16hi;
44374 else
44375 gen = gen_vec_interleave_lowv16hi;
44376 break;
44377 case V8SImode:
44378 if (d->perm[0])
44379 gen = gen_vec_interleave_highv8si;
44380 else
44381 gen = gen_vec_interleave_lowv8si;
44382 break;
44383 case V4DImode:
44384 if (d->perm[0])
44385 gen = gen_vec_interleave_highv4di;
44386 else
44387 gen = gen_vec_interleave_lowv4di;
44388 break;
44389 case V8SFmode:
44390 if (d->perm[0])
44391 gen = gen_vec_interleave_highv8sf;
44392 else
44393 gen = gen_vec_interleave_lowv8sf;
44394 break;
44395 case V4DFmode:
44396 if (d->perm[0])
44397 gen = gen_vec_interleave_highv4df;
44398 else
44399 gen = gen_vec_interleave_lowv4df;
44400 break;
44401 default:
44402 gcc_unreachable ();
44403 }
44404
44405 emit_insn (gen (d->target, d->op0, d->op1));
44406 return true;
44407 }
44408
44409 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
44410 a single vector permutation using a single intra-lane vector
44411 permutation, vperm2f128 swapping the lanes and vblend* insn blending
44412 the non-swapped and swapped vectors together. */
44413
44414 static bool
44415 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
44416 {
44417 struct expand_vec_perm_d dfirst, dsecond;
44418 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
44419 rtx_insn *seq;
44420 bool ok;
44421 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
44422
44423 if (!TARGET_AVX
44424 || TARGET_AVX2
44425 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
44426 || !d->one_operand_p)
44427 return false;
44428
44429 dfirst = *d;
44430 for (i = 0; i < nelt; i++)
44431 dfirst.perm[i] = 0xff;
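/* Build DFIRST, an intra-lane permutation that places each requested element
in the lane it comes from, and MSK, the blend mask that picks, per element,
either DFIRST or its lane-swapped copy.  */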
44432 for (i = 0, msk = 0; i < nelt; i++)
44433 {
44434 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
44435 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
44436 return false;
44437 dfirst.perm[j] = d->perm[i];
44438 if (j != i)
44439 msk |= (1 << i);
44440 }
44441 for (i = 0; i < nelt; i++)
44442 if (dfirst.perm[i] == 0xff)
44443 dfirst.perm[i] = i;
44444
44445 if (!d->testing_p)
44446 dfirst.target = gen_reg_rtx (dfirst.vmode);
44447
44448 start_sequence ();
44449 ok = expand_vec_perm_1 (&dfirst);
44450 seq = get_insns ();
44451 end_sequence ();
44452
44453 if (!ok)
44454 return false;
44455
44456 if (d->testing_p)
44457 return true;
44458
44459 emit_insn (seq);
44460
44461 dsecond = *d;
44462 dsecond.op0 = dfirst.target;
44463 dsecond.op1 = dfirst.target;
44464 dsecond.one_operand_p = true;
44465 dsecond.target = gen_reg_rtx (dsecond.vmode);
44466 for (i = 0; i < nelt; i++)
44467 dsecond.perm[i] = i ^ nelt2;
44468
44469 ok = expand_vec_perm_1 (&dsecond);
44470 gcc_assert (ok);
44471
44472 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44473 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44474 return true;
44475 }
44476
44477 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
44478 permutation using two vperm2f128, followed by a vshufpd insn blending
44479 the two vectors together. */
44480
44481 static bool
44482 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44483 {
44484 struct expand_vec_perm_d dfirst, dsecond, dthird;
44485 bool ok;
44486
44487 if (!TARGET_AVX || (d->vmode != V4DFmode))
44488 return false;
44489
44490 if (d->testing_p)
44491 return true;
44492
44493 dfirst = *d;
44494 dsecond = *d;
44495 dthird = *d;
44496
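/* DFIRST uses vperm2f128 to gather the 128-bit lanes holding the elements
wanted at positions 0 and 2, DSECOND the lanes for positions 1 and 3, and
DTHIRD is the final vshufpd selecting one double from each lane.  */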
44497 dfirst.perm[0] = (d->perm[0] & ~1);
44498 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44499 dfirst.perm[2] = (d->perm[2] & ~1);
44500 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44501 dsecond.perm[0] = (d->perm[1] & ~1);
44502 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44503 dsecond.perm[2] = (d->perm[3] & ~1);
44504 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44505 dthird.perm[0] = (d->perm[0] % 2);
44506 dthird.perm[1] = (d->perm[1] % 2) + 4;
44507 dthird.perm[2] = (d->perm[2] % 2) + 2;
44508 dthird.perm[3] = (d->perm[3] % 2) + 6;
44509
44510 dfirst.target = gen_reg_rtx (dfirst.vmode);
44511 dsecond.target = gen_reg_rtx (dsecond.vmode);
44512 dthird.op0 = dfirst.target;
44513 dthird.op1 = dsecond.target;
44514 dthird.one_operand_p = false;
44515
44516 canonicalize_perm (&dfirst);
44517 canonicalize_perm (&dsecond);
44518
44519 ok = expand_vec_perm_1 (&dfirst)
44520 && expand_vec_perm_1 (&dsecond)
44521 && expand_vec_perm_1 (&dthird);
44522
44523 gcc_assert (ok);
44524
44525 return true;
44526 }
44527
44528 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44529 permutation with two pshufb insns and an ior. We should have already
44530 failed all two instruction sequences. */
44531
44532 static bool
44533 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44534 {
44535 rtx rperm[2][16], vperm, l, h, op, m128;
44536 unsigned int i, nelt, eltsz;
44537
44538 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44539 return false;
44540 gcc_assert (!d->one_operand_p);
44541
44542 if (d->testing_p)
44543 return true;
44544
44545 nelt = d->nelt;
44546 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44547
44548 /* Generate two permutation masks. If the required element is within
44549 the given vector it is shuffled into the proper lane. If the required
44550 element is in the other vector, force a zero into the lane by setting
44551 bit 7 in the permutation mask. */
44552 m128 = GEN_INT (-128);
44553 for (i = 0; i < nelt; ++i)
44554 {
44555 unsigned j, e = d->perm[i];
44556 unsigned which = (e >= nelt);
44557 if (e >= nelt)
44558 e -= nelt;
44559
44560 for (j = 0; j < eltsz; ++j)
44561 {
44562 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44563 rperm[1-which][i*eltsz + j] = m128;
44564 }
44565 }
44566
44567 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44568 vperm = force_reg (V16QImode, vperm);
44569
44570 l = gen_reg_rtx (V16QImode);
44571 op = gen_lowpart (V16QImode, d->op0);
44572 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44573
44574 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44575 vperm = force_reg (V16QImode, vperm);
44576
44577 h = gen_reg_rtx (V16QImode);
44578 op = gen_lowpart (V16QImode, d->op1);
44579 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44580
44581 op = d->target;
44582 if (d->vmode != V16QImode)
44583 op = gen_reg_rtx (V16QImode);
44584 emit_insn (gen_iorv16qi3 (op, l, h));
44585 if (op != d->target)
44586 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44587
44588 return true;
44589 }
44590
44591 /* Implement an arbitrary permutation of one V32QImode or V16HImode operand
44592 with two vpshufb insns, a vpermq and a vpor. We should have already failed
44593 all two or three instruction sequences. */
44594
44595 static bool
44596 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44597 {
44598 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44599 unsigned int i, nelt, eltsz;
44600
44601 if (!TARGET_AVX2
44602 || !d->one_operand_p
44603 || (d->vmode != V32QImode && d->vmode != V16HImode))
44604 return false;
44605
44606 if (d->testing_p)
44607 return true;
44608
44609 nelt = d->nelt;
44610 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44611
44612 /* Generate two permutation masks. In the first mask, if the required
44613 element lies within the same 128-bit lane, it is shuffled in; if it
44614 lies in the other lane, a zero is forced by setting bit 7. The second
44615 mask has non-negative entries only for elements requested from the
44616 other lane, and those entries are also placed into the other lane, so
44617 that the two V2TImode halves of the vpshufb result can afterwards be
44618 swapped into place. */
44619 m128 = GEN_INT (-128);
44620 for (i = 0; i < nelt; ++i)
44621 {
44622 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44623 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
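/* WHICH is 16 (in bytes) when element I must be fetched from the other
128-bit lane, 0 otherwise.  */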
44624
44625 for (j = 0; j < eltsz; ++j)
44626 {
44627 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44628 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44629 }
44630 }
44631
44632 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44633 vperm = force_reg (V32QImode, vperm);
44634
44635 h = gen_reg_rtx (V32QImode);
44636 op = gen_lowpart (V32QImode, d->op0);
44637 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44638
44639 /* Swap the 128-bit lanes of h into hp. */
44640 hp = gen_reg_rtx (V4DImode);
44641 op = gen_lowpart (V4DImode, h);
44642 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44643 const1_rtx));
44644
44645 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44646 vperm = force_reg (V32QImode, vperm);
44647
44648 l = gen_reg_rtx (V32QImode);
44649 op = gen_lowpart (V32QImode, d->op0);
44650 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44651
44652 op = d->target;
44653 if (d->vmode != V32QImode)
44654 op = gen_reg_rtx (V32QImode);
44655 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44656 if (op != d->target)
44657 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44658
44659 return true;
44660 }
44661
44662 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44663 and extract-odd permutations of two V32QImode or V16HImode operands
44664 with two vpshufb insns, a vpor and a vpermq. We should have already
44665 failed all two or three instruction sequences. */
44666
44667 static bool
44668 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44669 {
44670 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44671 unsigned int i, nelt, eltsz;
44672
44673 if (!TARGET_AVX2
44674 || d->one_operand_p
44675 || (d->vmode != V32QImode && d->vmode != V16HImode))
44676 return false;
44677
44678 for (i = 0; i < d->nelt; ++i)
44679 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44680 return false;
44681
44682 if (d->testing_p)
44683 return true;
44684
44685 nelt = d->nelt;
44686 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44687
44688 /* Generate two permutation masks. In the first permutation mask
44689 the first quarter will contain indexes for the first half
44690 of the op0, the second quarter will contain bit 7 set, third quarter
44691 will contain indexes for the second half of the op0 and the
44692 last quarter bit 7 set. In the second permutation mask
44693 the first quarter will contain bit 7 set, the second quarter
44694 indexes for the first half of the op1, the third quarter bit 7 set
44695 and last quarter indexes for the second half of the op1.
44696 I.e. the first mask e.g. for V32QImode extract even will be:
44697 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44698 (all values masked with 0xf except for -128) and second mask
44699 for extract even will be
44700 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44701 m128 = GEN_INT (-128);
44702 for (i = 0; i < nelt; ++i)
44703 {
44704 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44705 unsigned which = d->perm[i] >= nelt;
44706 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
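/* XORV swaps the second and third quarters of the byte mask, producing the
layout described in the comment above.  */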
44707
44708 for (j = 0; j < eltsz; ++j)
44709 {
44710 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44711 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44712 }
44713 }
44714
44715 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44716 vperm = force_reg (V32QImode, vperm);
44717
44718 l = gen_reg_rtx (V32QImode);
44719 op = gen_lowpart (V32QImode, d->op0);
44720 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44721
44722 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44723 vperm = force_reg (V32QImode, vperm);
44724
44725 h = gen_reg_rtx (V32QImode);
44726 op = gen_lowpart (V32QImode, d->op1);
44727 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44728
44729 ior = gen_reg_rtx (V32QImode);
44730 emit_insn (gen_iorv32qi3 (ior, l, h));
44731
44732 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44733 op = gen_reg_rtx (V4DImode);
44734 ior = gen_lowpart (V4DImode, ior);
44735 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44736 const1_rtx, GEN_INT (3)));
44737 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44738
44739 return true;
44740 }
44741
44742 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
44743 and extract-odd permutations. */
44744
44745 static bool
44746 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44747 {
44748 rtx t1, t2, t3, t4, t5;
44749
44750 switch (d->vmode)
44751 {
44752 case V4DFmode:
44753 if (d->testing_p)
44754 break;
44755 t1 = gen_reg_rtx (V4DFmode);
44756 t2 = gen_reg_rtx (V4DFmode);
44757
44758 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44759 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44760 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44761
44762 /* Now an unpck[lh]pd will produce the result required. */
44763 if (odd)
44764 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44765 else
44766 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44767 emit_insn (t3);
44768 break;
44769
44770 case V8SFmode:
44771 {
44772 int mask = odd ? 0xdd : 0x88;
44773
44774 if (d->testing_p)
44775 break;
44776 t1 = gen_reg_rtx (V8SFmode);
44777 t2 = gen_reg_rtx (V8SFmode);
44778 t3 = gen_reg_rtx (V8SFmode);
44779
44780 /* Shuffle within the 128-bit lanes to produce:
44781 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44782 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44783 GEN_INT (mask)));
44784
44785 /* Shuffle the lanes around to produce:
44786 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44787 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44788 GEN_INT (0x3)));
44789
44790 /* Shuffle within the 128-bit lanes to produce:
44791 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44792 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44793
44794 /* Shuffle within the 128-bit lanes to produce:
44795 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44796 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44797
44798 /* Shuffle the lanes around to produce:
44799 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44800 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44801 GEN_INT (0x20)));
44802 }
44803 break;
44804
44805 case V2DFmode:
44806 case V4SFmode:
44807 case V2DImode:
44808 case V4SImode:
44809 /* These are always directly implementable by expand_vec_perm_1. */
44810 gcc_unreachable ();
44811
44812 case V8HImode:
44813 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44814 return expand_vec_perm_pshufb2 (d);
44815 else
44816 {
44817 if (d->testing_p)
44818 break;
44819 /* We need 2*log2(N)-1 operations to achieve odd/even
44820 with interleave. */
44821 t1 = gen_reg_rtx (V8HImode);
44822 t2 = gen_reg_rtx (V8HImode);
44823 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44824 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44825 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44826 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44827 if (odd)
44828 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44829 else
44830 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44831 emit_insn (t3);
44832 }
44833 break;
44834
44835 case V16QImode:
44836 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44837 return expand_vec_perm_pshufb2 (d);
44838 else
44839 {
44840 if (d->testing_p)
44841 break;
44842 t1 = gen_reg_rtx (V16QImode);
44843 t2 = gen_reg_rtx (V16QImode);
44844 t3 = gen_reg_rtx (V16QImode);
44845 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44846 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44847 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44848 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44849 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44850 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44851 if (odd)
44852 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44853 else
44854 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44855 emit_insn (t3);
44856 }
44857 break;
44858
44859 case V16HImode:
44860 case V32QImode:
44861 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44862
44863 case V4DImode:
44864 if (!TARGET_AVX2)
44865 {
44866 struct expand_vec_perm_d d_copy = *d;
44867 d_copy.vmode = V4DFmode;
44868 if (d->testing_p)
44869 d_copy.target = gen_lowpart (V4DFmode, d->target);
44870 else
44871 d_copy.target = gen_reg_rtx (V4DFmode);
44872 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44873 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44874 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44875 {
44876 if (!d->testing_p)
44877 emit_move_insn (d->target,
44878 gen_lowpart (V4DImode, d_copy.target));
44879 return true;
44880 }
44881 return false;
44882 }
44883
44884 if (d->testing_p)
44885 break;
44886
44887 t1 = gen_reg_rtx (V4DImode);
44888 t2 = gen_reg_rtx (V4DImode);
44889
44890 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44891 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44892 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44893
44894 /* Now a vpunpck[lh]qdq will produce the result required. */
44895 if (odd)
44896 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44897 else
44898 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44899 emit_insn (t3);
44900 break;
44901
44902 case V8SImode:
44903 if (!TARGET_AVX2)
44904 {
44905 struct expand_vec_perm_d d_copy = *d;
44906 d_copy.vmode = V8SFmode;
44907 if (d->testing_p)
44908 d_copy.target = gen_lowpart (V8SFmode, d->target);
44909 else
44910 d_copy.target = gen_reg_rtx (V8SFmode);
44911 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44912 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44913 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44914 {
44915 if (!d->testing_p)
44916 emit_move_insn (d->target,
44917 gen_lowpart (V8SImode, d_copy.target));
44918 return true;
44919 }
44920 return false;
44921 }
44922
44923 if (d->testing_p)
44924 break;
44925
44926 t1 = gen_reg_rtx (V8SImode);
44927 t2 = gen_reg_rtx (V8SImode);
44928 t3 = gen_reg_rtx (V4DImode);
44929 t4 = gen_reg_rtx (V4DImode);
44930 t5 = gen_reg_rtx (V4DImode);
44931
44932 /* Shuffle the lanes around into
44933 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44934 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44935 gen_lowpart (V4DImode, d->op1),
44936 GEN_INT (0x20)));
44937 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44938 gen_lowpart (V4DImode, d->op1),
44939 GEN_INT (0x31)));
44940
44941 /* Swap the 2nd and 3rd position in each lane into
44942 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44943 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44944 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44945 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44946 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44947
44948 /* Now a vpunpck[lh]qdq will produce
44949 { 0 2 4 6 8 a c e } or { 1 3 5 7 9 b d f } respectively. */
44950 if (odd)
44951 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44952 gen_lowpart (V4DImode, t2));
44953 else
44954 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44955 gen_lowpart (V4DImode, t2));
44956 emit_insn (t3);
44957 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44958 break;
44959
44960 default:
44961 gcc_unreachable ();
44962 }
44963
44964 return true;
44965 }
44966
44967 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44968 extract-even and extract-odd permutations. */
44969
44970 static bool
44971 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44972 {
44973 unsigned i, odd, nelt = d->nelt;
44974
44975 odd = d->perm[0];
44976 if (odd != 0 && odd != 1)
44977 return false;
44978
44979 for (i = 1; i < nelt; ++i)
44980 if (d->perm[i] != 2 * i + odd)
44981 return false;
44982
44983 return expand_vec_perm_even_odd_1 (d, odd);
44984 }
44985
44986 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
44987 permutations. We assume that expand_vec_perm_1 has already failed. */
44988
44989 static bool
44990 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44991 {
44992 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44993 enum machine_mode vmode = d->vmode;
44994 unsigned char perm2[4];
44995 rtx op0 = d->op0, dest;
44996 bool ok;
44997
44998 switch (vmode)
44999 {
45000 case V4DFmode:
45001 case V8SFmode:
45002 /* These are special-cased in sse.md so that we can optionally
45003 use the vbroadcast instruction. They expand to two insns
45004 if the input happens to be in a register. */
45005 gcc_unreachable ();
45006
45007 case V2DFmode:
45008 case V2DImode:
45009 case V4SFmode:
45010 case V4SImode:
45011 /* These are always implementable using standard shuffle patterns. */
45012 gcc_unreachable ();
45013
45014 case V8HImode:
45015 case V16QImode:
45016 /* These can be implemented via interleave. We save one insn by
45017 stopping once we have promoted to V4SImode and then using pshufd. */
45018 if (d->testing_p)
45019 return true;
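/* Repeatedly interleave the operand with itself, each step doubling the
element width and halving NELT2, until the broadcast element fills a whole
SImode element; a single pshufd then finishes the job.  */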
45020 do
45021 {
45022 rtx dest;
45023 rtx (*gen) (rtx, rtx, rtx)
45024 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
45025 : gen_vec_interleave_lowv8hi;
45026
45027 if (elt >= nelt2)
45028 {
45029 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
45030 : gen_vec_interleave_highv8hi;
45031 elt -= nelt2;
45032 }
45033 nelt2 /= 2;
45034
45035 dest = gen_reg_rtx (vmode);
45036 emit_insn (gen (dest, op0, op0));
45037 vmode = get_mode_wider_vector (vmode);
45038 op0 = gen_lowpart (vmode, dest);
45039 }
45040 while (vmode != V4SImode);
45041
45042 memset (perm2, elt, 4);
45043 dest = gen_reg_rtx (V4SImode);
45044 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
45045 gcc_assert (ok);
45046 if (!d->testing_p)
45047 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
45048 return true;
45049
45050 case V32QImode:
45051 case V16HImode:
45052 case V8SImode:
45053 case V4DImode:
45054 /* For AVX2 broadcasts of the first element vpbroadcast* or
45055 vpermq should be used by expand_vec_perm_1. */
45056 gcc_assert (!TARGET_AVX2 || d->perm[0]);
45057 return false;
45058
45059 default:
45060 gcc_unreachable ();
45061 }
45062 }
45063
45064 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
45065 broadcast permutations. */
45066
45067 static bool
45068 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
45069 {
45070 unsigned i, elt, nelt = d->nelt;
45071
45072 if (!d->one_operand_p)
45073 return false;
45074
45075 elt = d->perm[0];
45076 for (i = 1; i < nelt; ++i)
45077 if (d->perm[i] != elt)
45078 return false;
45079
45080 return expand_vec_perm_broadcast_1 (d);
45081 }
45082
45083 /* Implement an arbitrary permutation of two V32QImode or V16HImode operands
45084 with 4 vpshufb insns, 2 vpermq and 3 vpor insns. We should have already
45085 failed all the shorter instruction sequences. */
45086
45087 static bool
45088 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
45089 {
45090 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
45091 unsigned int i, nelt, eltsz;
45092 bool used[4];
45093
45094 if (!TARGET_AVX2
45095 || d->one_operand_p
45096 || (d->vmode != V32QImode && d->vmode != V16HImode))
45097 return false;
45098
45099 if (d->testing_p)
45100 return true;
45101
45102 nelt = d->nelt;
45103 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
45104
45105 /* Generate 4 permutation masks, an in-lane and a cross-lane mask for
45106 each operand. If the required element lies within the same 128-bit
45107 lane, it goes into that operand's in-lane mask; otherwise it goes
45108 into the cross-lane mask, placed in the other lane, so that the two
45109 V2TImode halves of that vpshufb result can afterwards be swapped
45110 into place. Unused mask entries are set to -128 (bit 7) to force
45111 zeros. */
45112 m128 = GEN_INT (-128);
45113 for (i = 0; i < 32; ++i)
45114 {
45115 rperm[0][i] = m128;
45116 rperm[1][i] = m128;
45117 rperm[2][i] = m128;
45118 rperm[3][i] = m128;
45119 }
45120 used[0] = false;
45121 used[1] = false;
45122 used[2] = false;
45123 used[3] = false;
45124 for (i = 0; i < nelt; ++i)
45125 {
45126 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
45127 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
45128 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
45129
45130 for (j = 0; j < eltsz; ++j)
45131 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
45132 used[which] = true;
45133 }
45134
45135 for (i = 0; i < 2; ++i)
45136 {
45137 if (!used[2 * i + 1])
45138 {
45139 h[i] = NULL_RTX;
45140 continue;
45141 }
45142 vperm = gen_rtx_CONST_VECTOR (V32QImode,
45143 gen_rtvec_v (32, rperm[2 * i + 1]));
45144 vperm = force_reg (V32QImode, vperm);
45145 h[i] = gen_reg_rtx (V32QImode);
45146 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45147 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
45148 }
45149
45150 /* Swap the 128-bit lanes of h[X]. */
45151 for (i = 0; i < 2; ++i)
45152 {
45153 if (h[i] == NULL_RTX)
45154 continue;
45155 op = gen_reg_rtx (V4DImode);
45156 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
45157 const2_rtx, GEN_INT (3), const0_rtx,
45158 const1_rtx));
45159 h[i] = gen_lowpart (V32QImode, op);
45160 }
45161
45162 for (i = 0; i < 2; ++i)
45163 {
45164 if (!used[2 * i])
45165 {
45166 l[i] = NULL_RTX;
45167 continue;
45168 }
45169 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
45170 vperm = force_reg (V32QImode, vperm);
45171 l[i] = gen_reg_rtx (V32QImode);
45172 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
45173 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
45174 }
45175
45176 for (i = 0; i < 2; ++i)
45177 {
45178 if (h[i] && l[i])
45179 {
45180 op = gen_reg_rtx (V32QImode);
45181 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
45182 l[i] = op;
45183 }
45184 else if (h[i])
45185 l[i] = h[i];
45186 }
45187
45188 gcc_assert (l[0] && l[1]);
45189 op = d->target;
45190 if (d->vmode != V32QImode)
45191 op = gen_reg_rtx (V32QImode);
45192 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
45193 if (op != d->target)
45194 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
45195 return true;
45196 }
45197
45198 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
45199 With all of the interface bits taken care of, perform the expansion
45200 in D and return true on success. */
45201
45202 static bool
45203 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
45204 {
45205 /* Try a single instruction expansion. */
45206 if (expand_vec_perm_1 (d))
45207 return true;
45208
45209 /* Try sequences of two instructions. */
45210
45211 if (expand_vec_perm_pshuflw_pshufhw (d))
45212 return true;
45213
45214 if (expand_vec_perm_palignr (d, false))
45215 return true;
45216
45217 if (expand_vec_perm_interleave2 (d))
45218 return true;
45219
45220 if (expand_vec_perm_broadcast (d))
45221 return true;
45222
45223 if (expand_vec_perm_vpermq_perm_1 (d))
45224 return true;
45225
45226 if (expand_vec_perm_vperm2f128 (d))
45227 return true;
45228
45229 if (expand_vec_perm_pblendv (d))
45230 return true;
45231
45232 /* Try sequences of three instructions. */
45233
45234 if (expand_vec_perm_2vperm2f128_vshuf (d))
45235 return true;
45236
45237 if (expand_vec_perm_pshufb2 (d))
45238 return true;
45239
45240 if (expand_vec_perm_interleave3 (d))
45241 return true;
45242
45243 if (expand_vec_perm_vperm2f128_vblend (d))
45244 return true;
45245
45246 /* Try sequences of four instructions. */
45247
45248 if (expand_vec_perm_vpshufb2_vpermq (d))
45249 return true;
45250
45251 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
45252 return true;
45253
45254 /* ??? Look for narrow permutations whose element orderings would
45255 allow the promotion to a wider mode. */
45256
45257 /* ??? Look for sequences of interleave or a wider permute that place
45258 the data into the correct lanes for a half-vector shuffle like
45259 pshuf[lh]w or vpermilps. */
45260
45261 /* ??? Look for sequences of interleave that produce the desired results.
45262 The combinatorics of punpck[lh] get pretty ugly... */
45263
45264 if (expand_vec_perm_even_odd (d))
45265 return true;
45266
45267 /* Even longer sequences. */
45268 if (expand_vec_perm_vpshufb4_vpermq2 (d))
45269 return true;
45270
45271 return false;
45272 }
45273
45274 /* If a permutation only uses one operand, make it clear. Returns true
45275 if the permutation references both operands. */
45276
45277 static bool
45278 canonicalize_perm (struct expand_vec_perm_d *d)
45279 {
45280 int i, which, nelt = d->nelt;
45281
45282 for (i = which = 0; i < nelt; ++i)
45283 which |= (d->perm[i] < nelt ? 1 : 2);
45284
45285 d->one_operand_p = true;
45286 switch (which)
45287 {
45288 default:
45289 gcc_unreachable ();
45290
45291 case 3:
45292 if (!rtx_equal_p (d->op0, d->op1))
45293 {
45294 d->one_operand_p = false;
45295 break;
45296 }
45297 /* The elements of PERM do not suggest that only the first operand
45298 is used, but both operands are identical. Allow easier matching
45299 of the permutation by folding the permutation into the single
45300 input vector. */
45301 /* FALLTHRU */
45302
45303 case 2:
45304 for (i = 0; i < nelt; ++i)
45305 d->perm[i] &= nelt - 1;
45306 d->op0 = d->op1;
45307 break;
45308
45309 case 1:
45310 d->op1 = d->op0;
45311 break;
45312 }
45313
45314 return (which == 3);
45315 }
45316
45317 bool
45318 ix86_expand_vec_perm_const (rtx operands[4])
45319 {
45320 struct expand_vec_perm_d d;
45321 unsigned char perm[MAX_VECT_LEN];
45322 int i, nelt;
45323 bool two_args;
45324 rtx sel;
45325
45326 d.target = operands[0];
45327 d.op0 = operands[1];
45328 d.op1 = operands[2];
45329 sel = operands[3];
45330
45331 d.vmode = GET_MODE (d.target);
45332 gcc_assert (VECTOR_MODE_P (d.vmode));
45333 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45334 d.testing_p = false;
45335
45336 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
45337 gcc_assert (XVECLEN (sel, 0) == nelt);
45338 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
45339
45340 for (i = 0; i < nelt; ++i)
45341 {
45342 rtx e = XVECEXP (sel, 0, i);
45343 int ei = INTVAL (e) & (2 * nelt - 1);
45344 d.perm[i] = ei;
45345 perm[i] = ei;
45346 }
45347
45348 two_args = canonicalize_perm (&d);
45349
45350 if (ix86_expand_vec_perm_const_1 (&d))
45351 return true;
45352
45353 /* If the selector says both arguments are needed, but the operands are the
45354 same, the above tried to expand with one_operand_p and flattened selector.
45355 If that didn't work, retry without one_operand_p; we succeeded with that
45356 during testing. */
45357 if (two_args && d.one_operand_p)
45358 {
45359 d.one_operand_p = false;
45360 memcpy (d.perm, perm, sizeof (perm));
45361 return ix86_expand_vec_perm_const_1 (&d);
45362 }
45363
45364 return false;
45365 }
45366
45367 /* Implement targetm.vectorize.vec_perm_const_ok. */
45368
45369 static bool
45370 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
45371 const unsigned char *sel)
45372 {
45373 struct expand_vec_perm_d d;
45374 unsigned int i, nelt, which;
45375 bool ret;
45376
45377 d.vmode = vmode;
45378 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45379 d.testing_p = true;
45380
45381 /* Given sufficient ISA support we can just return true here
45382 for selected vector modes. */
45383 switch (d.vmode)
45384 {
45385 case V16SFmode:
45386 case V16SImode:
45387 case V8DImode:
45388 case V8DFmode:
45389 if (TARGET_AVX512F)
45390 /* All implementable with a single vpermi2 insn. */
45391 return true;
45392 break;
45393 case V32HImode:
45394 if (TARGET_AVX512BW)
45395 /* All implementable with a single vpermi2 insn. */
45396 return true;
45397 break;
45398 case V8SImode:
45399 case V8SFmode:
45400 case V4DFmode:
45401 case V4DImode:
45402 if (TARGET_AVX512VL)
45403 /* All implementable with a single vpermi2 insn. */
45404 return true;
45405 break;
45406 case V16HImode:
45407 if (TARGET_AVX2)
45408 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
45409 return true;
45410 break;
45411 case V32QImode:
45412 if (TARGET_AVX2)
45413 /* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns. */
45414 return true;
45415 break;
45416 case V4SImode:
45417 case V4SFmode:
45418 case V8HImode:
45419 case V16QImode:
45420 /* All implementable with a single vpperm insn. */
45421 if (TARGET_XOP)
45422 return true;
45423 /* All implementable with 2 pshufb + 1 ior. */
45424 if (TARGET_SSSE3)
45425 return true;
45426 break;
45427 case V2DImode:
45428 case V2DFmode:
45429 /* All implementable with shufpd or unpck[lh]pd. */
45430 return true;
45431 default:
45432 return false;
45433 }
45434
45435 /* Extract the values from the vector CST into the permutation
45436 array in D. */
45437 memcpy (d.perm, sel, nelt);
45438 for (i = which = 0; i < nelt; ++i)
45439 {
45440 unsigned char e = d.perm[i];
45441 gcc_assert (e < 2 * nelt);
45442 which |= (e < nelt ? 1 : 2);
45443 }
45444
45445 /* If all elements come from the second vector, fold them into the first. */
45446 if (which == 2)
45447 for (i = 0; i < nelt; ++i)
45448 d.perm[i] -= nelt;
45449
45450 /* Check whether the mask can be applied to the vector type. */
45451 d.one_operand_p = (which != 3);
45452
45453 /* Implementable with shufps or pshufd. */
45454 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
45455 return true;
45456
45457 /* Otherwise we have to go through the motions and see if we can
45458 figure out how to generate the requested permutation. */
45459 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
45460 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
45461 if (!d.one_operand_p)
45462 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
45463
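/* Emit into a scratch sequence that is discarded afterwards; with
   d.testing_p set we only care whether expansion would succeed.  */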
45464 start_sequence ();
45465 ret = ix86_expand_vec_perm_const_1 (&d);
45466 end_sequence ();
45467
45468 return ret;
45469 }
45470
45471 void
45472 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
45473 {
45474 struct expand_vec_perm_d d;
45475 unsigned i, nelt;
45476
45477 d.target = targ;
45478 d.op0 = op0;
45479 d.op1 = op1;
45480 d.vmode = GET_MODE (targ);
45481 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45482 d.one_operand_p = false;
45483 d.testing_p = false;
45484
45485 for (i = 0; i < nelt; ++i)
45486 d.perm[i] = i * 2 + odd;
45487
45488 /* We'll either be able to implement the permutation directly... */
45489 if (expand_vec_perm_1 (&d))
45490 return;
45491
45492 /* ... or we use the special-case patterns. */
45493 expand_vec_perm_even_odd_1 (&d, odd);
45494 }
45495
45496 static void
45497 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45498 {
45499 struct expand_vec_perm_d d;
45500 unsigned i, nelt, base;
45501 bool ok;
45502
45503 d.target = targ;
45504 d.op0 = op0;
45505 d.op1 = op1;
45506 d.vmode = GET_MODE (targ);
45507 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45508 d.one_operand_p = false;
45509 d.testing_p = false;
45510
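/* Build an interleave selector: alternate elements taken from the low
   (or high) halves of OP0 and OP1.  */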
45511 base = high_p ? nelt / 2 : 0;
45512 for (i = 0; i < nelt / 2; ++i)
45513 {
45514 d.perm[i * 2] = i + base;
45515 d.perm[i * 2 + 1] = i + base + nelt;
45516 }
45517
45518 /* Note that for AVX this isn't one instruction. */
45519 ok = ix86_expand_vec_perm_const_1 (&d);
45520 gcc_assert (ok);
45521 }
45522
45523
45524 /* Expand a vector operation CODE for a V*QImode in terms of the
45525 same operation on V*HImode. */
45526
45527 void
45528 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45529 {
45530 enum machine_mode qimode = GET_MODE (dest);
45531 enum machine_mode himode;
45532 rtx (*gen_il) (rtx, rtx, rtx);
45533 rtx (*gen_ih) (rtx, rtx, rtx);
45534 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45535 struct expand_vec_perm_d d;
45536 bool ok, full_interleave;
45537 bool uns_p = false;
45538 int i;
45539
45540 switch (qimode)
45541 {
45542 case V16QImode:
45543 himode = V8HImode;
45544 gen_il = gen_vec_interleave_lowv16qi;
45545 gen_ih = gen_vec_interleave_highv16qi;
45546 break;
45547 case V32QImode:
45548 himode = V16HImode;
45549 gen_il = gen_avx2_interleave_lowv32qi;
45550 gen_ih = gen_avx2_interleave_highv32qi;
45551 break;
45552 case V64QImode:
45553 himode = V32HImode;
45554 gen_il = gen_avx512bw_interleave_lowv64qi;
45555 gen_ih = gen_avx512bw_interleave_highv64qi;
45556 break;
45557 default:
45558 gcc_unreachable ();
45559 }
45560
45561 op2_l = op2_h = op2;
45562 switch (code)
45563 {
45564 case MULT:
45565 /* Unpack data such that we've got a source byte in each low byte of
45566 each word. We don't care what goes into the high byte of each word.
45567 Rather than trying to get zero in there, most convenient is to let
45568 it be a copy of the low byte. */
45569 op2_l = gen_reg_rtx (qimode);
45570 op2_h = gen_reg_rtx (qimode);
45571 emit_insn (gen_il (op2_l, op2, op2));
45572 emit_insn (gen_ih (op2_h, op2, op2));
45573 /* FALLTHRU */
45574
45575 op1_l = gen_reg_rtx (qimode);
45576 op1_h = gen_reg_rtx (qimode);
45577 emit_insn (gen_il (op1_l, op1, op1));
45578 emit_insn (gen_ih (op1_h, op1, op1));
45579 full_interleave = qimode == V16QImode;
45580 break;
45581
45582 case ASHIFT:
45583 case LSHIFTRT:
45584 uns_p = true;
45585 /* FALLTHRU */
45586 case ASHIFTRT:
45587 op1_l = gen_reg_rtx (himode);
45588 op1_h = gen_reg_rtx (himode);
45589 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45590 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45591 full_interleave = true;
45592 break;
45593 default:
45594 gcc_unreachable ();
45595 }
45596
45597 /* Perform the operation. */
45598 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45599 1, OPTAB_DIRECT);
45600 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45601 1, OPTAB_DIRECT);
45602 gcc_assert (res_l && res_h);
45603
45604 /* Merge the data back into the right place. */
45605 d.target = dest;
45606 d.op0 = gen_lowpart (qimode, res_l);
45607 d.op1 = gen_lowpart (qimode, res_h);
45608 d.vmode = qimode;
45609 d.nelt = GET_MODE_NUNITS (qimode);
45610 d.one_operand_p = false;
45611 d.testing_p = false;
45612
45613 if (full_interleave)
45614 {
45615 /* For SSE2, we used a full interleave, so the desired
45616 results are in the even elements. */
45617 for (i = 0; i < 64; ++i)
45618 d.perm[i] = i * 2;
45619 }
45620 else
45621 {
45622 /* For AVX, the interleave used above was not cross-lane. So the
45623 extraction is evens but with the second and third quarter swapped.
45624 Happily, that is even one insn shorter than even extraction. */
45625 for (i = 0; i < 64; ++i)
45626 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45627 }
45628
45629 ok = ix86_expand_vec_perm_const_1 (&d);
45630 gcc_assert (ok);
45631
45632 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45633 gen_rtx_fmt_ee (code, qimode, op1, op2));
45634 }
45635
45636 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45637 if op is CONST_VECTOR with all odd elements equal to their
45638 preceding element. */
45639
45640 static bool
45641 const_vector_equal_evenodd_p (rtx op)
45642 {
45643 enum machine_mode mode = GET_MODE (op);
45644 int i, nunits = GET_MODE_NUNITS (mode);
45645 if (GET_CODE (op) != CONST_VECTOR
45646 || nunits != CONST_VECTOR_NUNITS (op))
45647 return false;
45648 for (i = 0; i < nunits; i += 2)
45649 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45650 return false;
45651 return true;
45652 }
45653
45654 void
45655 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45656 bool uns_p, bool odd_p)
45657 {
45658 enum machine_mode mode = GET_MODE (op1);
45659 enum machine_mode wmode = GET_MODE (dest);
45660 rtx x;
45661 rtx orig_op1 = op1, orig_op2 = op2;
45662
45663 if (!nonimmediate_operand (op1, mode))
45664 op1 = force_reg (mode, op1);
45665 if (!nonimmediate_operand (op2, mode))
45666 op2 = force_reg (mode, op2);
45667
45668 /* We only play even/odd games with vectors of SImode. */
45669 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45670
45671 /* If we're looking for the odd results, shift those members down to
45672 the even slots. For some cpus this is faster than a PSHUFD. */
45673 if (odd_p)
45674 {
45675 /* For XOP use vpmacsdqh, but only for smult, as it is only
45676 signed. */
45677 if (TARGET_XOP && mode == V4SImode && !uns_p)
45678 {
45679 x = force_reg (wmode, CONST0_RTX (wmode));
45680 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45681 return;
45682 }
45683
45684 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45685 if (!const_vector_equal_evenodd_p (orig_op1))
45686 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45687 x, NULL, 1, OPTAB_DIRECT);
45688 if (!const_vector_equal_evenodd_p (orig_op2))
45689 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45690 x, NULL, 1, OPTAB_DIRECT);
45691 op1 = gen_lowpart (mode, op1);
45692 op2 = gen_lowpart (mode, op2);
45693 }
45694
45695 if (mode == V16SImode)
45696 {
45697 if (uns_p)
45698 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45699 else
45700 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45701 }
45702 else if (mode == V8SImode)
45703 {
45704 if (uns_p)
45705 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45706 else
45707 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45708 }
45709 else if (uns_p)
45710 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45711 else if (TARGET_SSE4_1)
45712 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45713 else
45714 {
45715 rtx s1, s2, t0, t1, t2;
45716
45717 /* The easiest way to implement this without PMULDQ is to go through
45718 the motions as if we are performing a full 64-bit multiply, except
45719 that we need to do less shuffling of the elements. */
45720
45721 /* Compute the sign-extension, aka highparts, of the two operands. */
45722 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45723 op1, pc_rtx, pc_rtx);
45724 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45725 op2, pc_rtx, pc_rtx);
45726
45727 /* Multiply LO(A) * HI(B), and vice-versa. */
45728 t1 = gen_reg_rtx (wmode);
45729 t2 = gen_reg_rtx (wmode);
45730 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45731 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45732
45733 /* Multiply LO(A) * LO(B). */
45734 t0 = gen_reg_rtx (wmode);
45735 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45736
45737 /* Combine and shift the highparts into place. */
45738 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45739 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45740 1, OPTAB_DIRECT);
45741
45742 /* Combine high and low parts. */
45743 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45744 return;
45745 }
45746 emit_insn (x);
45747 }
45748
45749 void
45750 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45751 bool uns_p, bool high_p)
45752 {
45753 enum machine_mode wmode = GET_MODE (dest);
45754 enum machine_mode mode = GET_MODE (op1);
45755 rtx t1, t2, t3, t4, mask;
45756
45757 switch (mode)
45758 {
45759 case V4SImode:
45760 t1 = gen_reg_rtx (mode);
45761 t2 = gen_reg_rtx (mode);
45762 if (TARGET_XOP && !uns_p)
45763 {
45764 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45765 shuffle the elements once so that all elements are in the right
45766 place for immediate use: { A C B D }. */
45767 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45768 const1_rtx, GEN_INT (3)));
45769 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45770 const1_rtx, GEN_INT (3)));
45771 }
45772 else
45773 {
45774 /* Put the elements into place for the multiply. */
45775 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45776 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45777 high_p = false;
45778 }
45779 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45780 break;
45781
45782 case V8SImode:
45783 /* Shuffle the elements between the lanes. After this we
45784 have { A B E F | C D G H } for each operand. */
45785 t1 = gen_reg_rtx (V4DImode);
45786 t2 = gen_reg_rtx (V4DImode);
45787 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45788 const0_rtx, const2_rtx,
45789 const1_rtx, GEN_INT (3)));
45790 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45791 const0_rtx, const2_rtx,
45792 const1_rtx, GEN_INT (3)));
45793
45794 /* Shuffle the elements within the lanes. After this we
45795 have { A A B B | C C D D } or { E E F F | G G H H }. */
45796 t3 = gen_reg_rtx (V8SImode);
45797 t4 = gen_reg_rtx (V8SImode);
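/* Each 2-bit field of the pshufd immediate selects a source element;
   duplicate either the low pair or the high pair within each lane.  */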
45798 mask = GEN_INT (high_p
45799 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45800 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45801 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45802 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45803
45804 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45805 break;
45806
45807 case V8HImode:
45808 case V16HImode:
45809 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45810 uns_p, OPTAB_DIRECT);
45811 t2 = expand_binop (mode,
45812 uns_p ? umul_highpart_optab : smul_highpart_optab,
45813 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45814 gcc_assert (t1 && t2);
45815
45816 t3 = gen_reg_rtx (mode);
45817 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45818 emit_move_insn (dest, gen_lowpart (wmode, t3));
45819 break;
45820
45821 case V16QImode:
45822 case V32QImode:
45823 case V32HImode:
45824 case V16SImode:
45825 case V64QImode:
45826 t1 = gen_reg_rtx (wmode);
45827 t2 = gen_reg_rtx (wmode);
45828 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45829 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45830
45831 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45832 break;
45833
45834 default:
45835 gcc_unreachable ();
45836 }
45837 }
45838
45839 void
45840 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45841 {
45842 rtx res_1, res_2, res_3, res_4;
45843
45844 res_1 = gen_reg_rtx (V4SImode);
45845 res_2 = gen_reg_rtx (V4SImode);
45846 res_3 = gen_reg_rtx (V2DImode);
45847 res_4 = gen_reg_rtx (V2DImode);
45848 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45849 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45850
45851 /* Move the results in element 2 down to element 1; we don't care
45852 what goes in elements 2 and 3. Then we can merge the parts
45853 back together with an interleave.
45854
45855 Note that two other sequences were tried:
45856 (1) Use interleaves at the start instead of psrldq, which allows
45857 us to use a single shufps to merge things back at the end.
45858 (2) Use shufps here to combine the two vectors, then pshufd to
45859 put the elements in the correct order.
45860 In both cases the cost of the reformatting stall was too high
45861 and the overall sequence slower. */
45862
45863 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45864 const0_rtx, const2_rtx,
45865 const0_rtx, const0_rtx));
45866 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45867 const0_rtx, const2_rtx,
45868 const0_rtx, const0_rtx));
45869 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45870
45871 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45872 }
45873
45874 void
45875 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45876 {
45877 enum machine_mode mode = GET_MODE (op0);
45878 rtx t1, t2, t3, t4, t5, t6;
45879
45880 if (TARGET_AVX512DQ && mode == V8DImode)
45881 emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
45882 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
45883 emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
45884 else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
45885 emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
45886 else if (TARGET_XOP && mode == V2DImode)
45887 {
45888 /* op1: A,B,C,D, op2: E,F,G,H */
45889 op1 = gen_lowpart (V4SImode, op1);
45890 op2 = gen_lowpart (V4SImode, op2);
45891
45892 t1 = gen_reg_rtx (V4SImode);
45893 t2 = gen_reg_rtx (V4SImode);
45894 t3 = gen_reg_rtx (V2DImode);
45895 t4 = gen_reg_rtx (V2DImode);
45896
45897 /* t1: B,A,D,C */
45898 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45899 GEN_INT (1),
45900 GEN_INT (0),
45901 GEN_INT (3),
45902 GEN_INT (2)));
45903
45904 /* t2: (B*E),(A*F),(D*G),(C*H) */
45905 emit_insn (gen_mulv4si3 (t2, t1, op2));
45906
45907 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45908 emit_insn (gen_xop_phadddq (t3, t2));
45909
45910 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45911 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45912
45913 /* Multiply the lower parts and add everything together. */
45914 t5 = gen_reg_rtx (V2DImode);
45915 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45916 gen_lowpart (V4SImode, op1),
45917 gen_lowpart (V4SImode, op2)));
45918 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45919
45920 }
45921 else
45922 {
45923 enum machine_mode nmode;
45924 rtx (*umul) (rtx, rtx, rtx);
45925
45926 if (mode == V2DImode)
45927 {
45928 umul = gen_vec_widen_umult_even_v4si;
45929 nmode = V4SImode;
45930 }
45931 else if (mode == V4DImode)
45932 {
45933 umul = gen_vec_widen_umult_even_v8si;
45934 nmode = V8SImode;
45935 }
45936 else if (mode == V8DImode)
45937 {
45938 umul = gen_vec_widen_umult_even_v16si;
45939 nmode = V16SImode;
45940 }
45941 else
45942 gcc_unreachable ();
45943
45944
45945 /* Multiply low parts. */
45946 t1 = gen_reg_rtx (mode);
45947 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45948
45949 /* Shift input vectors right 32 bits so we can multiply high parts. */
45950 t6 = GEN_INT (32);
45951 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45952 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45953
45954 /* Multiply high parts by low parts. */
45955 t4 = gen_reg_rtx (mode);
45956 t5 = gen_reg_rtx (mode);
45957 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45958 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45959
45960 /* Combine and shift the highparts back. */
45961 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45962 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45963
45964 /* Combine high and low parts. */
45965 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45966 }
45967
45968 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45969 gen_rtx_MULT (mode, op1, op2));
45970 }
45971
45972 /* Calculate integer abs() using only SSE2 instructions. */
45973
45974 void
45975 ix86_expand_sse2_abs (rtx target, rtx input)
45976 {
45977 enum machine_mode mode = GET_MODE (target);
45978 rtx tmp0, tmp1, x;
45979
45980 switch (mode)
45981 {
45982 /* For 32-bit signed integer X, the best way to calculate the absolute
45983 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45984 case V4SImode:
45985 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45986 GEN_INT (GET_MODE_BITSIZE
45987 (GET_MODE_INNER (mode)) - 1),
45988 NULL, 0, OPTAB_DIRECT);
45989 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45990 NULL, 0, OPTAB_DIRECT);
45991 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45992 target, 0, OPTAB_DIRECT);
45993 break;
45994
45995 /* For 16-bit signed integer X, the best way to calculate the absolute
45996 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45997 case V8HImode:
45998 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45999
46000 x = expand_simple_binop (mode, SMAX, tmp0, input,
46001 target, 0, OPTAB_DIRECT);
46002 break;
46003
46004 /* For 8-bit signed integer X, the best way to calculate the absolute
46005 value of X is min ((unsigned char) X, (unsigned char) (-X)),
46006 as SSE2 provides the PMINUB insn. */
46007 case V16QImode:
46008 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
46009
46010 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
46011 target, 0, OPTAB_DIRECT);
46012 break;
46013
46014 default:
46015 gcc_unreachable ();
46016 }
46017
46018 if (x != target)
46019 emit_move_insn (target, x);
46020 }
46021
46022 /* Expand an insert into a vector register through pinsr insn.
46023 Return true if successful. */
46024
46025 bool
46026 ix86_expand_pinsr (rtx *operands)
46027 {
46028 rtx dst = operands[0];
46029 rtx src = operands[3];
46030
46031 unsigned int size = INTVAL (operands[1]);
46032 unsigned int pos = INTVAL (operands[2]);
46033
46034 if (GET_CODE (dst) == SUBREG)
46035 {
46036 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
46037 dst = SUBREG_REG (dst);
46038 }
46039
46040 if (GET_CODE (src) == SUBREG)
46041 src = SUBREG_REG (src);
46042
46043 switch (GET_MODE (dst))
46044 {
46045 case V16QImode:
46046 case V8HImode:
46047 case V4SImode:
46048 case V2DImode:
46049 {
46050 enum machine_mode srcmode, dstmode;
46051 rtx (*pinsr)(rtx, rtx, rtx, rtx);
46052
46053 srcmode = mode_for_size (size, MODE_INT, 0);
46054
46055 switch (srcmode)
46056 {
46057 case QImode:
46058 if (!TARGET_SSE4_1)
46059 return false;
46060 dstmode = V16QImode;
46061 pinsr = gen_sse4_1_pinsrb;
46062 break;
46063
46064 case HImode:
46065 if (!TARGET_SSE2)
46066 return false;
46067 dstmode = V8HImode;
46068 pinsr = gen_sse2_pinsrw;
46069 break;
46070
46071 case SImode:
46072 if (!TARGET_SSE4_1)
46073 return false;
46074 dstmode = V4SImode;
46075 pinsr = gen_sse4_1_pinsrd;
46076 break;
46077
46078 case DImode:
46079 gcc_assert (TARGET_64BIT);
46080 if (!TARGET_SSE4_1)
46081 return false;
46082 dstmode = V2DImode;
46083 pinsr = gen_sse4_1_pinsrq;
46084 break;
46085
46086 default:
46087 return false;
46088 }
46089
46090 rtx d = dst;
46091 if (GET_MODE (dst) != dstmode)
46092 d = gen_reg_rtx (dstmode);
46093 src = gen_lowpart (srcmode, src);
46094
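/* Convert the bit position into an element index; the pinsr expanders
   take the position encoded as a single-bit merge mask.  */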
46095 pos /= size;
46096
46097 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
46098 GEN_INT (1 << pos)));
46099 if (d != dst)
46100 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
46101 return true;
46102 }
46103
46104 default:
46105 return false;
46106 }
46107 }
46108 \f
46109 /* This function returns the calling-ABI-specific va_list type node,
46110 i.e. the va_list type specific to FNDECL. */
46111
46112 static tree
46113 ix86_fn_abi_va_list (tree fndecl)
46114 {
46115 if (!TARGET_64BIT)
46116 return va_list_type_node;
46117 gcc_assert (fndecl != NULL_TREE);
46118
46119 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
46120 return ms_va_list_type_node;
46121 else
46122 return sysv_va_list_type_node;
46123 }
46124
46125 /* Returns the canonical va_list type specified by TYPE. If there
46126 is no valid TYPE provided, it returns NULL_TREE. */
46127
46128 static tree
46129 ix86_canonical_va_list_type (tree type)
46130 {
46131 tree wtype, htype;
46132
46133 /* Resolve references and pointers to va_list type. */
46134 if (TREE_CODE (type) == MEM_REF)
46135 type = TREE_TYPE (type);
46136 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
46137 type = TREE_TYPE (type);
46138 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
46139 type = TREE_TYPE (type);
46140
46141 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
46142 {
46143 wtype = va_list_type_node;
46144 gcc_assert (wtype != NULL_TREE);
46145 htype = type;
46146 if (TREE_CODE (wtype) == ARRAY_TYPE)
46147 {
46148 /* If va_list is an array type, the argument may have decayed
46149 to a pointer type, e.g. by being passed to another function.
46150 In that case, unwrap both types so that we can compare the
46151 underlying records. */
46152 if (TREE_CODE (htype) == ARRAY_TYPE
46153 || POINTER_TYPE_P (htype))
46154 {
46155 wtype = TREE_TYPE (wtype);
46156 htype = TREE_TYPE (htype);
46157 }
46158 }
46159 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46160 return va_list_type_node;
46161 wtype = sysv_va_list_type_node;
46162 gcc_assert (wtype != NULL_TREE);
46163 htype = type;
46164 if (TREE_CODE (wtype) == ARRAY_TYPE)
46165 {
46166 /* If va_list is an array type, the argument may have decayed
46167 to a pointer type, e.g. by being passed to another function.
46168 In that case, unwrap both types so that we can compare the
46169 underlying records. */
46170 if (TREE_CODE (htype) == ARRAY_TYPE
46171 || POINTER_TYPE_P (htype))
46172 {
46173 wtype = TREE_TYPE (wtype);
46174 htype = TREE_TYPE (htype);
46175 }
46176 }
46177 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46178 return sysv_va_list_type_node;
46179 wtype = ms_va_list_type_node;
46180 gcc_assert (wtype != NULL_TREE);
46181 htype = type;
46182 if (TREE_CODE (wtype) == ARRAY_TYPE)
46183 {
46184 /* If va_list is an array type, the argument may have decayed
46185 to a pointer type, e.g. by being passed to another function.
46186 In that case, unwrap both types so that we can compare the
46187 underlying records. */
46188 if (TREE_CODE (htype) == ARRAY_TYPE
46189 || POINTER_TYPE_P (htype))
46190 {
46191 wtype = TREE_TYPE (wtype);
46192 htype = TREE_TYPE (htype);
46193 }
46194 }
46195 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
46196 return ms_va_list_type_node;
46197 return NULL_TREE;
46198 }
46199 return std_canonical_va_list_type (type);
46200 }
46201
46202 /* Iterate through the target-specific builtin types for va_list.
46203 IDX denotes the iterator, *PTREE is set to the result type of
46204 the va_list builtin, and *PNAME to its internal type.
46205 Returns zero if there is no element for this index, otherwise
46206 IDX should be increased upon the next call.
46207 Note, do not iterate a base builtin's name like __builtin_va_list.
46208 Used from c_common_nodes_and_builtins. */
46209
46210 static int
46211 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
46212 {
46213 if (TARGET_64BIT)
46214 {
46215 switch (idx)
46216 {
46217 default:
46218 break;
46219
46220 case 0:
46221 *ptree = ms_va_list_type_node;
46222 *pname = "__builtin_ms_va_list";
46223 return 1;
46224
46225 case 1:
46226 *ptree = sysv_va_list_type_node;
46227 *pname = "__builtin_sysv_va_list";
46228 return 1;
46229 }
46230 }
46231
46232 return 0;
46233 }
46234
46235 #undef TARGET_SCHED_DISPATCH
46236 #define TARGET_SCHED_DISPATCH has_dispatch
46237 #undef TARGET_SCHED_DISPATCH_DO
46238 #define TARGET_SCHED_DISPATCH_DO do_dispatch
46239 #undef TARGET_SCHED_REASSOCIATION_WIDTH
46240 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
46241 #undef TARGET_SCHED_REORDER
46242 #define TARGET_SCHED_REORDER ix86_sched_reorder
46243 #undef TARGET_SCHED_ADJUST_PRIORITY
46244 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
46245 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
46246 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
46247 ix86_dependencies_evaluation_hook
46248
46249 /* The size of the dispatch window is the total number of bytes of
46250 object code allowed in a window. */
46251 #define DISPATCH_WINDOW_SIZE 16
46252
46253 /* Number of dispatch windows considered for scheduling. */
46254 #define MAX_DISPATCH_WINDOWS 3
46255
46256 /* Maximum number of instructions in a window. */
46257 #define MAX_INSN 4
46258
46259 /* Maximum number of immediate operands in a window. */
46260 #define MAX_IMM 4
46261
46262 /* Maximum number of immediate bits allowed in a window. */
46263 #define MAX_IMM_SIZE 128
46264
46265 /* Maximum number of 32 bit immediates allowed in a window. */
46266 #define MAX_IMM_32 4
46267
46268 /* Maximum number of 64 bit immediates allowed in a window. */
46269 #define MAX_IMM_64 2
46270
46271 /* Maximum total of loads or prefetches allowed in a window. */
46272 #define MAX_LOAD 2
46273
46274 /* Maximum total of stores allowed in a window. */
46275 #define MAX_STORE 1
46276
46277 #undef BIG
46278 #define BIG 100
46279
46280
46281 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
46282 enum dispatch_group {
46283 disp_no_group = 0,
46284 disp_load,
46285 disp_store,
46286 disp_load_store,
46287 disp_prefetch,
46288 disp_imm,
46289 disp_imm_32,
46290 disp_imm_64,
46291 disp_branch,
46292 disp_cmp,
46293 disp_jcc,
46294 disp_last
46295 };
46296
46297 /* Number of allowable groups in a dispatch window. It is an array
46298 indexed by the dispatch_group enum. 100 is used as a big number
46299 because the number of these kinds of operations has no effect in
46300 a dispatch window, but we need them for other reasons in
46301 the table. */
46302 static unsigned int num_allowable_groups[disp_last] = {
46303 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
46304 };
46305
46306 char group_name[disp_last + 1][16] = {
46307 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
46308 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
46309 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
46310 };
46311
46312 /* Instruction path. */
46313 enum insn_path {
46314 no_path = 0,
46315 path_single, /* Single micro op. */
46316 path_double, /* Double micro op. */
46317 path_multi, /* Instructions with more than 2 micro ops. */
46318 last_path
46319 };
46320
46321 /* sched_insn_info defines a window to the instructions scheduled in
46322 the basic block. It contains a pointer to the insn_info table and
46323 the instruction scheduled.
46324
46325 Windows are allocated for each basic block and are linked
46326 together. */
46327 typedef struct sched_insn_info_s {
46328 rtx insn;
46329 enum dispatch_group group;
46330 enum insn_path path;
46331 int byte_len;
46332 int imm_bytes;
46333 } sched_insn_info;
46334
46335 /* Linked list of dispatch windows. This is a two-way list of
46336 dispatch windows of a basic block. It contains information about
46337 the number of uops in the window and the total number of
46338 instructions and of bytes in the object code for this dispatch
46339 window. */
46340 typedef struct dispatch_windows_s {
46341 int num_insn; /* Number of insn in the window. */
46342 int num_uops; /* Number of uops in the window. */
46343 int window_size; /* Number of bytes in the window. */
46344 int window_num; /* Window number, either 0 or 1. */
46345 int num_imm; /* Number of immediates in an insn. */
46346 int num_imm_32; /* Number of 32 bit immediates in an insn. */
46347 int num_imm_64; /* Number of 64 bit immediates in an insn. */
46348 int imm_size; /* Total immediates in the window. */
46349 int num_loads; /* Total memory loads in the window. */
46350 int num_stores; /* Total memory stores in the window. */
46351 int violation; /* Violation exists in window. */
46352 sched_insn_info *window; /* Pointer to the window. */
46353 struct dispatch_windows_s *next;
46354 struct dispatch_windows_s *prev;
46355 } dispatch_windows;
46356
46357 /* Immediate values used in an insn. */
46358 typedef struct imm_info_s
46359 {
46360 int imm;
46361 int imm32;
46362 int imm64;
46363 } imm_info;
46364
46365 static dispatch_windows *dispatch_window_list;
46366 static dispatch_windows *dispatch_window_list1;
46367
46368 /* Get dispatch group of insn. */
46369
46370 static enum dispatch_group
46371 get_mem_group (rtx_insn *insn)
46372 {
46373 enum attr_memory memory;
46374
46375 if (INSN_CODE (insn) < 0)
46376 return disp_no_group;
46377 memory = get_attr_memory (insn);
46378 if (memory == MEMORY_STORE)
46379 return disp_store;
46380
46381 if (memory == MEMORY_LOAD)
46382 return disp_load;
46383
46384 if (memory == MEMORY_BOTH)
46385 return disp_load_store;
46386
46387 return disp_no_group;
46388 }
46389
46390 /* Return true if insn is a compare instruction. */
46391
46392 static bool
46393 is_cmp (rtx_insn *insn)
46394 {
46395 enum attr_type type;
46396
46397 type = get_attr_type (insn);
46398 return (type == TYPE_TEST
46399 || type == TYPE_ICMP
46400 || type == TYPE_FCMP
46401 || GET_CODE (PATTERN (insn)) == COMPARE);
46402 }
46403
46404 /* Return true if a dispatch violation was encountered. */
46405
46406 static bool
46407 dispatch_violation (void)
46408 {
46409 if (dispatch_window_list->next)
46410 return dispatch_window_list->next->violation;
46411 return dispatch_window_list->violation;
46412 }
46413
46414 /* Return true if insn is a branch instruction. */
46415
46416 static bool
46417 is_branch (rtx insn)
46418 {
46419 return (CALL_P (insn) || JUMP_P (insn));
46420 }
46421
46422 /* Return true if insn is a prefetch instruction. */
46423
46424 static bool
46425 is_prefetch (rtx insn)
46426 {
46427 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
46428 }
46429
46430 /* This function initializes a dispatch window and the list container holding a
46431 pointer to the window. */
46432
46433 static void
46434 init_window (int window_num)
46435 {
46436 int i;
46437 dispatch_windows *new_list;
46438
46439 if (window_num == 0)
46440 new_list = dispatch_window_list;
46441 else
46442 new_list = dispatch_window_list1;
46443
46444 new_list->num_insn = 0;
46445 new_list->num_uops = 0;
46446 new_list->window_size = 0;
46447 new_list->next = NULL;
46448 new_list->prev = NULL;
46449 new_list->window_num = window_num;
46450 new_list->num_imm = 0;
46451 new_list->num_imm_32 = 0;
46452 new_list->num_imm_64 = 0;
46453 new_list->imm_size = 0;
46454 new_list->num_loads = 0;
46455 new_list->num_stores = 0;
46456 new_list->violation = false;
46457
46458 for (i = 0; i < MAX_INSN; i++)
46459 {
46460 new_list->window[i].insn = NULL;
46461 new_list->window[i].group = disp_no_group;
46462 new_list->window[i].path = no_path;
46463 new_list->window[i].byte_len = 0;
46464 new_list->window[i].imm_bytes = 0;
46465 }
46466 return;
46467 }
46468
46469 /* This function allocates and initializes a dispatch window and the
46470 list container holding a pointer to the window. */
46471
46472 static dispatch_windows *
46473 allocate_window (void)
46474 {
46475 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
46476 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
46477
46478 return new_list;
46479 }
46480
46481 /* This routine initializes the dispatch scheduling information. It
46482 initiates building dispatch scheduler tables and constructs the
46483 first dispatch window. */
46484
46485 static void
46486 init_dispatch_sched (void)
46487 {
46488 /* Allocate a dispatch list and a window. */
46489 dispatch_window_list = allocate_window ();
46490 dispatch_window_list1 = allocate_window ();
46491 init_window (0);
46492 init_window (1);
46493 }
46494
46495 /* This function returns true if a branch is detected. End of a basic block
46496 does not have to be a branch, but here we assume only branches end a
46497 window. */
46498
46499 static bool
46500 is_end_basic_block (enum dispatch_group group)
46501 {
46502 return group == disp_branch;
46503 }
46504
46505 /* This function is called when the end of a window processing is reached. */
46506
46507 static void
46508 process_end_window (void)
46509 {
46510 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46511 if (dispatch_window_list->next)
46512 {
46513 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46514 gcc_assert (dispatch_window_list->window_size
46515 + dispatch_window_list1->window_size <= 48);
46516 init_window (1);
46517 }
46518 init_window (0);
46519 }
46520
46521 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46522 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46523 for 48 bytes of instructions. Note that these are not dispatch
46524 windows of size DISPATCH_WINDOW_SIZE. */
46525
46526 static dispatch_windows *
46527 allocate_next_window (int window_num)
46528 {
46529 if (window_num == 0)
46530 {
46531 if (dispatch_window_list->next)
46532 init_window (1);
46533 init_window (0);
46534 return dispatch_window_list;
46535 }
46536
46537 dispatch_window_list->next = dispatch_window_list1;
46538 dispatch_window_list1->prev = dispatch_window_list;
46539
46540 return dispatch_window_list1;
46541 }
46542
46543 /* Increment the number of immediate operands of an instruction. */
46544
46545 static int
46546 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46547 {
46548 if (*in_rtx == 0)
46549 return 0;
46550
46551 switch (GET_CODE (*in_rtx))
46552 {
46553 case CONST:
46554 case SYMBOL_REF:
46555 case CONST_INT:
46556 (imm_values->imm)++;
46557 if (x86_64_immediate_operand (*in_rtx, SImode))
46558 (imm_values->imm32)++;
46559 else
46560 (imm_values->imm64)++;
46561 break;
46562
46563 case CONST_DOUBLE:
46564 (imm_values->imm)++;
46565 (imm_values->imm64)++;
46566 break;
46567
46568 case CODE_LABEL:
46569 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46570 {
46571 (imm_values->imm)++;
46572 (imm_values->imm32)++;
46573 }
46574 break;
46575
46576 default:
46577 break;
46578 }
46579
46580 return 0;
46581 }
46582
46583 /* Compute number of immediate operands of an instruction. */
46584
46585 static void
46586 find_constant (rtx in_rtx, imm_info *imm_values)
46587 {
46588 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46589 (rtx_function) find_constant_1, (void *) imm_values);
46590 }
46591
46592 /* Return total size of immediate operands of an instruction along with number
46593 of the corresponding immediate operands. It initializes its parameters to zero
46594 before calling FIND_CONSTANT.
46595 INSN is the input instruction. IMM is the total of immediates.
46596 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46597 bit immediates. */
46598
46599 static int
46600 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46601 {
46602 imm_info imm_values = {0, 0, 0};
46603
46604 find_constant (insn, &imm_values);
46605 *imm = imm_values.imm;
46606 *imm32 = imm_values.imm32;
46607 *imm64 = imm_values.imm64;
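/* Total immediate size in bytes: 4 per 32-bit and 8 per 64-bit immediate.  */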
46608 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46609 }
46610
46611 /* Return true if INSN has at least one immediate operand. */
46613
46614 static bool
46615 has_immediate (rtx insn)
46616 {
46617 int num_imm_operand;
46618 int num_imm32_operand;
46619 int num_imm64_operand;
46620
46621 if (insn)
46622 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46623 &num_imm64_operand);
46624 return false;
46625 }
46626
46627 /* Return single or double path for instructions. */
46628
46629 static enum insn_path
46630 get_insn_path (rtx_insn *insn)
46631 {
46632 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46633
46634 if ((int)path == 0)
46635 return path_single;
46636
46637 if ((int)path == 1)
46638 return path_double;
46639
46640 return path_multi;
46641 }
46642
46643 /* Return insn dispatch group. */
46644
46645 static enum dispatch_group
46646 get_insn_group (rtx_insn *insn)
46647 {
46648 enum dispatch_group group = get_mem_group (insn);
46649 if (group)
46650 return group;
46651
46652 if (is_branch (insn))
46653 return disp_branch;
46654
46655 if (is_cmp (insn))
46656 return disp_cmp;
46657
46658 if (has_immediate (insn))
46659 return disp_imm;
46660
46661 if (is_prefetch (insn))
46662 return disp_prefetch;
46663
46664 return disp_no_group;
46665 }
46666
46667 /* Count number of GROUP restricted instructions in a dispatch
46668 window WINDOW_LIST. */
46669
46670 static int
46671 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
46672 {
46673 enum dispatch_group group = get_insn_group (insn);
46674 int imm_size;
46675 int num_imm_operand;
46676 int num_imm32_operand;
46677 int num_imm64_operand;
46678
46679 if (group == disp_no_group)
46680 return 0;
46681
46682 if (group == disp_imm)
46683 {
46684 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46685 &num_imm64_operand);
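/* Return BIG if adding this insn would exceed any of the window's
   immediate-count or immediate-size limits.  */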
46686 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46687 || num_imm_operand + window_list->num_imm > MAX_IMM
46688 || (num_imm32_operand > 0
46689 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46690 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46691 || (num_imm64_operand > 0
46692 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46693 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46694 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46695 && num_imm64_operand > 0
46696 && ((window_list->num_imm_64 > 0
46697 && window_list->num_insn >= 2)
46698 || window_list->num_insn >= 3)))
46699 return BIG;
46700
46701 return 1;
46702 }
46703
46704 if ((group == disp_load_store
46705 && (window_list->num_loads >= MAX_LOAD
46706 || window_list->num_stores >= MAX_STORE))
46707 || ((group == disp_load
46708 || group == disp_prefetch)
46709 && window_list->num_loads >= MAX_LOAD)
46710 || (group == disp_store
46711 && window_list->num_stores >= MAX_STORE))
46712 return BIG;
46713
46714 return 1;
46715 }
46716
46717 /* This function returns true if insn satisfies dispatch rules on the
46718 last window scheduled. */
46719
46720 static bool
46721 fits_dispatch_window (rtx_insn *insn)
46722 {
46723 dispatch_windows *window_list = dispatch_window_list;
46724 dispatch_windows *window_list_next = dispatch_window_list->next;
46725 unsigned int num_restrict;
46726 enum dispatch_group group = get_insn_group (insn);
46727 enum insn_path path = get_insn_path (insn);
46728 int sum;
46729
46730 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46731 instructions should be given the lowest priority in the
46732 scheduling process in Haifa scheduler to make sure they will be
46733 scheduled in the same dispatch window as the reference to them. */
46734 if (group == disp_jcc || group == disp_cmp)
46735 return false;
46736
46737 /* Check nonrestricted. */
46738 if (group == disp_no_group || group == disp_branch)
46739 return true;
46740
46741 /* Get last dispatch window. */
46742 if (window_list_next)
46743 window_list = window_list_next;
46744
46745 if (window_list->window_num == 1)
46746 {
46747 sum = window_list->prev->window_size + window_list->window_size;
46748
46749 if (sum == 32
46750 || (min_insn_size (insn) + sum) >= 48)
46751 /* Window 1 is full. Go for next window. */
46752 return true;
46753 }
46754
46755 num_restrict = count_num_restricted (insn, window_list);
46756
46757 if (num_restrict > num_allowable_groups[group])
46758 return false;
46759
46760 /* See if it fits in the first window. */
46761 if (window_list->window_num == 0)
46762 {
46763 /* The first window should have only single and double path
46764 uops. */
46765 if (path == path_double
46766 && (window_list->num_uops + 2) > MAX_INSN)
46767 return false;
46768 else if (path != path_single)
46769 return false;
46770 }
46771 return true;
46772 }
46773
46774 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46775 dispatch window WINDOW_LIST. */
46776
46777 static void
46778 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
46779 {
46780 int byte_len = min_insn_size (insn);
46781 int num_insn = window_list->num_insn;
46782 int imm_size;
46783 sched_insn_info *window = window_list->window;
46784 enum dispatch_group group = get_insn_group (insn);
46785 enum insn_path path = get_insn_path (insn);
46786 int num_imm_operand;
46787 int num_imm32_operand;
46788 int num_imm64_operand;
46789
46790 if (!window_list->violation && group != disp_cmp
46791 && !fits_dispatch_window (insn))
46792 window_list->violation = true;
46793
46794 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46795 &num_imm64_operand);
46796
46797 /* Initialize window with new instruction. */
46798 window[num_insn].insn = insn;
46799 window[num_insn].byte_len = byte_len;
46800 window[num_insn].group = group;
46801 window[num_insn].path = path;
46802 window[num_insn].imm_bytes = imm_size;
46803
46804 window_list->window_size += byte_len;
46805 window_list->num_insn = num_insn + 1;
46806 window_list->num_uops = window_list->num_uops + num_uops;
46807 window_list->imm_size += imm_size;
46808 window_list->num_imm += num_imm_operand;
46809 window_list->num_imm_32 += num_imm32_operand;
46810 window_list->num_imm_64 += num_imm64_operand;
46811
46812 if (group == disp_store)
46813 window_list->num_stores += 1;
46814 else if (group == disp_load
46815 || group == disp_prefetch)
46816 window_list->num_loads += 1;
46817 else if (group == disp_load_store)
46818 {
46819 window_list->num_stores += 1;
46820 window_list->num_loads += 1;
46821 }
46822 }
46823
46824 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46825 If the total bytes of instructions or the number of instructions in
46826 the window exceed the allowable limits, a new window is allocated. */
46827
46828 static void
46829 add_to_dispatch_window (rtx_insn *insn)
46830 {
46831 int byte_len;
46832 dispatch_windows *window_list;
46833 dispatch_windows *next_list;
46834 dispatch_windows *window0_list;
46835 enum insn_path path;
46836 enum dispatch_group insn_group;
46837 bool insn_fits;
46838 int num_insn;
46839 int num_uops;
46840 int window_num;
46841 int insn_num_uops;
46842 int sum;
46843
46844 if (INSN_CODE (insn) < 0)
46845 return;
46846
46847 byte_len = min_insn_size (insn);
46848 window_list = dispatch_window_list;
46849 next_list = window_list->next;
46850 path = get_insn_path (insn);
46851 insn_group = get_insn_group (insn);
46852
46853 /* Get the last dispatch window. */
46854 if (next_list)
46855 window_list = dispatch_window_list->next;
46856
46857 if (path == path_single)
46858 insn_num_uops = 1;
46859 else if (path == path_double)
46860 insn_num_uops = 2;
46861 else
46862 insn_num_uops = (int) path;
46863
46864 /* If the current window is full, get a new window.
46865 Window number zero is full if MAX_INSN uops are scheduled in it.
46866 Window number one is full if window zero's bytes plus window
46867 one's bytes is 32, or if adding the bytes of the new instruction
46868 makes the total greater than 48, or if it already has MAX_INSN
46869 instructions in it. */
46870 num_insn = window_list->num_insn;
46871 num_uops = window_list->num_uops;
46872 window_num = window_list->window_num;
46873 insn_fits = fits_dispatch_window (insn);
46874
46875 if (num_insn >= MAX_INSN
46876 || num_uops + insn_num_uops > MAX_INSN
46877 || !(insn_fits))
46878 {
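/* The current window is full or the insn does not fit; switch to the
   other window.  */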
46879 window_num = ~window_num & 1;
46880 window_list = allocate_next_window (window_num);
46881 }
46882
46883 if (window_num == 0)
46884 {
46885 add_insn_window (insn, window_list, insn_num_uops);
46886 if (window_list->num_insn >= MAX_INSN
46887 && insn_group == disp_branch)
46888 {
46889 process_end_window ();
46890 return;
46891 }
46892 }
46893 else if (window_num == 1)
46894 {
46895 window0_list = window_list->prev;
46896 sum = window0_list->window_size + window_list->window_size;
46897 if (sum == 32
46898 || (byte_len + sum) >= 48)
46899 {
46900 process_end_window ();
46901 window_list = dispatch_window_list;
46902 }
46903
46904 add_insn_window (insn, window_list, insn_num_uops);
46905 }
46906 else
46907 gcc_unreachable ();
46908
46909 if (is_end_basic_block (insn_group))
46910 {
46911 /* The end of the basic block has been reached; do end-of-basic-block processing. */
46912 process_end_window ();
46913 return;
46914 }
46915 }
46916
46917 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46918
46919 DEBUG_FUNCTION static void
46920 debug_dispatch_window_file (FILE *file, int window_num)
46921 {
46922 dispatch_windows *list;
46923 int i;
46924
46925 if (window_num == 0)
46926 list = dispatch_window_list;
46927 else
46928 list = dispatch_window_list1;
46929
46930 fprintf (file, "Window #%d:\n", list->window_num);
46931 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46932 list->num_insn, list->num_uops, list->window_size);
46933 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46934 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46935
46936 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46937 list->num_stores);
46938 fprintf (file, " insn info:\n");
46939
46940 for (i = 0; i < MAX_INSN; i++)
46941 {
46942 if (!list->window[i].insn)
46943 break;
46944 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46945 i, group_name[list->window[i].group],
46946 i, (void *)list->window[i].insn,
46947 i, list->window[i].path,
46948 i, list->window[i].byte_len,
46949 i, list->window[i].imm_bytes);
46950 }
46951 }
46952
46953 /* Print to stdout a dispatch window. */
46954
46955 DEBUG_FUNCTION void
46956 debug_dispatch_window (int window_num)
46957 {
46958 debug_dispatch_window_file (stdout, window_num);
46959 }
46960
46961 /* Print INSN dispatch information to FILE. */
46962
46963 DEBUG_FUNCTION static void
46964 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
46965 {
46966 int byte_len;
46967 enum insn_path path;
46968 enum dispatch_group group;
46969 int imm_size;
46970 int num_imm_operand;
46971 int num_imm32_operand;
46972 int num_imm64_operand;
46973
46974 if (INSN_CODE (insn) < 0)
46975 return;
46976
46977 byte_len = min_insn_size (insn);
46978 path = get_insn_path (insn);
46979 group = get_insn_group (insn);
46980 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46981 &num_imm64_operand);
46982
46983 fprintf (file, " insn info:\n");
46984 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46985 group_name[group], path, byte_len);
46986 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46987 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46988 }
46989
46990 /* Print to STDERR the status of the ready list with respect to
46991 dispatch windows. */
46992
46993 DEBUG_FUNCTION void
46994 debug_ready_dispatch (void)
46995 {
46996 int i;
46997 int no_ready = number_in_ready ();
46998
46999 fprintf (stdout, "Number of ready: %d\n", no_ready);
47000
47001 for (i = 0; i < no_ready; i++)
47002 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
47003 }
47004
47005 /* This routine is the driver of the dispatch scheduler. */
47006
47007 static void
47008 do_dispatch (rtx_insn *insn, int mode)
47009 {
47010 if (mode == DISPATCH_INIT)
47011 init_dispatch_sched ();
47012 else if (mode == ADD_TO_DISPATCH_WINDOW)
47013 add_to_dispatch_window (insn);
47014 }
47015
47016 /* Return TRUE if Dispatch Scheduling is supported. */
47017
47018 static bool
47019 has_dispatch (rtx_insn *insn, int action)
47020 {
47021 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
47022 && flag_dispatch_scheduler)
47023 switch (action)
47024 {
47025 default:
47026 return false;
47027
47028 case IS_DISPATCH_ON:
47029 return true;
47030 break;
47031
47032 case IS_CMP:
47033 return is_cmp (insn);
47034
47035 case DISPATCH_VIOLATION:
47036 return dispatch_violation ();
47037
47038 case FITS_DISPATCH_WINDOW:
47039 return fits_dispatch_window (insn);
47040 }
47041
47042 return false;
47043 }
47044
47045 /* Implementation of reassociation_width target hook used by
47046 reassoc phase to identify parallelism level in reassociated
47047 tree. Statements tree_code is passed in OPC. Arguments type
47048 is passed in MODE.
47049
47050 Currently parallel reassociation is enabled for Atom
47051 processors only and we set reassociation width to be 2
47052 because Atom may issue up to 2 instructions per cycle.
47053
47054 Return value should be fixed if parallel reassociation is
47055 enabled for other processors. */
47056
47057 static int
47058 ix86_reassociation_width (unsigned int, enum machine_mode mode)
47059 {
47060 int res = 1;
47061
47062 /* Vector part. */
47063 if (VECTOR_MODE_P (mode))
47064 {
47065 if (TARGET_VECTOR_PARALLEL_EXECUTION)
47066 return 2;
47067 else
47068 return 1;
47069 }
47070
47071 /* Scalar part. */
47072 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
47073 res = 2;
47074 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
47075 res = 2;
47076
47077 return res;
47078 }
47079
47080 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
47081 place emms and femms instructions. */
47082
47083 static enum machine_mode
47084 ix86_preferred_simd_mode (enum machine_mode mode)
47085 {
47086 if (!TARGET_SSE)
47087 return word_mode;
47088
47089 switch (mode)
47090 {
47091 case QImode:
47092 return TARGET_AVX512BW ? V64QImode :
47093 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
47094 case HImode:
47095 return TARGET_AVX512BW ? V32HImode :
47096 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
47097 case SImode:
47098 return TARGET_AVX512F ? V16SImode :
47099 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
47100 case DImode:
47101 return TARGET_AVX512F ? V8DImode :
47102 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
47103
47104 case SFmode:
47105 if (TARGET_AVX512F)
47106 return V16SFmode;
47107 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47108 return V8SFmode;
47109 else
47110 return V4SFmode;
47111
47112 case DFmode:
47113 if (!TARGET_VECTORIZE_DOUBLE)
47114 return word_mode;
47115 else if (TARGET_AVX512F)
47116 return V8DFmode;
47117 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
47118 return V4DFmode;
47119 else if (TARGET_SSE2)
47120 return V2DFmode;
47121 /* FALLTHRU */
47122
47123 default:
47124 return word_mode;
47125 }
47126 }
47127
47128 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
47129 vectors. If AVX512F is enabled then try vectorizing with 512bit,
47130 256bit and 128bit vectors. */
47131
47132 static unsigned int
47133 ix86_autovectorize_vector_sizes (void)
47134 {
47135 return TARGET_AVX512F ? 64 | 32 | 16 :
47136 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
47137 }
47138
47139 \f
47140
47141 /* Return class of registers which could be used for pseudo of MODE
47142 and of class RCLASS for spilling instead of memory. Return NO_REGS
47143 if it is not possible or non-profitable. */
47144 static reg_class_t
47145 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
47146 {
47147 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
47148 && (mode == SImode || (TARGET_64BIT && mode == DImode))
47149 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
47150 return ALL_SSE_REGS;
47151 return NO_REGS;
47152 }
47153
47154 /* Implement targetm.vectorize.init_cost. */
47155
47156 static void *
47157 ix86_init_cost (struct loop *)
47158 {
47159 unsigned *cost = XNEWVEC (unsigned, 3);
47160 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
47161 return cost;
47162 }
47163
47164 /* Implement targetm.vectorize.add_stmt_cost. */
47165
47166 static unsigned
47167 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
47168 struct _stmt_vec_info *stmt_info, int misalign,
47169 enum vect_cost_model_location where)
47170 {
47171 unsigned *cost = (unsigned *) data;
47172 unsigned retval = 0;
47173
47174 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
47175 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
47176
47177 /* Statements in an inner loop relative to the loop being
47178 vectorized are weighted more heavily. The value here is
47179 arbitrary and could potentially be improved with analysis. */
47180 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
47181 count *= 50; /* FIXME. */
47182
47183 retval = (unsigned) (count * stmt_cost);
47184
47185 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
47186 for Silvermont, as it has an out-of-order integer pipeline and can
47187 execute 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
47188 if (TARGET_SILVERMONT || TARGET_INTEL)
47189 if (stmt_info && stmt_info->stmt)
47190 {
47191 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
47192 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
47193 retval = (retval * 17) / 10;
47194 }
47195
47196 cost[where] += retval;
47197
47198 return retval;
47199 }
47200
47201 /* Implement targetm.vectorize.finish_cost. */
47202
47203 static void
47204 ix86_finish_cost (void *data, unsigned *prologue_cost,
47205 unsigned *body_cost, unsigned *epilogue_cost)
47206 {
47207 unsigned *cost = (unsigned *) data;
47208 *prologue_cost = cost[vect_prologue];
47209 *body_cost = cost[vect_body];
47210 *epilogue_cost = cost[vect_epilogue];
47211 }
47212
47213 /* Implement targetm.vectorize.destroy_cost_data. */
47214
47215 static void
47216 ix86_destroy_cost_data (void *data)
47217 {
47218 free (data);
47219 }
47220
47221 /* Validate target specific memory model bits in VAL. */
47222
47223 static unsigned HOST_WIDE_INT
47224 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
47225 {
47226 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
47227 bool strong;
47228
47229 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
47230 |MEMMODEL_MASK)
47231 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
47232 {
47233 warning (OPT_Winvalid_memory_model,
47234 	       "unknown architecture specific memory model");
47235 return MEMMODEL_SEQ_CST;
47236 }
47237 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
47238 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
47239 {
47240 warning (OPT_Winvalid_memory_model,
47241 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
47242 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
47243 }
47244 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
47245 {
47246 warning (OPT_Winvalid_memory_model,
47247 "HLE_RELEASE not used with RELEASE or stronger memory model");
47248 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
47249 }
47250 return val;
47251 }
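/* The bits validated here are those user code can OR into a standard
   memory model, e.g. (assuming HLE is enabled so the macros are defined)

     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);

   Pairing an HLE bit with a weaker model than it requires is diagnosed
   above, and the model is forced to SEQ_CST while keeping the HLE bit.  */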
47252
47253 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
47254    CLONEI->vecsize_float and, if CLONEI->simdlen is 0, also
47255    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
47256    or the number of vecsize_mangle variants that should be emitted.  */
47257
47258 static int
47259 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
47260 struct cgraph_simd_clone *clonei,
47261 tree base_type, int num)
47262 {
47263 int ret = 1;
47264
47265 if (clonei->simdlen
47266 && (clonei->simdlen < 2
47267 || clonei->simdlen > 16
47268 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
47269 {
47270 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47271 "unsupported simdlen %d", clonei->simdlen);
47272 return 0;
47273 }
47274
47275 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
47276 if (TREE_CODE (ret_type) != VOID_TYPE)
47277 switch (TYPE_MODE (ret_type))
47278 {
47279 case QImode:
47280 case HImode:
47281 case SImode:
47282 case DImode:
47283 case SFmode:
47284 case DFmode:
47285 /* case SCmode: */
47286 /* case DCmode: */
47287 break;
47288 default:
47289 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47290 		    "unsupported return type %qT for simd", ret_type);
47291 return 0;
47292 }
47293
47294 tree t;
47295 int i;
47296
47297 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
47298 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
47299 switch (TYPE_MODE (TREE_TYPE (t)))
47300 {
47301 case QImode:
47302 case HImode:
47303 case SImode:
47304 case DImode:
47305 case SFmode:
47306 case DFmode:
47307 /* case SCmode: */
47308 /* case DCmode: */
47309 break;
47310 default:
47311 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
47312 		    "unsupported argument type %qT for simd", TREE_TYPE (t));
47313 return 0;
47314 }
47315
47316 if (clonei->cilk_elemental)
47317 {
47318       /* Parse the processor clause here.  If not present, default to 'b'.  */
47319 clonei->vecsize_mangle = 'b';
47320 }
47321 else if (!TREE_PUBLIC (node->decl))
47322 {
47323       /* If the function isn't exported, we can pick just one ISA
47324 	 for the clones.  */
47325 if (TARGET_AVX2)
47326 clonei->vecsize_mangle = 'd';
47327 else if (TARGET_AVX)
47328 clonei->vecsize_mangle = 'c';
47329 else
47330 clonei->vecsize_mangle = 'b';
47331 ret = 1;
47332 }
47333 else
47334 {
47335 clonei->vecsize_mangle = "bcd"[num];
47336 ret = 3;
47337 }
47338 switch (clonei->vecsize_mangle)
47339 {
47340 case 'b':
47341 clonei->vecsize_int = 128;
47342 clonei->vecsize_float = 128;
47343 break;
47344 case 'c':
47345 clonei->vecsize_int = 128;
47346 clonei->vecsize_float = 256;
47347 break;
47348 case 'd':
47349 clonei->vecsize_int = 256;
47350 clonei->vecsize_float = 256;
47351 break;
47352 }
47353 if (clonei->simdlen == 0)
47354 {
47355 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
47356 clonei->simdlen = clonei->vecsize_int;
47357 else
47358 clonei->simdlen = clonei->vecsize_float;
47359 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
47360 if (clonei->simdlen > 16)
47361 clonei->simdlen = 16;
47362 }
47363 return ret;
47364 }
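/* Worked example: for an exported function whose base type is double,
   three clones are emitted ('b', 'c' and 'd').  For the 'c' (AVX) clone
   vecsize_float is 256, so an unspecified simdlen becomes
   256 / GET_MODE_BITSIZE (DFmode) == 256 / 64 == 4.  */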
47365
47366 /* Add target attribute to SIMD clone NODE if needed. */
47367
47368 static void
47369 ix86_simd_clone_adjust (struct cgraph_node *node)
47370 {
47371 const char *str = NULL;
47372 gcc_assert (node->decl == cfun->decl);
47373 switch (node->simdclone->vecsize_mangle)
47374 {
47375 case 'b':
47376 if (!TARGET_SSE2)
47377 str = "sse2";
47378 break;
47379 case 'c':
47380 if (!TARGET_AVX)
47381 str = "avx";
47382 break;
47383 case 'd':
47384 if (!TARGET_AVX2)
47385 str = "avx2";
47386 break;
47387 default:
47388 gcc_unreachable ();
47389 }
47390 if (str == NULL)
47391 return;
47392 push_cfun (NULL);
47393 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
47394 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
47395 gcc_assert (ok);
47396 pop_cfun ();
47397 ix86_previous_fndecl = NULL_TREE;
47398 ix86_set_current_function (node->decl);
47399 }
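/* For instance, when the unit is compiled with plain -msse2, the 'c' and
   'd' clones get implicit target("avx") and target("avx2") attributes
   here so their bodies can use wider vector instructions, while the 'b'
   clone already satisfies its ISA requirement and is left unchanged.  */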
47400
47401 /* If SIMD clone NODE can't be used in a vectorized loop
47402    in the current function, return -1; otherwise return the badness of
47403    using it (0 if it is the most desirable from the vecsize_mangle point
47404    of view, 1 if slightly less desirable, etc.).  */
47405
47406 static int
47407 ix86_simd_clone_usable (struct cgraph_node *node)
47408 {
47409 switch (node->simdclone->vecsize_mangle)
47410 {
47411 case 'b':
47412 if (!TARGET_SSE2)
47413 return -1;
47414 if (!TARGET_AVX)
47415 return 0;
47416 return TARGET_AVX2 ? 2 : 1;
47417 case 'c':
47418 if (!TARGET_AVX)
47419 return -1;
47420 return TARGET_AVX2 ? 1 : 0;
47421 break;
47422 case 'd':
47423 if (!TARGET_AVX2)
47424 return -1;
47425 return 0;
47426 default:
47427 gcc_unreachable ();
47428 }
47429 }
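/* E.g. when the caller is compiled with -mavx2, the 'd' clone is
   preferred (badness 0), the 'c' clone is next (1) and the 'b' clone
   last (2); a clone requiring an ISA the caller lacks is rejected
   with -1.  */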
47430
47431 /* Count the number of memory references in *X, accumulating the
47432    result in *MEM_COUNT.  This value determines the unrolling factor
47433    for the bdver3 and bdver4 architectures.  */
47434
47435 static int
47436 ix86_loop_memcount (rtx *x, unsigned *mem_count)
47437 {
47438 if (*x != NULL_RTX && MEM_P (*x))
47439 {
47440 enum machine_mode mode;
47441 unsigned int n_words;
47442
47443 mode = GET_MODE (*x);
47444 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
47445
47446       if (n_words > 4)
47447         (*mem_count) += 2;
47448       else
47449         (*mem_count) += 1;
47450 }
47451 return 0;
47452 }
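/* References wider than four words (e.g. a 32-byte vector access on a
   32-bit target) are counted twice, presumably because they expand to
   more than one memory operation.  */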
47453
47454 /* This function adjusts the unroll factor based on
47455    the hardware capabilities.  For example, bdver3 has
47456    a loop buffer which makes unrolling of smaller
47457    loops less important.  This function decides the
47458    unroll factor using the number of memory references
47459    (the value 32 is used) as a heuristic.  */
47460
47461 static unsigned
47462 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
47463 {
47464 basic_block *bbs;
47465 rtx_insn *insn;
47466 unsigned i;
47467 unsigned mem_count = 0;
47468
47469 if (!TARGET_ADJUST_UNROLL)
47470 return nunroll;
47471
47472 /* Count the number of memory references within the loop body. */
47473 bbs = get_loop_body (loop);
47474 for (i = 0; i < loop->num_nodes; i++)
47475 {
47476 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
47477 if (NONDEBUG_INSN_P (insn))
47478 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
47479 &mem_count);
47480 }
47481 free (bbs);
47482
47483   if (mem_count && mem_count <= 32)
47484     return 32 / mem_count;
47485
47486 return nunroll;
47487 }
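/* For example, a loop body containing 8 counted memory references gets
   an unroll factor of 32 / 8 == 4, while a loop with more than 32 such
   references keeps the unroll factor computed by the generic code.  */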
47488
47489
47490 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
47491
47492 static bool
47493 ix86_float_exceptions_rounding_supported_p (void)
47494 {
47495 /* For x87 floating point with standard excess precision handling,
47496 there is no adddf3 pattern (since x87 floating point only has
47497 XFmode operations) so the default hook implementation gets this
47498 wrong. */
47499 return TARGET_80387 || TARGET_SSE_MATH;
47500 }
47501
47502 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
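/* The three trees follow the usual protocol of this hook, roughly the
   feholdexcept / feclearexcept / feupdateenv sequence: *HOLD is expanded
   before the atomic operation (save the FP environment and mask
   exceptions), *CLEAR re-clears the exception flags if the operation has
   to be retried, and *UPDATE restores the environment and re-raises any
   exceptions captured during the operation via __atomic_feraiseexcept.  */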
47503
47504 static void
47505 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47506 {
47507 if (!TARGET_80387 && !TARGET_SSE_MATH)
47508 return;
47509 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47510 if (TARGET_80387)
47511 {
47512 tree fenv_index_type = build_index_type (size_int (6));
47513 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
47514 tree fenv_var = create_tmp_var (fenv_type, NULL);
47515 mark_addressable (fenv_var);
47516 tree fenv_ptr = build_pointer_type (fenv_type);
47517 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47518 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47519 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47520 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47521 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47522 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47523 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47524 tree hold_fnclex = build_call_expr (fnclex, 0);
47525 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47526 hold_fnclex);
47527 *clear = build_call_expr (fnclex, 0);
47528 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47529 tree fnstsw_call = build_call_expr (fnstsw, 0);
47530 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
47531 sw_var, fnstsw_call);
47532 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47533 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47534 exceptions_var, exceptions_x87);
47535 *update = build2 (COMPOUND_EXPR, integer_type_node,
47536 sw_mod, update_mod);
47537 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47538 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47539 }
47540 if (TARGET_SSE_MATH)
47541 {
47542 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47543 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47544 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47545 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47546 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47547 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47548 mxcsr_orig_var, stmxcsr_hold_call);
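      /* The MXCSR value used while the operation is in progress is
	 (mxcsr | 0x1f80) & 0xffffffc0: 0x1f80 sets the six exception mask
	 bits (bits 7-12) so no traps are taken, and the final AND clears
	 the six exception flag bits (bits 0-5).  */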
47549 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47550 mxcsr_orig_var,
47551 build_int_cst (unsigned_type_node, 0x1f80));
47552 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47553 build_int_cst (unsigned_type_node, 0xffffffc0));
47554 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47555 mxcsr_mod_var, hold_mod_val);
47556 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47557 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47558 hold_assign_orig, hold_assign_mod);
47559 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47560 ldmxcsr_hold_call);
47561 if (*hold)
47562 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47563 else
47564 *hold = hold_all;
47565 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47566 if (*clear)
47567 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47568 ldmxcsr_clear_call);
47569 else
47570 *clear = ldmxcsr_clear_call;
47571 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
47572 tree exceptions_sse = fold_convert (integer_type_node,
47573 stxmcsr_update_call);
47574 if (*update)
47575 {
47576 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47577 exceptions_var, exceptions_sse);
47578 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47579 exceptions_var, exceptions_mod);
47580 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47581 exceptions_assign);
47582 }
47583 else
47584 *update = build2 (MODIFY_EXPR, integer_type_node,
47585 exceptions_var, exceptions_sse);
47586 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47587 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47588 ldmxcsr_update_call);
47589 }
47590 tree atomic_feraiseexcept
47591 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47592 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47593 1, exceptions_var);
47594 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47595 atomic_feraiseexcept_call);
47596 }
47597
47598 /* Initialize the GCC target structure. */
47599 #undef TARGET_RETURN_IN_MEMORY
47600 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47601
47602 #undef TARGET_LEGITIMIZE_ADDRESS
47603 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47604
47605 #undef TARGET_ATTRIBUTE_TABLE
47606 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47607 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47608 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47609 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47610 # undef TARGET_MERGE_DECL_ATTRIBUTES
47611 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47612 #endif
47613
47614 #undef TARGET_COMP_TYPE_ATTRIBUTES
47615 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47616
47617 #undef TARGET_INIT_BUILTINS
47618 #define TARGET_INIT_BUILTINS ix86_init_builtins
47619 #undef TARGET_BUILTIN_DECL
47620 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47621 #undef TARGET_EXPAND_BUILTIN
47622 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47623
47624 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47625 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47626 ix86_builtin_vectorized_function
47627
47628 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47629 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47630
47631 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47632 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47633
47634 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47635 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47636
47637 #undef TARGET_BUILTIN_RECIPROCAL
47638 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47639
47640 #undef TARGET_ASM_FUNCTION_EPILOGUE
47641 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47642
47643 #undef TARGET_ENCODE_SECTION_INFO
47644 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47645 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47646 #else
47647 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47648 #endif
47649
47650 #undef TARGET_ASM_OPEN_PAREN
47651 #define TARGET_ASM_OPEN_PAREN ""
47652 #undef TARGET_ASM_CLOSE_PAREN
47653 #define TARGET_ASM_CLOSE_PAREN ""
47654
47655 #undef TARGET_ASM_BYTE_OP
47656 #define TARGET_ASM_BYTE_OP ASM_BYTE
47657
47658 #undef TARGET_ASM_ALIGNED_HI_OP
47659 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47660 #undef TARGET_ASM_ALIGNED_SI_OP
47661 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47662 #ifdef ASM_QUAD
47663 #undef TARGET_ASM_ALIGNED_DI_OP
47664 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47665 #endif
47666
47667 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47668 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47669
47670 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47671 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47672
47673 #undef TARGET_ASM_UNALIGNED_HI_OP
47674 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47675 #undef TARGET_ASM_UNALIGNED_SI_OP
47676 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47677 #undef TARGET_ASM_UNALIGNED_DI_OP
47678 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47679
47680 #undef TARGET_PRINT_OPERAND
47681 #define TARGET_PRINT_OPERAND ix86_print_operand
47682 #undef TARGET_PRINT_OPERAND_ADDRESS
47683 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47684 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47685 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47686 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47687 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47688
47689 #undef TARGET_SCHED_INIT_GLOBAL
47690 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47691 #undef TARGET_SCHED_ADJUST_COST
47692 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47693 #undef TARGET_SCHED_ISSUE_RATE
47694 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47695 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47696 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47697 ia32_multipass_dfa_lookahead
47698 #undef TARGET_SCHED_MACRO_FUSION_P
47699 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47700 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47701 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47702
47703 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47704 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47705
47706 #undef TARGET_MEMMODEL_CHECK
47707 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47708
47709 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47710 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47711
47712 #ifdef HAVE_AS_TLS
47713 #undef TARGET_HAVE_TLS
47714 #define TARGET_HAVE_TLS true
47715 #endif
47716 #undef TARGET_CANNOT_FORCE_CONST_MEM
47717 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47718 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47719 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47720
47721 #undef TARGET_DELEGITIMIZE_ADDRESS
47722 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47723
47724 #undef TARGET_MS_BITFIELD_LAYOUT_P
47725 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47726
47727 #if TARGET_MACHO
47728 #undef TARGET_BINDS_LOCAL_P
47729 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47730 #endif
47731 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47732 #undef TARGET_BINDS_LOCAL_P
47733 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47734 #endif
47735
47736 #undef TARGET_ASM_OUTPUT_MI_THUNK
47737 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47738 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47739 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47740
47741 #undef TARGET_ASM_FILE_START
47742 #define TARGET_ASM_FILE_START x86_file_start
47743
47744 #undef TARGET_OPTION_OVERRIDE
47745 #define TARGET_OPTION_OVERRIDE ix86_option_override
47746
47747 #undef TARGET_REGISTER_MOVE_COST
47748 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47749 #undef TARGET_MEMORY_MOVE_COST
47750 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47751 #undef TARGET_RTX_COSTS
47752 #define TARGET_RTX_COSTS ix86_rtx_costs
47753 #undef TARGET_ADDRESS_COST
47754 #define TARGET_ADDRESS_COST ix86_address_cost
47755
47756 #undef TARGET_FIXED_CONDITION_CODE_REGS
47757 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47758 #undef TARGET_CC_MODES_COMPATIBLE
47759 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47760
47761 #undef TARGET_MACHINE_DEPENDENT_REORG
47762 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47763
47764 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47765 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47766
47767 #undef TARGET_BUILD_BUILTIN_VA_LIST
47768 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47769
47770 #undef TARGET_FOLD_BUILTIN
47771 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47772
47773 #undef TARGET_COMPARE_VERSION_PRIORITY
47774 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47775
47776 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47777 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47778 ix86_generate_version_dispatcher_body
47779
47780 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47781 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47782 ix86_get_function_versions_dispatcher
47783
47784 #undef TARGET_ENUM_VA_LIST_P
47785 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47786
47787 #undef TARGET_FN_ABI_VA_LIST
47788 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47789
47790 #undef TARGET_CANONICAL_VA_LIST_TYPE
47791 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47792
47793 #undef TARGET_EXPAND_BUILTIN_VA_START
47794 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47795
47796 #undef TARGET_MD_ASM_CLOBBERS
47797 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47798
47799 #undef TARGET_PROMOTE_PROTOTYPES
47800 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47801 #undef TARGET_SETUP_INCOMING_VARARGS
47802 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47803 #undef TARGET_MUST_PASS_IN_STACK
47804 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47805 #undef TARGET_FUNCTION_ARG_ADVANCE
47806 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47807 #undef TARGET_FUNCTION_ARG
47808 #define TARGET_FUNCTION_ARG ix86_function_arg
47809 #undef TARGET_INIT_PIC_REG
47810 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
47811 #undef TARGET_USE_PSEUDO_PIC_REG
47812 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
47813 #undef TARGET_FUNCTION_ARG_BOUNDARY
47814 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47815 #undef TARGET_PASS_BY_REFERENCE
47816 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47817 #undef TARGET_INTERNAL_ARG_POINTER
47818 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47819 #undef TARGET_UPDATE_STACK_BOUNDARY
47820 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47821 #undef TARGET_GET_DRAP_RTX
47822 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47823 #undef TARGET_STRICT_ARGUMENT_NAMING
47824 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47825 #undef TARGET_STATIC_CHAIN
47826 #define TARGET_STATIC_CHAIN ix86_static_chain
47827 #undef TARGET_TRAMPOLINE_INIT
47828 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47829 #undef TARGET_RETURN_POPS_ARGS
47830 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47831
47832 #undef TARGET_LEGITIMATE_COMBINED_INSN
47833 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47834
47835 #undef TARGET_ASAN_SHADOW_OFFSET
47836 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47837
47838 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47839 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47840
47841 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47842 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47843
47844 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47845 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47846
47847 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
47848 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
47849 ix86_libgcc_floating_mode_supported_p
47850
47851 #undef TARGET_C_MODE_FOR_SUFFIX
47852 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47853
47854 #ifdef HAVE_AS_TLS
47855 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47856 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47857 #endif
47858
47859 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47860 #undef TARGET_INSERT_ATTRIBUTES
47861 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47862 #endif
47863
47864 #undef TARGET_MANGLE_TYPE
47865 #define TARGET_MANGLE_TYPE ix86_mangle_type
47866
47867 #if !TARGET_MACHO
47868 #undef TARGET_STACK_PROTECT_FAIL
47869 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47870 #endif
47871
47872 #undef TARGET_FUNCTION_VALUE
47873 #define TARGET_FUNCTION_VALUE ix86_function_value
47874
47875 #undef TARGET_FUNCTION_VALUE_REGNO_P
47876 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47877
47878 #undef TARGET_PROMOTE_FUNCTION_MODE
47879 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47880
47881 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47882 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47883
47884 #undef TARGET_INSTANTIATE_DECLS
47885 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47886
47887 #undef TARGET_SECONDARY_RELOAD
47888 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47889
47890 #undef TARGET_CLASS_MAX_NREGS
47891 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47892
47893 #undef TARGET_PREFERRED_RELOAD_CLASS
47894 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47895 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47896 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47897 #undef TARGET_CLASS_LIKELY_SPILLED_P
47898 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47899
47900 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47901 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47902 ix86_builtin_vectorization_cost
47903 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47904 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47905 ix86_vectorize_vec_perm_const_ok
47906 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47907 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47908 ix86_preferred_simd_mode
47909 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47910 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47911 ix86_autovectorize_vector_sizes
47912 #undef TARGET_VECTORIZE_INIT_COST
47913 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47914 #undef TARGET_VECTORIZE_ADD_STMT_COST
47915 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47916 #undef TARGET_VECTORIZE_FINISH_COST
47917 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47918 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47919 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47920
47921 #undef TARGET_SET_CURRENT_FUNCTION
47922 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47923
47924 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47925 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47926
47927 #undef TARGET_OPTION_SAVE
47928 #define TARGET_OPTION_SAVE ix86_function_specific_save
47929
47930 #undef TARGET_OPTION_RESTORE
47931 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47932
47933 #undef TARGET_OPTION_PRINT
47934 #define TARGET_OPTION_PRINT ix86_function_specific_print
47935
47936 #undef TARGET_OPTION_FUNCTION_VERSIONS
47937 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47938
47939 #undef TARGET_CAN_INLINE_P
47940 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47941
47942 #undef TARGET_EXPAND_TO_RTL_HOOK
47943 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47944
47945 #undef TARGET_LEGITIMATE_ADDRESS_P
47946 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47947
47948 #undef TARGET_LRA_P
47949 #define TARGET_LRA_P hook_bool_void_true
47950
47951 #undef TARGET_REGISTER_PRIORITY
47952 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47953
47954 #undef TARGET_REGISTER_USAGE_LEVELING_P
47955 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47956
47957 #undef TARGET_LEGITIMATE_CONSTANT_P
47958 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47959
47960 #undef TARGET_FRAME_POINTER_REQUIRED
47961 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47962
47963 #undef TARGET_CAN_ELIMINATE
47964 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47965
47966 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47967 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47968
47969 #undef TARGET_ASM_CODE_END
47970 #define TARGET_ASM_CODE_END ix86_code_end
47971
47972 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47973 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47974
47975 #if TARGET_MACHO
47976 #undef TARGET_INIT_LIBFUNCS
47977 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47978 #endif
47979
47980 #undef TARGET_LOOP_UNROLL_ADJUST
47981 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47982
47983 #undef TARGET_SPILL_CLASS
47984 #define TARGET_SPILL_CLASS ix86_spill_class
47985
47986 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47987 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47988 ix86_simd_clone_compute_vecsize_and_simdlen
47989
47990 #undef TARGET_SIMD_CLONE_ADJUST
47991 #define TARGET_SIMD_CLONE_ADJUST \
47992 ix86_simd_clone_adjust
47993
47994 #undef TARGET_SIMD_CLONE_USABLE
47995 #define TARGET_SIMD_CLONE_USABLE \
47996 ix86_simd_clone_usable
47997
47998 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47999 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
48000 ix86_float_exceptions_rounding_supported_p
48001
48002 #undef TARGET_MODE_EMIT
48003 #define TARGET_MODE_EMIT ix86_emit_mode_set
48004
48005 #undef TARGET_MODE_NEEDED
48006 #define TARGET_MODE_NEEDED ix86_mode_needed
48007
48008 #undef TARGET_MODE_AFTER
48009 #define TARGET_MODE_AFTER ix86_mode_after
48010
48011 #undef TARGET_MODE_ENTRY
48012 #define TARGET_MODE_ENTRY ix86_mode_entry
48013
48014 #undef TARGET_MODE_EXIT
48015 #define TARGET_MODE_EXIT ix86_mode_exit
48016
48017 #undef TARGET_MODE_PRIORITY
48018 #define TARGET_MODE_PRIORITY ix86_mode_priority
48019
48020 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
48021 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
48022
48023 struct gcc_target targetm = TARGET_INITIALIZER;
48024 \f
48025 #include "gt-i386.h"