AVX-512. Add shuffles (pd, 32x4, etc.).
gcc/config/i386/i386.c (gcc.git)
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "hash-table.h"
56 #include "vec.h"
57 #include "basic-block.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
61 #include "tree-eh.h"
62 #include "gimple-expr.h"
63 #include "is-a.h"
64 #include "gimple.h"
65 #include "gimplify.h"
66 #include "cfgloop.h"
67 #include "dwarf2.h"
68 #include "df.h"
69 #include "tm-constrs.h"
70 #include "params.h"
71 #include "cselib.h"
72 #include "debug.h"
73 #include "sched-int.h"
74 #include "sbitmap.h"
75 #include "fibheap.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "wide-int.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return index of given mode in mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
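/* For illustration, the per-mode cost arrays below are indexed with
   MODE_INDEX; e.g. the cost of starting an SImode multiply for the active
   tuning would be fetched roughly as

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   (a sketch; mult_init and divide are the corresponding arrays in
   struct processor_costs, and index 4 is the "other" fallback slot).  */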
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
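/* Worked example of the scale above: COSTS_N_INSNS (1) == 4, and with an add
   taking 2 bytes, COSTS_N_BYTES (2) == 4 as well, so one add-sized
   instruction carries the same weight whether costs are counted in
   instructions or in bytes when tuning for size.  */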
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
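/* How to read the stringop_algs initializers below (a sketch of the layout of
   struct stringop_algs): the leading field is the algorithm used when the
   block size is unknown at compile time; it is followed by {max_size,
   algorithm, noalign} entries tried in order, with max_size == -1 as the
   terminator.  Each table has two elements, by convention one for 32-bit and
   one for 64-bit code; DUMMY_STRINGOP_ALGS marks a variant that is not
   separately tuned.  */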
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
 413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 414 (we ensure the alignment). For small blocks an inline loop is still a
 415 noticeable win; for bigger blocks either rep movsl or rep movsb is the
 416 way to go. Rep movsb apparently has a more expensive startup time in the
 417 CPU, but after 4K the difference is down in the noise. */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
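/* Worked reading of pentiumpro_memcpy above (an illustration): in 32-bit
   code, blocks of at most 128 bytes use an inline loop, up to 1024 bytes an
   unrolled loop, up to 8192 bytes rep movsl (rep_prefix_4_byte), and anything
   larger rep movsb (rep_prefix_1_byte); for sizes unknown at compile time the
   leading rep_prefix_4_byte entry applies.  The second slot is
   DUMMY_STRINGOP_ALGS, as PentiumPro has no 64-bit mode.  */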
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
 648 /* For some reason, Athlon deals better with the REP prefix (relative to
 649 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
 650 and 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
 725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 726 small blocks it is better to use a loop. For large blocks, a libcall can
 727 do non-temporal accesses and beat inline expansion considerably. */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
 783 /* New AMD processors never drop prefetches; if they cannot be performed
 784 immediately, they are queued. We set the number of simultaneous prefetches
 785 to a large constant to reflect this (leaving the number of prefetches
 786 completely unlimited is probably not a good idea either, as their
 787 execution also takes some time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
 812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
 813 for very small blocks it is better to use a loop. For large blocks, a
 814 libcall can do non-temporal accesses and beat inline expansion considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
 877 /* New AMD processors never drop prefetches; if they cannot be performed
 878 immediately, they are queued. We set the number of simultaneous prefetches
 879 to a large constant to reflect this (leaving the number of prefetches
 880 completely unlimited is probably not a good idea either, as their
 881 execution also takes some time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
 906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 907 very small blocks it is better to use a loop. For large blocks, a libcall
 908 can do non-temporal accesses and beat inline expansion considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
 972 /* New AMD processors never drop prefetches; if they cannot be performed
 973 immediately, they are queued. We set the number of simultaneous prefetches
 974 to a large constant to reflect this (leaving the number of prefetches
 975 completely unlimited is probably not a good idea either, as their
 976 execution also takes some time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
 1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
 1002 very small blocks it is better to use a loop. For large blocks, a libcall
 1003 can do non-temporal accesses and beat inline expansion considerably. */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
 1068 /* New AMD processors never drop prefetches; if they cannot be performed
 1069 immediately, they are queued. We set the number of simultaneous prefetches
 1070 to a large constant to reflect this (leaving the number of prefetches
 1071 completely unlimited is probably not a good idea either, as their
 1072 execution also takes some time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
 1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
 1099 very small blocks it is better to use a loop. For large blocks, a libcall
 1100 can do non-temporal accesses and beat inline expansion considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
 1155 /* New AMD processors never drop prefetches; if they cannot be performed
 1156 immediately, they are queued. We set the number of simultaneous prefetches
 1157 to a large constant to reflect this (leaving the number of prefetches
 1158 completely unlimited is probably not a good idea either, as their
 1159 execution also takes some time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
 1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
 1185 very small blocks it is better to use a loop. For large blocks, a libcall
 1186 can do non-temporal accesses and beat inline expansion considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
 1241 /* New AMD processors never drop prefetches; if they cannot be performed
 1242 immediately, they are queued. We set the number of simultaneous prefetches
 1243 to a large constant to reflect this (leaving the number of prefetches
 1244 completely unlimited is probably not a good idea either, as their
 1245 execution also takes some time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
 1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
 1271 very small blocks it is better to use a loop. For large blocks, a libcall
 1272 can do non-temporal accesses and beat inline expansion considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1847 this cost, however, our current implementation of synth_mult ends up
1848 using unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea takes 2 cycles or more.  With
1934 this cost, however, our current implementation of synth_mult ends up
1935 using unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME perhaps more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
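/* Names of the tuning features, indexed by the X86_TUNE_* enumerators;
   the entries are generated from x86-tune.def via the DEF_TUNE x-macro
   below.  */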
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* If the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
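/* An ix86_frame is filled in by ix86_compute_frame_layout (declared below)
   and describes the concrete layout sketched in the diagram above.  */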
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
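/* Generator functions for insns whose RTL patterns differ between 32-bit
   and 64-bit targets.  The appropriate gen_* routine is installed for each
   pointer once the target word size is known (assumed to happen while the
   target options are being processed).  */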
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling-ABI-specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64-bit part of an argument.
2372 These represent the classes documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2374 uses SFmode or DFmode moves instead of DImode moves to avoid reformatting penalties.
2375
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half contains only padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
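/* Illustrative example of the classification above (a sketch, assuming the
   SysV x86-64 psABI rules implemented later in this file):

     struct s { double d; int i; };

   is classified eightbyte by eightbyte: the first eightbyte (the double)
   gets an SSE class and the second eightbyte (the int plus padding) gets
   an INTEGER class, so the struct is passed in one SSE register and one
   integer register.  */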
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256-bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode
2507 insertion pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2531 };
2532
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2534 {
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2538 {}
2539
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2542 {
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2544 }
2545
2546 virtual unsigned int execute (function *)
2547 {
2548 return rest_of_handle_insert_vzeroupper ();
2549 }
2550
2551 }; // class pass_insert_vzeroupper
2552
2553 } // anon namespace
2554
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2557 {
2558 return new pass_insert_vzeroupper (ctxt);
2559 }
2560
2561 /* Return true if a red-zone is in use. */
2562
2563 static inline bool
2564 ix86_using_red_zone (void)
2565 {
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 }
2568 \f
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
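/* For example (illustrative only), a 64-bit compilation with AVX enabled
   might produce a string along the lines of

     "-m64 -march=corei7-avx -mavx ... -mfpmath=sse"

   where the exact option set depends on the ISA and flag masks passed in.  */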
2571
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2576 {
2577 struct ix86_target_opts
2578 {
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2581 };
2582
2583 /* This table is ordered so that options like -msse4.2 that imply
2584 preceding options are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2586 {
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2597 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2598 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2599 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2600 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2601 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2602 { "-msse3", OPTION_MASK_ISA_SSE3 },
2603 { "-msse2", OPTION_MASK_ISA_SSE2 },
2604 { "-msse", OPTION_MASK_ISA_SSE },
2605 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2606 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2607 { "-mmmx", OPTION_MASK_ISA_MMX },
2608 { "-mabm", OPTION_MASK_ISA_ABM },
2609 { "-mbmi", OPTION_MASK_ISA_BMI },
2610 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2611 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2612 { "-mhle", OPTION_MASK_ISA_HLE },
2613 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2614 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2615 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2616 { "-madx", OPTION_MASK_ISA_ADX },
2617 { "-mtbm", OPTION_MASK_ISA_TBM },
2618 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2619 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2620 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2621 { "-maes", OPTION_MASK_ISA_AES },
2622 { "-msha", OPTION_MASK_ISA_SHA },
2623 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2624 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2625 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2626 { "-mf16c", OPTION_MASK_ISA_F16C },
2627 { "-mrtm", OPTION_MASK_ISA_RTM },
2628 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2629 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2630 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2631 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2632 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2633 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2634 };
2635
2636 /* Flag options. */
2637 static struct ix86_target_opts flag_opts[] =
2638 {
2639 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2640 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2641 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2642 { "-m80387", MASK_80387 },
2643 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2644 { "-malign-double", MASK_ALIGN_DOUBLE },
2645 { "-mcld", MASK_CLD },
2646 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2647 { "-mieee-fp", MASK_IEEE_FP },
2648 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2649 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2650 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2651 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2652 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2653 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2654 { "-mno-red-zone", MASK_NO_RED_ZONE },
2655 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2656 { "-mrecip", MASK_RECIP },
2657 { "-mrtd", MASK_RTD },
2658 { "-msseregparm", MASK_SSEREGPARM },
2659 { "-mstack-arg-probe", MASK_STACK_PROBE },
2660 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2661 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2662 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2663 { "-mvzeroupper", MASK_VZEROUPPER },
2664 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2665 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2666 { "-mprefer-avx128", MASK_PREFER_AVX128},
2667 };
2668
2669 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2670
2671 char isa_other[40];
2672 char target_other[40];
2673 unsigned num = 0;
2674 unsigned i, j;
2675 char *ret;
2676 char *ptr;
2677 size_t len;
2678 size_t line_len;
2679 size_t sep_len;
2680 const char *abi;
2681
2682 memset (opts, '\0', sizeof (opts));
2683
2684 /* Add -march= option. */
2685 if (arch)
2686 {
2687 opts[num][0] = "-march=";
2688 opts[num++][1] = arch;
2689 }
2690
2691 /* Add -mtune= option. */
2692 if (tune)
2693 {
2694 opts[num][0] = "-mtune=";
2695 opts[num++][1] = tune;
2696 }
2697
2698 /* Add -m32/-m64/-mx32. */
2699 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2700 {
2701 if ((isa & OPTION_MASK_ABI_64) != 0)
2702 abi = "-m64";
2703 else
2704 abi = "-mx32";
2705 isa &= ~ (OPTION_MASK_ISA_64BIT
2706 | OPTION_MASK_ABI_64
2707 | OPTION_MASK_ABI_X32);
2708 }
2709 else
2710 abi = "-m32";
2711 opts[num++][0] = abi;
2712
2713 /* Pick out the options in isa options. */
2714 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2715 {
2716 if ((isa & isa_opts[i].mask) != 0)
2717 {
2718 opts[num++][0] = isa_opts[i].option;
2719 isa &= ~ isa_opts[i].mask;
2720 }
2721 }
2722
2723 if (isa && add_nl_p)
2724 {
2725 opts[num++][0] = isa_other;
2726 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2727 isa);
2728 }
2729
2730 /* Add flag options. */
2731 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2732 {
2733 if ((flags & flag_opts[i].mask) != 0)
2734 {
2735 opts[num++][0] = flag_opts[i].option;
2736 flags &= ~ flag_opts[i].mask;
2737 }
2738 }
2739
2740 if (flags && add_nl_p)
2741 {
2742 opts[num++][0] = target_other;
2743 sprintf (target_other, "(other flags: %#x)", flags);
2744 }
2745
2746 /* Add -fpmath= option. */
2747 if (fpmath)
2748 {
2749 opts[num][0] = "-mfpmath=";
2750 switch ((int) fpmath)
2751 {
2752 case FPMATH_387:
2753 opts[num++][1] = "387";
2754 break;
2755
2756 case FPMATH_SSE:
2757 opts[num++][1] = "sse";
2758 break;
2759
2760 case FPMATH_387 | FPMATH_SSE:
2761 opts[num++][1] = "sse+387";
2762 break;
2763
2764 default:
2765 gcc_unreachable ();
2766 }
2767 }
2768
2769 /* Any options? */
2770 if (num == 0)
2771 return NULL;
2772
2773 gcc_assert (num < ARRAY_SIZE (opts));
2774
2775 /* Size the string. */
2776 len = 0;
2777 sep_len = (add_nl_p) ? 3 : 1;
2778 for (i = 0; i < num; i++)
2779 {
2780 len += sep_len;
2781 for (j = 0; j < 2; j++)
2782 if (opts[i][j])
2783 len += strlen (opts[i][j]);
2784 }
2785
2786 /* Build the string. */
2787 ret = ptr = (char *) xmalloc (len);
2788 line_len = 0;
2789
2790 for (i = 0; i < num; i++)
2791 {
2792 size_t len2[2];
2793
2794 for (j = 0; j < 2; j++)
2795 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2796
2797 if (i != 0)
2798 {
2799 *ptr++ = ' ';
2800 line_len++;
2801
2802 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2803 {
2804 *ptr++ = '\\';
2805 *ptr++ = '\n';
2806 line_len = 0;
2807 }
2808 }
2809
2810 for (j = 0; j < 2; j++)
2811 if (opts[i][j])
2812 {
2813 memcpy (ptr, opts[i][j], len2[j]);
2814 ptr += len2[j];
2815 line_len += len2[j];
2816 }
2817 }
2818
2819 *ptr = '\0';
2820 gcc_assert (ret + len >= ptr);
2821
2822 return ret;
2823 }
2824
2825 /* Return true if profiling code should be emitted before the
2826 prologue, otherwise false. For x86 this is the case when
2827 -mfentry ("hotfix"/hot-patching style profiling) is in effect. */
2828 static bool
2829 ix86_profile_before_prologue (void)
2830 {
2831 return flag_fentry != 0;
2832 }
2833
2834 /* Function that is callable from the debugger to print the current
2835 options. */
2836 void ATTRIBUTE_UNUSED
2837 ix86_debug_options (void)
2838 {
2839 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2840 ix86_arch_string, ix86_tune_string,
2841 ix86_fpmath, true);
2842
2843 if (opts)
2844 {
2845 fprintf (stderr, "%s\n\n", opts);
2846 free (opts);
2847 }
2848 else
2849 fputs ("<no options>\n\n", stderr);
2850
2851 return;
2852 }
2853
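/* Names of the known stringop algorithms, indexed by enum stringop_alg;
   the entries are generated from stringop.def via the DEF_ALG x-macro
   below.  */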
2854 static const char *stringop_alg_names[] = {
2855 #define DEF_ENUM
2856 #define DEF_ALG(alg, name) #name,
2857 #include "stringop.def"
2858 #undef DEF_ENUM
2859 #undef DEF_ALG
2860 };
2861
2862 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2863 The string has the following form (or is a comma-separated list of such entries):
2864
2865 strategy_alg:max_size:[align|noalign]
2866
2867 where the full size range for the strategy is either [0, max_size] or
2868 [min_size, max_size], in which min_size is the max_size + 1 of the
2869 preceding range. The last size range must have max_size == -1.
2870
2871 Examples:
2872
2873 1.
2874 -mmemcpy-strategy=libcall:-1:noalign
2875
2876 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2877
2878
2879 2.
2880 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2881
2882 This is to tell the compiler to use the following strategy for memset
2883 1) when the expected size is between [1, 16], use rep_8byte strategy;
2884 2) when the size is between [17, 2048], use vector_loop;
2885 3) when the size is > 2048, use libcall. */
2886
2887 struct stringop_size_range
2888 {
2889 int max;
2890 stringop_alg alg;
2891 bool noalign;
2892 };
2893
2894 static void
2895 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2896 {
2897 const struct stringop_algs *default_algs;
2898 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2899 char *curr_range_str, *next_range_str;
2900 int i = 0, n = 0;
2901
2902 if (is_memset)
2903 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2904 else
2905 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2906
2907 curr_range_str = strategy_str;
2908
2909 do
2910 {
2911 int maxs;
2912 char alg_name[128];
2913 char align[16];
2914 next_range_str = strchr (curr_range_str, ',');
2915 if (next_range_str)
2916 *next_range_str++ = '\0';
2917
2918 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2919 alg_name, &maxs, align))
2920 {
2921 error ("wrong arg %s to option %s", curr_range_str,
2922 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2923 return;
2924 }
2925
2926 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2927 {
2928 error ("size ranges of option %s should be increasing",
2929 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2930 return;
2931 }
2932
2933 for (i = 0; i < last_alg; i++)
2934 if (!strcmp (alg_name, stringop_alg_names[i]))
2935 break;
2936
2937 if (i == last_alg)
2938 {
2939 error ("wrong stringop strategy name %s specified for option %s",
2940 alg_name,
2941 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2942 return;
2943 }
2944
2945 input_ranges[n].max = maxs;
2946 input_ranges[n].alg = (stringop_alg) i;
2947 if (!strcmp (align, "align"))
2948 input_ranges[n].noalign = false;
2949 else if (!strcmp (align, "noalign"))
2950 input_ranges[n].noalign = true;
2951 else
2952 {
2953 error ("unknown alignment %s specified for option %s",
2954 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2955 return;
2956 }
2957 n++;
2958 curr_range_str = next_range_str;
2959 }
2960 while (curr_range_str);
2961
2962 if (input_ranges[n - 1].max != -1)
2963 {
2964 error ("the max value for the last size range should be -1"
2965 " for option %s",
2966 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2968 }
2969
2970 if (n > MAX_STRINGOP_ALGS)
2971 {
2972 error ("too many size ranges specified in option %s",
2973 is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2974 return;
2975 }
2976
2977 /* Now override the default algs array. */
2978 for (i = 0; i < n; i++)
2979 {
2980 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2981 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2982 = input_ranges[i].alg;
2983 *const_cast<int *>(&default_algs->size[i].noalign)
2984 = input_ranges[i].noalign;
2985 }
2986 }
2987
2988 \f
2989 /* Parse the -mtune-ctrl= option.  When DUMP is true,
2990 print the features that are explicitly set. */
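/* Illustrative usage (hypothetical feature names; the real names are the
   tuning-knob strings taken from x86-tune.def): -mtune-ctrl=feature_a,^feature_b
   would turn feature_a on and, because of the '^' prefix, turn feature_b off.  */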
2991
2992 static void
2993 parse_mtune_ctrl_str (bool dump)
2994 {
2995 if (!ix86_tune_ctrl_string)
2996 return;
2997
2998 char *next_feature_string = NULL;
2999 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3000 char *orig = curr_feature_string;
3001 int i;
3002 do
3003 {
3004 bool clear = false;
3005
3006 next_feature_string = strchr (curr_feature_string, ',');
3007 if (next_feature_string)
3008 *next_feature_string++ = '\0';
3009 if (*curr_feature_string == '^')
3010 {
3011 curr_feature_string++;
3012 clear = true;
3013 }
3014 for (i = 0; i < X86_TUNE_LAST; i++)
3015 {
3016 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3017 {
3018 ix86_tune_features[i] = !clear;
3019 if (dump)
3020 fprintf (stderr, "Explicitly %s feature %s\n",
3021 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3022 break;
3023 }
3024 }
3025 if (i == X86_TUNE_LAST)
3026 error ("unknown parameter to option -mtune-ctrl: %s",
3027 clear ? curr_feature_string - 1 : curr_feature_string);
3028 curr_feature_string = next_feature_string;
3029 }
3030 while (curr_feature_string);
3031 free (orig);
3032 }
3033
3034 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3035 processor type. */
3036
3037 static void
3038 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3039 {
3040 unsigned int ix86_tune_mask = 1u << ix86_tune;
3041 int i;
3042
3043 for (i = 0; i < X86_TUNE_LAST; ++i)
3044 {
3045 if (ix86_tune_no_default)
3046 ix86_tune_features[i] = 0;
3047 else
3048 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3049 }
3050
3051 if (dump)
3052 {
3053 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3054 for (i = 0; i < X86_TUNE_LAST; i++)
3055 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3056 ix86_tune_features[i] ? "on" : "off");
3057 }
3058
3059 parse_mtune_ctrl_str (dump);
3060 }
3061
3062
3063 /* Override various settings based on options. If MAIN_ARGS_P, the
3064 options are from the command line, otherwise they are from
3065 attributes. */
3066
3067 static void
3068 ix86_option_override_internal (bool main_args_p,
3069 struct gcc_options *opts,
3070 struct gcc_options *opts_set)
3071 {
3072 int i;
3073 unsigned int ix86_arch_mask;
3074 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3075 const char *prefix;
3076 const char *suffix;
3077 const char *sw;
3078
3079 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3080 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3081 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3082 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3083 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3084 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3085 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3086 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3087 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3088 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3089 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3090 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3091 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3092 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3093 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3094 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3095 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3096 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3097 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3098 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3099 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3100 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3101 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3102 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3103 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3104 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3105 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3106 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3107 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3108 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3109 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3110 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3111 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3112 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3113 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3114 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3115 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3116 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3117 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3118 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3119 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3120 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3121 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3122 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3123 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3124 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3125 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3126 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3127 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3129 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3130 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3131
3132 #define PTA_CORE2 \
3133 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3134 | PTA_CX16 | PTA_FXSR)
3135 #define PTA_NEHALEM \
3136 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3137 #define PTA_WESTMERE \
3138 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3139 #define PTA_SANDYBRIDGE \
3140 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3141 #define PTA_IVYBRIDGE \
3142 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3143 #define PTA_HASWELL \
3144 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3145 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3146 #define PTA_BROADWELL \
3147 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3148 #define PTA_BONNELL \
3149 (PTA_CORE2 | PTA_MOVBE)
3150 #define PTA_SILVERMONT \
3151 (PTA_WESTMERE | PTA_MOVBE)
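/* Worked example (derived from the macros above, for illustration only):
   PTA_HASWELL expands to PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
   | PTA_SSSE3 | PTA_CX16 | PTA_FXSR | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT
   | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
   | PTA_RDRND | PTA_F16C | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT
   | PTA_FMA | PTA_MOVBE | PTA_HLE.  */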
3152
3153 /* If this reaches 64, we need to widen the struct pta flags field below.  */
3154
3155 static struct pta
3156 {
3157 const char *const name; /* processor name or nickname. */
3158 const enum processor_type processor;
3159 const enum attr_cpu schedule;
3160 const unsigned HOST_WIDE_INT flags;
3161 }
3162 const processor_alias_table[] =
3163 {
3164 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3165 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3166 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3169 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3170 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3171 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3177 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_FXSR},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_FXSR},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3184 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3185 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3189 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3192 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3193 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3194 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_SANDYBRIDGE},
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_SANDYBRIDGE},
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_IVYBRIDGE},
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_IVYBRIDGE},
3204 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3205 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3207 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3208 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3210 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3212 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3215 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3216 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3219 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3221 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"x86-64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3229 {"k8", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3255 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3256 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3261 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3262 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3263 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3264 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3265 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3266 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3267 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3272 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3273 | PTA_XSAVEOPT | PTA_FSGSBASE},
3274 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3278 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3279 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3280 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3281 | PTA_MOVBE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3292
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3296 };
3297
3298 /* -mrecip options. */
3299 static struct
3300 {
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3303 }
3304 const recip_options[] =
3305 {
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3312 };
3313
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3315
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3319 {
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3323 }
3324 else
3325 {
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3329 }
3330
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3337 {
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3351 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3352 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3353 #endif
3354 }
3355 #endif
3356
3357 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3358 {
3359 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3360 OPTION_MASK_ABI_64 for TARGET_X32. */
3361 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3362 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3363 }
3364 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3365 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3366 | OPTION_MASK_ABI_X32
3367 | OPTION_MASK_ABI_64);
3368 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3369 {
3370 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3371 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3372 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3373 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3374 }
3375
3376 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3377 SUBTARGET_OVERRIDE_OPTIONS;
3378 #endif
3379
3380 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3381 SUBSUBTARGET_OVERRIDE_OPTIONS;
3382 #endif
3383
3384 /* -fPIC is the default for x86_64. */
3385 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3386 opts->x_flag_pic = 2;
3387
3388 /* Need to check -mtune=generic first. */
3389 if (opts->x_ix86_tune_string)
3390 {
3391 /* As special support for cross compilers we read -mtune=native
3392 as -mtune=generic.  With native compilers we won't see
3393 -mtune=native, as the driver has already replaced it.  */
3394 if (!strcmp (opts->x_ix86_tune_string, "native"))
3395 {
3396 opts->x_ix86_tune_string = "generic";
3397 }
3398 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3399 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3400 "%stune=k8%s or %stune=generic%s instead as appropriate",
3401 prefix, suffix, prefix, suffix, prefix, suffix);
3402 }
3403 else
3404 {
3405 if (opts->x_ix86_arch_string)
3406 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3407 if (!opts->x_ix86_tune_string)
3408 {
3409 opts->x_ix86_tune_string
3410 = processor_target_table[TARGET_CPU_DEFAULT].name;
3411 ix86_tune_defaulted = 1;
3412 }
3413
3414 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3415 or defaulted. We need to use a sensible tune option. */
3416 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3417 {
3418 opts->x_ix86_tune_string = "generic";
3419 }
3420 }
3421
3422 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3423 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3424 {
3425 /* rep; movq isn't available in 32-bit code. */
3426 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3427 opts->x_ix86_stringop_alg = no_stringop;
3428 }
3429
3430 if (!opts->x_ix86_arch_string)
3431 opts->x_ix86_arch_string
3432 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3433 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3434 else
3435 ix86_arch_specified = 1;
3436
3437 if (opts_set->x_ix86_pmode)
3438 {
3439 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3440 && opts->x_ix86_pmode == PMODE_SI)
3441 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3442 && opts->x_ix86_pmode == PMODE_DI))
3443 error ("address mode %qs not supported in the %s bit mode",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3445 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3446 }
3447 else
3448 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3449 ? PMODE_DI : PMODE_SI;
3450
3451 if (!opts_set->x_ix86_abi)
3452 opts->x_ix86_abi = DEFAULT_ABI;
3453
3454 /* For targets using the MS ABI, enable MS extensions unless they are
3455 explicitly turned off.  For non-MS ABIs, turn this option
3456 off.  */
3457 if (!opts_set->x_flag_ms_extensions)
3458 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3459
3460 if (opts_set->x_ix86_cmodel)
3461 {
3462 switch (opts->x_ix86_cmodel)
3463 {
3464 case CM_SMALL:
3465 case CM_SMALL_PIC:
3466 if (opts->x_flag_pic)
3467 opts->x_ix86_cmodel = CM_SMALL_PIC;
3468 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3469 error ("code model %qs not supported in the %s bit mode",
3470 "small", "32");
3471 break;
3472
3473 case CM_MEDIUM:
3474 case CM_MEDIUM_PIC:
3475 if (opts->x_flag_pic)
3476 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3477 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3478 error ("code model %qs not supported in the %s bit mode",
3479 "medium", "32");
3480 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3481 error ("code model %qs not supported in x32 mode",
3482 "medium");
3483 break;
3484
3485 case CM_LARGE:
3486 case CM_LARGE_PIC:
3487 if (opts->x_flag_pic)
3488 opts->x_ix86_cmodel = CM_LARGE_PIC;
3489 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3490 error ("code model %qs not supported in the %s bit mode",
3491 "large", "32");
3492 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3493 error ("code model %qs not supported in x32 mode",
3494 "large");
3495 break;
3496
3497 case CM_32:
3498 if (opts->x_flag_pic)
3499 error ("code model %s does not support PIC mode", "32");
3500 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3501 error ("code model %qs not supported in the %s bit mode",
3502 "32", "64");
3503 break;
3504
3505 case CM_KERNEL:
3506 if (opts->x_flag_pic)
3507 {
3508 error ("code model %s does not support PIC mode", "kernel");
3509 opts->x_ix86_cmodel = CM_32;
3510 }
3511 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 error ("code model %qs not supported in the %s bit mode",
3513 "kernel", "32");
3514 break;
3515
3516 default:
3517 gcc_unreachable ();
3518 }
3519 }
3520 else
3521 {
3522 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3523 use of rip-relative addressing. This eliminates fixups that
3524 would otherwise be needed if this object is to be placed in a
3525 DLL, and is essentially just as efficient as direct addressing. */
3526 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3527 && (TARGET_RDOS || TARGET_PECOFF))
3528 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3529 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3530 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3531 else
3532 opts->x_ix86_cmodel = CM_32;
3533 }
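/* Illustrative defaults (derived from the code above, not exhaustive):
   with no explicit -mcmodel=, a 64-bit RDOS/PE-COFF target gets
   CM_MEDIUM_PIC with PIC forced on, other 64-bit targets get CM_SMALL_PIC
   when -fPIC is in effect and CM_SMALL otherwise, and 32-bit targets
   always get CM_32.  */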
3534 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3535 {
3536 error ("-masm=intel not supported in this configuration");
3537 opts->x_ix86_asm_dialect = ASM_ATT;
3538 }
3539 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3540 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3541 sorry ("%i-bit mode not compiled in",
3542 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3543
3544 for (i = 0; i < pta_size; i++)
3545 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3546 {
3547 ix86_schedule = processor_alias_table[i].schedule;
3548 ix86_arch = processor_alias_table[i].processor;
3549 /* Default cpu tuning to the architecture. */
3550 ix86_tune = ix86_arch;
3551
3552 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3553 && !(processor_alias_table[i].flags & PTA_64BIT))
3554 error ("CPU you selected does not support the x86-64 "
3555 "instruction set");
3556
3557 if (processor_alias_table[i].flags & PTA_MMX
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3560 if (processor_alias_table[i].flags & PTA_3DNOW
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3563 if (processor_alias_table[i].flags & PTA_3DNOW_A
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3566 if (processor_alias_table[i].flags & PTA_SSE
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3569 if (processor_alias_table[i].flags & PTA_SSE2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3572 if (processor_alias_table[i].flags & PTA_SSE3
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3575 if (processor_alias_table[i].flags & PTA_SSSE3
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3578 if (processor_alias_table[i].flags & PTA_SSE4_1
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3581 if (processor_alias_table[i].flags & PTA_SSE4_2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3584 if (processor_alias_table[i].flags & PTA_AVX
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3587 if (processor_alias_table[i].flags & PTA_AVX2
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3590 if (processor_alias_table[i].flags & PTA_FMA
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3593 if (processor_alias_table[i].flags & PTA_SSE4A
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3596 if (processor_alias_table[i].flags & PTA_FMA4
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3599 if (processor_alias_table[i].flags & PTA_XOP
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3602 if (processor_alias_table[i].flags & PTA_LWP
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3605 if (processor_alias_table[i].flags & PTA_ABM
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3608 if (processor_alias_table[i].flags & PTA_BMI
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3611 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3614 if (processor_alias_table[i].flags & PTA_TBM
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3617 if (processor_alias_table[i].flags & PTA_BMI2
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3620 if (processor_alias_table[i].flags & PTA_CX16
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3623 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3626 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3627 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3630 if (processor_alias_table[i].flags & PTA_MOVBE
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3633 if (processor_alias_table[i].flags & PTA_AES
3634 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3635 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3636 if (processor_alias_table[i].flags & PTA_SHA
3637 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3638 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3639 if (processor_alias_table[i].flags & PTA_PCLMUL
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3642 if (processor_alias_table[i].flags & PTA_FSGSBASE
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3645 if (processor_alias_table[i].flags & PTA_RDRND
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3648 if (processor_alias_table[i].flags & PTA_F16C
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3651 if (processor_alias_table[i].flags & PTA_RTM
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3654 if (processor_alias_table[i].flags & PTA_HLE
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3657 if (processor_alias_table[i].flags & PTA_PRFCHW
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3660 if (processor_alias_table[i].flags & PTA_RDSEED
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3663 if (processor_alias_table[i].flags & PTA_ADX
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3666 if (processor_alias_table[i].flags & PTA_FXSR
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3669 if (processor_alias_table[i].flags & PTA_XSAVE
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3672 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3675 if (processor_alias_table[i].flags & PTA_AVX512F
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3678 if (processor_alias_table[i].flags & PTA_AVX512ER
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3681 if (processor_alias_table[i].flags & PTA_AVX512PF
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3684 if (processor_alias_table[i].flags & PTA_AVX512CD
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3687 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3690 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3693 if (processor_alias_table[i].flags & PTA_XSAVEC
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3696 if (processor_alias_table[i].flags & PTA_XSAVES
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3699 if (processor_alias_table[i].flags & PTA_AVX512DQ
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3702 if (processor_alias_table[i].flags & PTA_AVX512BW
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3705 if (processor_alias_table[i].flags & PTA_AVX512VL
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3708 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3709 x86_prefetch_sse = true;
3710
3711 break;
3712 }
3713
3714 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3715 error ("generic CPU can be used only for %stune=%s %s",
3716 prefix, suffix, sw);
3717 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3718 error ("intel CPU can be used only for %stune=%s %s",
3719 prefix, suffix, sw);
3720 else if (i == pta_size)
3721 error ("bad value (%s) for %sarch=%s %s",
3722 opts->x_ix86_arch_string, prefix, suffix, sw);
3723
3724 ix86_arch_mask = 1u << ix86_arch;
3725 for (i = 0; i < X86_ARCH_LAST; ++i)
3726 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3727
3728 for (i = 0; i < pta_size; i++)
3729 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3730 {
3731 ix86_schedule = processor_alias_table[i].schedule;
3732 ix86_tune = processor_alias_table[i].processor;
3733 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3734 {
3735 if (!(processor_alias_table[i].flags & PTA_64BIT))
3736 {
3737 if (ix86_tune_defaulted)
3738 {
3739 opts->x_ix86_tune_string = "x86-64";
3740 for (i = 0; i < pta_size; i++)
3741 if (! strcmp (opts->x_ix86_tune_string,
3742 processor_alias_table[i].name))
3743 break;
3744 ix86_schedule = processor_alias_table[i].schedule;
3745 ix86_tune = processor_alias_table[i].processor;
3746 }
3747 else
3748 error ("CPU you selected does not support the x86-64 "
3749 "instruction set");
3750 }
3751 }
3752 /* Intel CPUs have always interpreted SSE prefetch instructions as
3753 NOPs; so, we can enable SSE prefetch instructions even when
3754 -mtune (rather than -march) points us to a processor that has them.
3755 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3756 higher processors. */
3757 if (TARGET_CMOV
3758 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3759 x86_prefetch_sse = true;
3760 break;
3761 }
3762
3763 if (ix86_tune_specified && i == pta_size)
3764 error ("bad value (%s) for %stune=%s %s",
3765 opts->x_ix86_tune_string, prefix, suffix, sw);
3766
3767 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3768
3769 #ifndef USE_IX86_FRAME_POINTER
3770 #define USE_IX86_FRAME_POINTER 0
3771 #endif
3772
3773 #ifndef USE_X86_64_FRAME_POINTER
3774 #define USE_X86_64_FRAME_POINTER 0
3775 #endif
3776
3777 /* Set the default values for switches whose default depends on TARGET_64BIT
3778 in case they weren't overwritten by command line options. */
3779 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3780 {
3781 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3782 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3783 if (opts->x_flag_asynchronous_unwind_tables
3784 && !opts_set->x_flag_unwind_tables
3785 && TARGET_64BIT_MS_ABI)
3786 opts->x_flag_unwind_tables = 1;
3787 if (opts->x_flag_asynchronous_unwind_tables == 2)
3788 opts->x_flag_unwind_tables
3789 = opts->x_flag_asynchronous_unwind_tables = 1;
3790 if (opts->x_flag_pcc_struct_return == 2)
3791 opts->x_flag_pcc_struct_return = 0;
3792 }
3793 else
3794 {
3795 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3796 opts->x_flag_omit_frame_pointer
3797 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3798 if (opts->x_flag_asynchronous_unwind_tables == 2)
3799 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3800 if (opts->x_flag_pcc_struct_return == 2)
3801 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3802 }
3803
3804 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3805 if (opts->x_optimize_size)
3806 ix86_cost = &ix86_size_cost;
3807 else
3808 ix86_cost = ix86_tune_cost;
3809
3810 /* Arrange to set up i386_stack_locals for all functions. */
3811 init_machine_status = ix86_init_machine_status;
3812
3813 /* Validate -mregparm= value. */
3814 if (opts_set->x_ix86_regparm)
3815 {
3816 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3817 warning (0, "-mregparm is ignored in 64-bit mode");
3818 if (opts->x_ix86_regparm > REGPARM_MAX)
3819 {
3820 error ("-mregparm=%d is not between 0 and %d",
3821 opts->x_ix86_regparm, REGPARM_MAX);
3822 opts->x_ix86_regparm = 0;
3823 }
3824 }
3825 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3826 opts->x_ix86_regparm = REGPARM_MAX;
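/* Illustrative note (not from the original sources): in 32-bit mode,
   e.g. -mregparm=3 asks for up to the first three integer arguments to be
   passed in registers; anything above REGPARM_MAX is rejected above, and
   in 64-bit mode the option is ignored and REGPARM_MAX is used.  */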
3827
3828 /* Default align_* from the processor table. */
3829 if (opts->x_align_loops == 0)
3830 {
3831 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3832 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3833 }
3834 if (opts->x_align_jumps == 0)
3835 {
3836 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3837 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3838 }
3839 if (opts->x_align_functions == 0)
3840 {
3841 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3842 }
3843
3844 /* Provide default for -mbranch-cost= value. */
3845 if (!opts_set->x_ix86_branch_cost)
3846 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3847
3848 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3849 {
3850 opts->x_target_flags
3851 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3852
3853 /* Enable by default the SSE and MMX builtins. Do allow the user to
3854 explicitly disable any of these. In particular, disabling SSE and
3855 MMX for kernel code is extremely useful. */
3856 if (!ix86_arch_specified)
3857 opts->x_ix86_isa_flags
3858 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3859 | TARGET_SUBTARGET64_ISA_DEFAULT)
3860 & ~opts->x_ix86_isa_flags_explicit);
3861
3862 if (TARGET_RTD_P (opts->x_target_flags))
3863 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3864 }
3865 else
3866 {
3867 opts->x_target_flags
3868 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3869
3870 if (!ix86_arch_specified)
3871 opts->x_ix86_isa_flags
3872 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3873
3874 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
3875 when the programmer takes care to keep the stack from being destroyed.  */
3876 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3877 opts->x_target_flags |= MASK_NO_RED_ZONE;
3878 }
3879
3880 /* Keep nonleaf frame pointers. */
3881 if (opts->x_flag_omit_frame_pointer)
3882 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3883 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3884 opts->x_flag_omit_frame_pointer = 1;
3885
3886 /* If we're doing fast math, we don't care about comparison order
3887 wrt NaNs. This lets us use a shorter comparison sequence. */
3888 if (opts->x_flag_finite_math_only)
3889 opts->x_target_flags &= ~MASK_IEEE_FP;
3890
3891 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3892 since the insns won't need emulation. */
3893 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3894 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3895
3896 /* Likewise, if the target doesn't have a 387, or we've specified
3897 software floating point, don't use 387 inline intrinsics. */
3898 if (!TARGET_80387_P (opts->x_target_flags))
3899 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3900
3901 /* Turn on MMX builtins for -msse. */
3902 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_isa_flags
3904 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3905
3906 /* Enable SSE prefetch. */
3907 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3908 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3909 x86_prefetch_sse = true;
3910
3911 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3912 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3913 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3914 opts->x_ix86_isa_flags
3915 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3916
3917 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3918 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3919 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3920 opts->x_ix86_isa_flags
3921 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3922
3923 /* Enable lzcnt instruction for -mabm. */
3924 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3925 opts->x_ix86_isa_flags
3926 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3927
3928 /* Validate -mpreferred-stack-boundary= value or default it to
3929 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3930 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3931 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3932 {
3933 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3934 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3935 int max = (TARGET_SEH ? 4 : 12);
3936
3937 if (opts->x_ix86_preferred_stack_boundary_arg < min
3938 || opts->x_ix86_preferred_stack_boundary_arg > max)
3939 {
3940 if (min == max)
3941 error ("-mpreferred-stack-boundary is not supported "
3942 "for this target");
3943 else
3944 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3945 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3946 }
3947 else
3948 ix86_preferred_stack_boundary
3949 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3950 }
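/* Worked example (illustration only): -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte boundary;
   the minimum of 2 above corresponds to the traditional 4-byte alignment.  */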
3951
3952 /* Set the default value for -mstackrealign. */
3953 if (opts->x_ix86_force_align_arg_pointer == -1)
3954 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3955
3956 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3957
3958 /* Validate -mincoming-stack-boundary= value or default it to
3959 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3960 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3961 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3962 {
3963 if (opts->x_ix86_incoming_stack_boundary_arg
3964 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3965 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3966 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3967 opts->x_ix86_incoming_stack_boundary_arg,
3968 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3969 else
3970 {
3971 ix86_user_incoming_stack_boundary
3972 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3973 ix86_incoming_stack_boundary
3974 = ix86_user_incoming_stack_boundary;
3975 }
3976 }
3977
3978 /* Accept -msseregparm only if at least SSE support is enabled. */
3979 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3980 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3981 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3982
3983 if (opts_set->x_ix86_fpmath)
3984 {
3985 if (opts->x_ix86_fpmath & FPMATH_SSE)
3986 {
3987 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3988 {
3989 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3990 opts->x_ix86_fpmath = FPMATH_387;
3991 }
3992 else if ((opts->x_ix86_fpmath & FPMATH_387)
3993 && !TARGET_80387_P (opts->x_target_flags))
3994 {
3995 warning (0, "387 instruction set disabled, using SSE arithmetic");
3996 opts->x_ix86_fpmath = FPMATH_SSE;
3997 }
3998 }
3999 }
4000 /* For all chips supporting SSE2, -mfpmath=sse performs better than
4001 fpmath=387.  The latter is nevertheless the default on many targets, since
4002 the extra 80-bit precision of temporaries is considered to be part of the ABI.
4003 Overwrite the default at least for -ffast-math.
4004 TODO: -mfpmath=both seems to produce code that performs the same with
4005 slightly smaller binaries.  It is however not clear whether register
4006 allocation is ready for this setting.
4007 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4008 codegen.  We may switch to 387 with -ffast-math for size-optimized
4009 functions.  */
4010 else if (fast_math_flags_set_p (&global_options)
4011 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4012 opts->x_ix86_fpmath = FPMATH_SSE;
4013 else
4014 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4015
4016 /* If the i387 is disabled, then do not return values in it. */
4017 if (!TARGET_80387_P (opts->x_target_flags))
4018 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4019
4020 /* Use an external vectorized library when vectorizing intrinsics.  */
4021 if (opts_set->x_ix86_veclibabi_type)
4022 switch (opts->x_ix86_veclibabi_type)
4023 {
4024 case ix86_veclibabi_type_svml:
4025 ix86_veclib_handler = ix86_veclibabi_svml;
4026 break;
4027
4028 case ix86_veclibabi_type_acml:
4029 ix86_veclib_handler = ix86_veclibabi_acml;
4030 break;
4031
4032 default:
4033 gcc_unreachable ();
4034 }
4035
4036 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4037 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4038 && !opts->x_optimize_size)
4039 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4040
4041 /* If stack probes are required, the space used for large function
4042 arguments on the stack must also be probed, so enable
4043 -maccumulate-outgoing-args so this happens in the prologue. */
4044 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4045 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4046 {
4047 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4048 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4049 "for correctness", prefix, suffix);
4050 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4051 }
4052
4053 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4054 {
4055 char *p;
4056 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4057 p = strchr (internal_label_prefix, 'X');
4058 internal_label_prefix_len = p - internal_label_prefix;
4059 *p = '\0';
4060 }
4061
4062 /* When the scheduling description is not available, disable the scheduler
4063 pass so it won't slow down compilation and make x87 code slower.  */
4064 if (!TARGET_SCHEDULE)
4065 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4066
4067 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4068 ix86_tune_cost->simultaneous_prefetches,
4069 opts->x_param_values,
4070 opts_set->x_param_values);
4071 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4072 ix86_tune_cost->prefetch_block,
4073 opts->x_param_values,
4074 opts_set->x_param_values);
4075 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4076 ix86_tune_cost->l1_cache_size,
4077 opts->x_param_values,
4078 opts_set->x_param_values);
4079 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4080 ix86_tune_cost->l2_cache_size,
4081 opts->x_param_values,
4082 opts_set->x_param_values);
4083
4084 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4085 if (opts->x_flag_prefetch_loop_arrays < 0
4086 && HAVE_prefetch
4087 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4088 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4089 opts->x_flag_prefetch_loop_arrays = 1;
4090
4091 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4092 can be optimized to ap = __builtin_next_arg (0).  */
4093 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4094 targetm.expand_builtin_va_start = NULL;
4095
4096 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4097 {
4098 ix86_gen_leave = gen_leave_rex64;
4099 if (Pmode == DImode)
4100 {
4101 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4102 ix86_gen_tls_local_dynamic_base_64
4103 = gen_tls_local_dynamic_base_64_di;
4104 }
4105 else
4106 {
4107 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4108 ix86_gen_tls_local_dynamic_base_64
4109 = gen_tls_local_dynamic_base_64_si;
4110 }
4111 }
4112 else
4113 ix86_gen_leave = gen_leave;
4114
4115 if (Pmode == DImode)
4116 {
4117 ix86_gen_add3 = gen_adddi3;
4118 ix86_gen_sub3 = gen_subdi3;
4119 ix86_gen_sub3_carry = gen_subdi3_carry;
4120 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4121 ix86_gen_andsp = gen_anddi3;
4122 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4123 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4124 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4125 ix86_gen_monitor = gen_sse3_monitor_di;
4126 }
4127 else
4128 {
4129 ix86_gen_add3 = gen_addsi3;
4130 ix86_gen_sub3 = gen_subsi3;
4131 ix86_gen_sub3_carry = gen_subsi3_carry;
4132 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4133 ix86_gen_andsp = gen_andsi3;
4134 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4135 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4136 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4137 ix86_gen_monitor = gen_sse3_monitor_si;
4138 }
4139
4140 #ifdef USE_IX86_CLD
4141 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4142 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4143 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4144 #endif
4145
4146 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4147 {
4148 if (opts->x_flag_fentry > 0)
4149 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4150 "with -fpic");
4151 opts->x_flag_fentry = 0;
4152 }
4153 else if (TARGET_SEH)
4154 {
4155 if (opts->x_flag_fentry == 0)
4156 sorry ("-mno-fentry isn%'t compatible with SEH");
4157 opts->x_flag_fentry = 1;
4158 }
4159 else if (opts->x_flag_fentry < 0)
4160 {
4161 #if defined(PROFILE_BEFORE_PROLOGUE)
4162 opts->x_flag_fentry = 1;
4163 #else
4164 opts->x_flag_fentry = 0;
4165 #endif
4166 }
4167
4168 /* When not optimizing for size, enable vzeroupper optimization for
4169 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4170 AVX unaligned load/store. */
4171 if (!opts->x_optimize_size)
4172 {
4173 if (flag_expensive_optimizations
4174 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4175 opts->x_target_flags |= MASK_VZEROUPPER;
4176 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4177 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4178 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4179 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4180 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4181 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4182 /* Enable 128-bit AVX instruction generation
4183 for the auto-vectorizer. */
4184 if (TARGET_AVX128_OPTIMAL
4185 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4186 opts->x_target_flags |= MASK_PREFER_AVX128;
4187 }
4188
4189 if (opts->x_ix86_recip_name)
4190 {
4191 char *p = ASTRDUP (opts->x_ix86_recip_name);
4192 char *q;
4193 unsigned int mask, i;
4194 bool invert;
4195
4196 while ((q = strtok (p, ",")) != NULL)
4197 {
4198 p = NULL;
4199 if (*q == '!')
4200 {
4201 invert = true;
4202 q++;
4203 }
4204 else
4205 invert = false;
4206
4207 if (!strcmp (q, "default"))
4208 mask = RECIP_MASK_ALL;
4209 else
4210 {
4211 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4212 if (!strcmp (q, recip_options[i].string))
4213 {
4214 mask = recip_options[i].mask;
4215 break;
4216 }
4217
4218 if (i == ARRAY_SIZE (recip_options))
4219 {
4220 error ("unknown option for -mrecip=%s", q);
4221 invert = false;
4222 mask = RECIP_MASK_NONE;
4223 }
4224 }
4225
4226 opts->x_recip_mask_explicit |= mask;
4227 if (invert)
4228 opts->x_recip_mask &= ~mask;
4229 else
4230 opts->x_recip_mask |= mask;
4231 }
4232 }
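/* Worked example (illustration only): -mrecip=all,!sqrt is parsed by the
   loop above as two tokens; "all" sets every bit in RECIP_MASK_ALL, and
   "!sqrt" then clears RECIP_MASK_SQRT, so reciprocal approximations are
   enabled everywhere except for scalar square roots.  */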
4233
4234 if (TARGET_RECIP_P (opts->x_target_flags))
4235 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4236 else if (opts_set->x_target_flags & MASK_RECIP)
4237 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4238
4239 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4240 for 64-bit Bionic. */
4241 if (TARGET_HAS_BIONIC
4242 && !(opts_set->x_target_flags
4243 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4244 opts->x_target_flags |= (TARGET_64BIT
4245 ? MASK_LONG_DOUBLE_128
4246 : MASK_LONG_DOUBLE_64);
4247
4248 /* Only one of them can be active. */
4249 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4250 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4251
4252 /* Save the initial options in case the user does function specific
4253 options. */
4254 if (main_args_p)
4255 target_option_default_node = target_option_current_node
4256 = build_target_option_node (opts);
4257
4258 /* Handle the stack protector.  */
4259 if (!opts_set->x_ix86_stack_protector_guard)
4260 opts->x_ix86_stack_protector_guard
4261 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4262
4263 /* Handle -mmemcpy-strategy= and -mmemset-strategy=.  */
4264 if (opts->x_ix86_tune_memcpy_strategy)
4265 {
4266 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4267 ix86_parse_stringop_strategy_string (str, false);
4268 free (str);
4269 }
4270
4271 if (opts->x_ix86_tune_memset_strategy)
4272 {
4273 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4274 ix86_parse_stringop_strategy_string (str, true);
4275 free (str);
4276 }
4277 }
4278
4279 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4280
4281 static void
4282 ix86_option_override (void)
4283 {
4284 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4285 static struct register_pass_info insert_vzeroupper_info
4286 = { pass_insert_vzeroupper, "reload",
4287 1, PASS_POS_INSERT_AFTER
4288 };
4289
4290 ix86_option_override_internal (true, &global_options, &global_options_set);
4291
4292
4293 /* This needs to be done at start up. It's convenient to do it here. */
4294 register_pass (&insert_vzeroupper_info);
4295 }
4296
4297 /* Update register usage after having seen the compiler flags. */
4298
4299 static void
4300 ix86_conditional_register_usage (void)
4301 {
4302 int i, c_mask;
4303 unsigned int j;
4304
4305 /* The PIC register, if it exists, is fixed. */
4306 j = PIC_OFFSET_TABLE_REGNUM;
4307 if (j != INVALID_REGNUM)
4308 fixed_regs[j] = call_used_regs[j] = 1;
4309
4310 /* For 32-bit targets, squash the REX registers. */
4311 if (! TARGET_64BIT)
4312 {
4313 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4314 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4315 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4316 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4317 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4318 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4319 }
4320
4321 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4322 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4323 : TARGET_64BIT ? (1 << 2)
4324 : (1 << 1));
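/* Illustrative note (hypothetical initializer value, for explanation only):
   a CALL_USED_REGISTERS entry of 6 = (1 << 2) | (1 << 1) would make the
   register call-used for the 64-bit SysV and 32-bit ABIs selected by c_mask
   above, but call-saved for the 64-bit MS ABI (mask bit 1 << 3).  */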
4325
4326 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4327
4328 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4329 {
4330 /* Set/reset conditionally defined registers from
4331 CALL_USED_REGISTERS initializer. */
4332 if (call_used_regs[i] > 1)
4333 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4334
4335 /* Build the CLOBBERED_REGS register set from the call-used
4336 registers of the GENERAL_REGS register set.  */
4337 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4338 && call_used_regs[i])
4339 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4340 }
4341
4342 /* If MMX is disabled, squash the registers. */
4343 if (! TARGET_MMX)
4344 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4345 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4346 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4347
4348 /* If SSE is disabled, squash the registers. */
4349 if (! TARGET_SSE)
4350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4351 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4353
4354 /* If the FPU is disabled, squash the registers. */
4355 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4356 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4357 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4358 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4359
4360 /* If AVX512F is disabled, squash the registers. */
4361 if (! TARGET_AVX512F)
4362 {
4363 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4364 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4365
4366 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4367 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4368 }
4369 }
4370
4371 \f
4372 /* Save the current options.  */
4373
4374 static void
4375 ix86_function_specific_save (struct cl_target_option *ptr,
4376 struct gcc_options *opts)
4377 {
4378 ptr->arch = ix86_arch;
4379 ptr->schedule = ix86_schedule;
4380 ptr->tune = ix86_tune;
4381 ptr->branch_cost = ix86_branch_cost;
4382 ptr->tune_defaulted = ix86_tune_defaulted;
4383 ptr->arch_specified = ix86_arch_specified;
4384 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4385 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4386 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4387 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4388 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4389 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4390 ptr->x_ix86_abi = opts->x_ix86_abi;
4391 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4392 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4393 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4394 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4395 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4396 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4397 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4398 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4399 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4400 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4401 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4402 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4403 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4404 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4405 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4406 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4407 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4408 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4409 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4410 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4411
4412 /* The fields are char but the variables are not; make sure the
4413 values fit in the fields. */
4414 gcc_assert (ptr->arch == ix86_arch);
4415 gcc_assert (ptr->schedule == ix86_schedule);
4416 gcc_assert (ptr->tune == ix86_tune);
4417 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4418 }
4419
4420 /* Restore the current options */
4421
4422 static void
4423 ix86_function_specific_restore (struct gcc_options *opts,
4424 struct cl_target_option *ptr)
4425 {
4426 enum processor_type old_tune = ix86_tune;
4427 enum processor_type old_arch = ix86_arch;
4428 unsigned int ix86_arch_mask;
4429 int i;
4430
4431 /* We don't change -fPIC. */
4432 opts->x_flag_pic = flag_pic;
4433
4434 ix86_arch = (enum processor_type) ptr->arch;
4435 ix86_schedule = (enum attr_cpu) ptr->schedule;
4436 ix86_tune = (enum processor_type) ptr->tune;
4437 opts->x_ix86_branch_cost = ptr->branch_cost;
4438 ix86_tune_defaulted = ptr->tune_defaulted;
4439 ix86_arch_specified = ptr->arch_specified;
4440 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4441 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4442 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4443 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4444 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4445 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4446 opts->x_ix86_abi = ptr->x_ix86_abi;
4447 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4448 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4449 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4450 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4451 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4452 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4453 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4454 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4455 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4456 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4457 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4458 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4459 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4460 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4461 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4462 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4463 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4464 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4465 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4466 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4467
4468 /* Recreate the arch feature tests if the arch changed */
4469 if (old_arch != ix86_arch)
4470 {
4471 ix86_arch_mask = 1u << ix86_arch;
4472 for (i = 0; i < X86_ARCH_LAST; ++i)
4473 ix86_arch_features[i]
4474 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4475 }
4476
4477 /* Recreate the tune optimization tests */
4478 if (old_tune != ix86_tune)
4479 set_ix86_tune_features (ix86_tune, false);
4480 }
4481
4482 /* Print the current options */
4483
4484 static void
4485 ix86_function_specific_print (FILE *file, int indent,
4486 struct cl_target_option *ptr)
4487 {
4488 char *target_string
4489 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4490 NULL, NULL, ptr->x_ix86_fpmath, false);
4491
4492 gcc_assert (ptr->arch < PROCESSOR_max);
4493 fprintf (file, "%*sarch = %d (%s)\n",
4494 indent, "",
4495 ptr->arch, processor_target_table[ptr->arch].name);
4496
4497 gcc_assert (ptr->tune < PROCESSOR_max);
4498 fprintf (file, "%*stune = %d (%s)\n",
4499 indent, "",
4500 ptr->tune, processor_target_table[ptr->tune].name);
4501
4502 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4503
4504 if (target_string)
4505 {
4506 fprintf (file, "%*s%s\n", indent, "", target_string);
4507 free (target_string);
4508 }
4509 }
4510
4511 \f
4512 /* Inner function to process the attribute((target(...))): take an argument
4513 and set the current options from it. If we have a list, recursively go
4514 over the list. */
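/* As an illustration (hypothetical declaration, not taken from this file),

     int foo (void) __attribute__ ((target ("avx2,no-sse4a")));

   arrives here with the string "avx2,no-sse4a": the string is split at
   commas, "avx2" turns the AVX2 ISA flag on, and the "no-" prefix on
   "sse4a" turns SSE4A off, both via the attrs table below.  */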
4515
4516 static bool
4517 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4518 struct gcc_options *opts,
4519 struct gcc_options *opts_set,
4520 struct gcc_options *enum_opts_set)
4521 {
4522 char *next_optstr;
4523 bool ret = true;
4524
4525 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4526 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4527 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4528 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4529 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
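  /* For instance, IX86_ATTR_ISA ("avx", OPT_mavx) expands to
     { "avx", 3, ix86_opt_isa, OPT_mavx, 0 }: the option string, its
     length, the option type, the command-line option and a zero mask
     (the mask is only used by the _YES/_NO flag entries).  */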
4530
4531 enum ix86_opt_type
4532 {
4533 ix86_opt_unknown,
4534 ix86_opt_yes,
4535 ix86_opt_no,
4536 ix86_opt_str,
4537 ix86_opt_enum,
4538 ix86_opt_isa
4539 };
4540
4541 static const struct
4542 {
4543 const char *string;
4544 size_t len;
4545 enum ix86_opt_type type;
4546 int opt;
4547 int mask;
4548 } attrs[] = {
4549 /* isa options */
4550 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4551 IX86_ATTR_ISA ("abm", OPT_mabm),
4552 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4553 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4554 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4555 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4556 IX86_ATTR_ISA ("aes", OPT_maes),
4557 IX86_ATTR_ISA ("sha", OPT_msha),
4558 IX86_ATTR_ISA ("avx", OPT_mavx),
4559 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4560 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4561 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4562 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4563 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4564 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4565 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4566 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4567 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4568 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4569 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4570 IX86_ATTR_ISA ("sse", OPT_msse),
4571 IX86_ATTR_ISA ("sse2", OPT_msse2),
4572 IX86_ATTR_ISA ("sse3", OPT_msse3),
4573 IX86_ATTR_ISA ("sse4", OPT_msse4),
4574 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4575 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4576 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4577 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4578 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4579 IX86_ATTR_ISA ("fma", OPT_mfma),
4580 IX86_ATTR_ISA ("xop", OPT_mxop),
4581 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4582 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4583 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4584 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4585 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4586 IX86_ATTR_ISA ("hle", OPT_mhle),
4587 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4588 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4589 IX86_ATTR_ISA ("adx", OPT_madx),
4590 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4591 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4592 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4593 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4594 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4595 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4596 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4597
4598 /* enum options */
4599 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4600
4601 /* string options */
4602 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4603 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4604
4605 /* flag options */
4606 IX86_ATTR_YES ("cld",
4607 OPT_mcld,
4608 MASK_CLD),
4609
4610 IX86_ATTR_NO ("fancy-math-387",
4611 OPT_mfancy_math_387,
4612 MASK_NO_FANCY_MATH_387),
4613
4614 IX86_ATTR_YES ("ieee-fp",
4615 OPT_mieee_fp,
4616 MASK_IEEE_FP),
4617
4618 IX86_ATTR_YES ("inline-all-stringops",
4619 OPT_minline_all_stringops,
4620 MASK_INLINE_ALL_STRINGOPS),
4621
4622 IX86_ATTR_YES ("inline-stringops-dynamically",
4623 OPT_minline_stringops_dynamically,
4624 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4625
4626 IX86_ATTR_NO ("align-stringops",
4627 OPT_mno_align_stringops,
4628 MASK_NO_ALIGN_STRINGOPS),
4629
4630 IX86_ATTR_YES ("recip",
4631 OPT_mrecip,
4632 MASK_RECIP),
4633
4634 };
4635
4636 /* If this is a list, recurse to get the options. */
4637 if (TREE_CODE (args) == TREE_LIST)
4638 {
4639 bool ret = true;
4640
4641 for (; args; args = TREE_CHAIN (args))
4642 if (TREE_VALUE (args)
4643 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4644 p_strings, opts, opts_set,
4645 enum_opts_set))
4646 ret = false;
4647
4648 return ret;
4649 }
4650
4651 else if (TREE_CODE (args) != STRING_CST)
4652 {
4653 error ("attribute %<target%> argument not a string");
4654 return false;
4655 }
4656
4657 /* Handle multiple arguments separated by commas. */
4658 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4659
4660 while (next_optstr && *next_optstr != '\0')
4661 {
4662 char *p = next_optstr;
4663 char *orig_p = p;
4664 char *comma = strchr (next_optstr, ',');
4665 const char *opt_string;
4666 size_t len, opt_len;
4667 int opt;
4668 bool opt_set_p;
4669 char ch;
4670 unsigned i;
4671 enum ix86_opt_type type = ix86_opt_unknown;
4672 int mask = 0;
4673
4674 if (comma)
4675 {
4676 *comma = '\0';
4677 len = comma - next_optstr;
4678 next_optstr = comma + 1;
4679 }
4680 else
4681 {
4682 len = strlen (p);
4683 next_optstr = NULL;
4684 }
4685
4686 /* Recognize no-xxx. */
4687 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4688 {
4689 opt_set_p = false;
4690 p += 3;
4691 len -= 3;
4692 }
4693 else
4694 opt_set_p = true;
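      /* For example, "no-avx" is reduced here to "avx" with OPT_SET_P
	 cleared, so the lookup below matches the plain ISA name and the
	 code that processes the option turns it off instead of on.  */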
4695
4696 /* Find the option. */
4697 ch = *p;
4698 opt = N_OPTS;
4699 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4700 {
4701 type = attrs[i].type;
4702 opt_len = attrs[i].len;
4703 if (ch == attrs[i].string[0]
4704 && ((type != ix86_opt_str && type != ix86_opt_enum)
4705 ? len == opt_len
4706 : len > opt_len)
4707 && memcmp (p, attrs[i].string, opt_len) == 0)
4708 {
4709 opt = attrs[i].opt;
4710 mask = attrs[i].mask;
4711 opt_string = attrs[i].string;
4712 break;
4713 }
4714 }
4715
4716 /* Process the option. */
4717 if (opt == N_OPTS)
4718 {
4719 error ("attribute(target(\"%s\")) is unknown", orig_p);
4720 ret = false;
4721 }
4722
4723 else if (type == ix86_opt_isa)
4724 {
4725 struct cl_decoded_option decoded;
4726
4727 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4728 ix86_handle_option (opts, opts_set,
4729 &decoded, input_location);
4730 }
4731
4732 else if (type == ix86_opt_yes || type == ix86_opt_no)
4733 {
4734 if (type == ix86_opt_no)
4735 opt_set_p = !opt_set_p;
4736
4737 if (opt_set_p)
4738 opts->x_target_flags |= mask;
4739 else
4740 opts->x_target_flags &= ~mask;
4741 }
4742
4743 else if (type == ix86_opt_str)
4744 {
4745 if (p_strings[opt])
4746 {
4747 error ("option(\"%s\") was already specified", opt_string);
4748 ret = false;
4749 }
4750 else
4751 p_strings[opt] = xstrdup (p + opt_len);
4752 }
4753
4754 else if (type == ix86_opt_enum)
4755 {
4756 bool arg_ok;
4757 int value;
4758
4759 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4760 if (arg_ok)
4761 set_option (opts, enum_opts_set, opt, value,
4762 p + opt_len, DK_UNSPECIFIED, input_location,
4763 global_dc);
4764 else
4765 {
4766 error ("attribute(target(\"%s\")) is unknown", orig_p);
4767 ret = false;
4768 }
4769 }
4770
4771 else
4772 gcc_unreachable ();
4773 }
4774
4775 return ret;
4776 }
4777
4778 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4779
4780 tree
4781 ix86_valid_target_attribute_tree (tree args,
4782 struct gcc_options *opts,
4783 struct gcc_options *opts_set)
4784 {
4785 const char *orig_arch_string = opts->x_ix86_arch_string;
4786 const char *orig_tune_string = opts->x_ix86_tune_string;
4787 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4788 int orig_tune_defaulted = ix86_tune_defaulted;
4789 int orig_arch_specified = ix86_arch_specified;
4790 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4791 tree t = NULL_TREE;
4792 int i;
4793 struct cl_target_option *def
4794 = TREE_TARGET_OPTION (target_option_default_node);
4795 struct gcc_options enum_opts_set;
4796
4797 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4798
4799 /* Process each of the options on the chain. */
4800 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4801 opts_set, &enum_opts_set))
4802 return error_mark_node;
4803
4804 /* If the changed options are different from the default, rerun
4805 ix86_option_override_internal, and then save the options away.
4806 The string options are attribute options, and will be undone
4807 when we copy the save structure. */
4808 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4809 || opts->x_target_flags != def->x_target_flags
4810 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4811 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4812 || enum_opts_set.x_ix86_fpmath)
4813 {
4814 /* If we are using the default tune= or arch=, undo the string assigned,
4815 and use the default. */
4816 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4817 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4818 else if (!orig_arch_specified)
4819 opts->x_ix86_arch_string = NULL;
4820
4821 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4822 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4823 else if (orig_tune_defaulted)
4824 opts->x_ix86_tune_string = NULL;
4825
4826 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4827 if (enum_opts_set.x_ix86_fpmath)
4828 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4829 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4830 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4831 {
4832 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4833 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4834 }
4835
4836 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4837 ix86_option_override_internal (false, opts, opts_set);
4838
4839 /* Add any builtin functions with the new isa if any. */
4840 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4841
4842 /* Save the current options unless we are validating options for
4843 #pragma. */
4844 t = build_target_option_node (opts);
4845
4846 opts->x_ix86_arch_string = orig_arch_string;
4847 opts->x_ix86_tune_string = orig_tune_string;
4848 opts_set->x_ix86_fpmath = orig_fpmath_set;
4849
4850 /* Free up memory allocated to hold the strings */
4851 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4852 free (option_strings[i]);
4853 }
4854
4855 return t;
4856 }
4857
4858 /* Hook to validate attribute((target("string"))). */
4859
4860 static bool
4861 ix86_valid_target_attribute_p (tree fndecl,
4862 tree ARG_UNUSED (name),
4863 tree args,
4864 int ARG_UNUSED (flags))
4865 {
4866 struct gcc_options func_options;
4867 tree new_target, new_optimize;
4868 bool ret = true;
4869
4870 /* attribute((target("default"))) does nothing, beyond
4871 affecting multi-versioning. */
4872 if (TREE_VALUE (args)
4873 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4874 && TREE_CHAIN (args) == NULL_TREE
4875 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4876 return true;
4877
4878 tree old_optimize = build_optimization_node (&global_options);
4879
4880 /* Get the optimization options of the current function. */
4881 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4882
4883 if (!func_optimize)
4884 func_optimize = old_optimize;
4885
4886 /* Init func_options. */
4887 memset (&func_options, 0, sizeof (func_options));
4888 init_options_struct (&func_options, NULL);
4889 lang_hooks.init_options_struct (&func_options);
4890
4891 cl_optimization_restore (&func_options,
4892 TREE_OPTIMIZATION (func_optimize));
4893
4894 /* Initialize func_options to the default before its target options can
4895 be set. */
4896 cl_target_option_restore (&func_options,
4897 TREE_TARGET_OPTION (target_option_default_node));
4898
4899 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4900 &global_options_set);
4901
4902 new_optimize = build_optimization_node (&func_options);
4903
4904 if (new_target == error_mark_node)
4905 ret = false;
4906
4907 else if (fndecl && new_target)
4908 {
4909 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4910
4911 if (old_optimize != new_optimize)
4912 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4913 }
4914
4915 return ret;
4916 }
4917
4918 \f
4919 /* Hook to determine if one function can safely inline another. */
4920
4921 static bool
4922 ix86_can_inline_p (tree caller, tree callee)
4923 {
4924 bool ret = false;
4925 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4926 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4927
4928 /* If callee has no option attributes, then it is ok to inline. */
4929 if (!callee_tree)
4930 ret = true;
4931
4932 /* If caller has no option attributes, but callee does then it is not ok to
4933 inline. */
4934 else if (!caller_tree)
4935 ret = false;
4936
4937 else
4938 {
4939 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4940 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4941
4942 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
4943 function can inline an SSE2 function but an SSE2 function can't inline
4944 an SSE4 function. */
4945 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4946 != callee_opts->x_ix86_isa_flags)
4947 ret = false;
4948
4949 /* See if we have the same non-isa options. */
4950 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4951 ret = false;
4952
4953 /* See if arch, tune, etc. are the same. */
4954 else if (caller_opts->arch != callee_opts->arch)
4955 ret = false;
4956
4957 else if (caller_opts->tune != callee_opts->tune)
4958 ret = false;
4959
4960 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4961 ret = false;
4962
4963 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4964 ret = false;
4965
4966 else
4967 ret = true;
4968 }
4969
4970 return ret;
4971 }
4972
4973 \f
4974 /* Remember the last target of ix86_set_current_function. */
4975 static GTY(()) tree ix86_previous_fndecl;
4976
4977 /* Invalidate ix86_previous_fndecl cache. */
4978 void
4979 ix86_reset_previous_fndecl (void)
4980 {
4981 ix86_previous_fndecl = NULL_TREE;
4982 }
4983
4984 /* Establish appropriate back-end context for processing the function
4985 FNDECL. The argument might be NULL to indicate processing at top
4986 level, outside of any function scope. */
4987 static void
4988 ix86_set_current_function (tree fndecl)
4989 {
4990 /* Only change the context if the function changes. This hook is called
4991 several times in the course of compiling a function, and we don't want to
4992 slow things down too much or call target_reinit when it isn't safe. */
4993 if (fndecl && fndecl != ix86_previous_fndecl)
4994 {
4995 tree old_tree = (ix86_previous_fndecl
4996 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4997 : NULL_TREE);
4998
4999 tree new_tree = (fndecl
5000 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5001 : NULL_TREE);
5002
5003 ix86_previous_fndecl = fndecl;
5004 if (old_tree == new_tree)
5005 ;
5006
5007 else if (new_tree)
5008 {
5009 cl_target_option_restore (&global_options,
5010 TREE_TARGET_OPTION (new_tree));
5011 if (TREE_TARGET_GLOBALS (new_tree))
5012 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5013 else
5014 TREE_TARGET_GLOBALS (new_tree)
5015 = save_target_globals_default_opts ();
5016 }
5017
5018 else if (old_tree)
5019 {
5020 new_tree = target_option_current_node;
5021 cl_target_option_restore (&global_options,
5022 TREE_TARGET_OPTION (new_tree));
5023 if (TREE_TARGET_GLOBALS (new_tree))
5024 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5025 else if (new_tree == target_option_default_node)
5026 restore_target_globals (&default_target_globals);
5027 else
5028 TREE_TARGET_GLOBALS (new_tree)
5029 = save_target_globals_default_opts ();
5030 }
5031 }
5032 }
5033
5034 \f
5035 /* Return true if this goes in large data/bss. */
5036
5037 static bool
5038 ix86_in_large_data_p (tree exp)
5039 {
5040 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5041 return false;
5042
5043 /* Functions are never large data. */
5044 if (TREE_CODE (exp) == FUNCTION_DECL)
5045 return false;
5046
5047 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5048 {
5049 const char *section = DECL_SECTION_NAME (exp);
5050 if (strcmp (section, ".ldata") == 0
5051 || strcmp (section, ".lbss") == 0)
5052 return true;
5053 return false;
5054 }
5055 else
5056 {
5057 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5058
5059 /* If this is an incomplete type with size 0, then we can't put it
5060 in data because it might be too big when completed. Also,
5061 int_size_in_bytes returns -1 if the size can vary or is larger than
5062 an integer, in which case it is also safer to assume that it goes in
5063 large data. */
5064 if (size <= 0 || size > ix86_section_threshold)
5065 return true;
5066 }
5067
5068 return false;
5069 }
5070
5071 /* Switch to the appropriate section for output of DECL.
5072 DECL is either a `VAR_DECL' node or a constant of some sort.
5073 RELOC indicates whether forming the initial value of DECL requires
5074 link-time relocations. */
5075
5076 ATTRIBUTE_UNUSED static section *
5077 x86_64_elf_select_section (tree decl, int reloc,
5078 unsigned HOST_WIDE_INT align)
5079 {
5080 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5081 && ix86_in_large_data_p (decl))
5082 {
5083 const char *sname = NULL;
5084 unsigned int flags = SECTION_WRITE;
5085 switch (categorize_decl_for_section (decl, reloc))
5086 {
5087 case SECCAT_DATA:
5088 sname = ".ldata";
5089 break;
5090 case SECCAT_DATA_REL:
5091 sname = ".ldata.rel";
5092 break;
5093 case SECCAT_DATA_REL_LOCAL:
5094 sname = ".ldata.rel.local";
5095 break;
5096 case SECCAT_DATA_REL_RO:
5097 sname = ".ldata.rel.ro";
5098 break;
5099 case SECCAT_DATA_REL_RO_LOCAL:
5100 sname = ".ldata.rel.ro.local";
5101 break;
5102 case SECCAT_BSS:
5103 sname = ".lbss";
5104 flags |= SECTION_BSS;
5105 break;
5106 case SECCAT_RODATA:
5107 case SECCAT_RODATA_MERGE_STR:
5108 case SECCAT_RODATA_MERGE_STR_INIT:
5109 case SECCAT_RODATA_MERGE_CONST:
5110 sname = ".lrodata";
5111 flags = 0;
5112 break;
5113 case SECCAT_SRODATA:
5114 case SECCAT_SDATA:
5115 case SECCAT_SBSS:
5116 gcc_unreachable ();
5117 case SECCAT_TEXT:
5118 case SECCAT_TDATA:
5119 case SECCAT_TBSS:
5120 /* We don't split these for the medium model. Place them into
5121 default sections and hope for the best. */
5122 break;
5123 }
5124 if (sname)
5125 {
5126 /* We might get called with string constants, but get_named_section
5127 doesn't like them as they are not DECLs. Also, we need to set
5128 flags in that case. */
5129 if (!DECL_P (decl))
5130 return get_section (sname, flags, NULL);
5131 return get_named_section (decl, sname, reloc);
5132 }
5133 }
5134 return default_elf_select_section (decl, reloc, align);
5135 }
5136
5137 /* Select a set of attributes for section NAME based on the properties
5138 of DECL and whether or not RELOC indicates that DECL's initializer
5139 might contain runtime relocations. */
5140
5141 static unsigned int ATTRIBUTE_UNUSED
5142 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5143 {
5144 unsigned int flags = default_section_type_flags (decl, name, reloc);
5145
5146 if (decl == NULL_TREE
5147 && (strcmp (name, ".ldata.rel.ro") == 0
5148 || strcmp (name, ".ldata.rel.ro.local") == 0))
5149 flags |= SECTION_RELRO;
5150
5151 if (strcmp (name, ".lbss") == 0
5152 || strncmp (name, ".lbss.", 6) == 0
5153 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5154 flags |= SECTION_BSS;
5155
5156 return flags;
5157 }
5158
5159 /* Build up a unique section name, expressed as a
5160 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5161 RELOC indicates whether the initial value of EXP requires
5162 link-time relocations. */
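/* For example, a hypothetical variable `var' categorized as SECCAT_DATA
   is placed in section ".ldata.var", or ".gnu.linkonce.ld.var" when a
   one-only copy is required.  */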
5163
5164 static void ATTRIBUTE_UNUSED
5165 x86_64_elf_unique_section (tree decl, int reloc)
5166 {
5167 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5168 && ix86_in_large_data_p (decl))
5169 {
5170 const char *prefix = NULL;
5171 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5172 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5173
5174 switch (categorize_decl_for_section (decl, reloc))
5175 {
5176 case SECCAT_DATA:
5177 case SECCAT_DATA_REL:
5178 case SECCAT_DATA_REL_LOCAL:
5179 case SECCAT_DATA_REL_RO:
5180 case SECCAT_DATA_REL_RO_LOCAL:
5181 prefix = one_only ? ".ld" : ".ldata";
5182 break;
5183 case SECCAT_BSS:
5184 prefix = one_only ? ".lb" : ".lbss";
5185 break;
5186 case SECCAT_RODATA:
5187 case SECCAT_RODATA_MERGE_STR:
5188 case SECCAT_RODATA_MERGE_STR_INIT:
5189 case SECCAT_RODATA_MERGE_CONST:
5190 prefix = one_only ? ".lr" : ".lrodata";
5191 break;
5192 case SECCAT_SRODATA:
5193 case SECCAT_SDATA:
5194 case SECCAT_SBSS:
5195 gcc_unreachable ();
5196 case SECCAT_TEXT:
5197 case SECCAT_TDATA:
5198 case SECCAT_TBSS:
5199 /* We don't split these for the medium model. Place them into
5200 default sections and hope for the best. */
5201 break;
5202 }
5203 if (prefix)
5204 {
5205 const char *name, *linkonce;
5206 char *string;
5207
5208 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5209 name = targetm.strip_name_encoding (name);
5210
5211 /* If we're using one_only, then there needs to be a .gnu.linkonce
5212 prefix to the section name. */
5213 linkonce = one_only ? ".gnu.linkonce" : "";
5214
5215 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5216
5217 set_decl_section_name (decl, string);
5218 return;
5219 }
5220 }
5221 default_unique_section (decl, reloc);
5222 }
5223
5224 #ifdef COMMON_ASM_OP
5225 /* This says how to output assembler code to declare an
5226 uninitialized external linkage data object.
5227
5228 For the medium model on x86-64 we need to use the .largecomm
5229 directive for large objects. */
5230 void
5231 x86_elf_aligned_common (FILE *file,
5232 const char *name, unsigned HOST_WIDE_INT size,
5233 int align)
5234 {
5235 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5236 && size > (unsigned int)ix86_section_threshold)
5237 fputs (".largecomm\t", file);
5238 else
5239 fputs (COMMON_ASM_OP, file);
5240 assemble_name (file, name);
5241 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5242 size, align / BITS_PER_UNIT);
5243 }
5244 #endif
5245
5246 /* Utility function for targets to use in implementing
5247 ASM_OUTPUT_ALIGNED_BSS. */
5248
5249 void
5250 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5251 unsigned HOST_WIDE_INT size, int align)
5252 {
5253 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5254 && size > (unsigned int)ix86_section_threshold)
5255 switch_to_section (get_named_section (decl, ".lbss", 0));
5256 else
5257 switch_to_section (bss_section);
5258 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5259 #ifdef ASM_DECLARE_OBJECT_NAME
5260 last_assemble_variable_decl = decl;
5261 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5262 #else
5263 /* The standard thing is just to output a label for the object. */
5264 ASM_OUTPUT_LABEL (file, name);
5265 #endif /* ASM_DECLARE_OBJECT_NAME */
5266 ASM_OUTPUT_SKIP (file, size ? size : 1);
5267 }
5268 \f
5269 /* Decide whether we must probe the stack before any space allocation
5270 on this target. It's essentially TARGET_STACK_PROBE except when
5271 -fstack-check causes the stack to be already probed differently. */
5272
5273 bool
5274 ix86_target_stack_probe (void)
5275 {
5276 /* Do not probe the stack twice if static stack checking is enabled. */
5277 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5278 return false;
5279
5280 return TARGET_STACK_PROBE;
5281 }
5282 \f
5283 /* Decide whether we can make a sibling call to a function. DECL is the
5284 declaration of the function being targeted by the call and EXP is the
5285 CALL_EXPR representing the call. */
5286
5287 static bool
5288 ix86_function_ok_for_sibcall (tree decl, tree exp)
5289 {
5290 tree type, decl_or_type;
5291 rtx a, b;
5292
5293 /* If we are generating position-independent code, we cannot sibcall
5294 optimize any indirect call, or a direct call to a global function,
5295 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5296 if (!TARGET_MACHO
5297 && !TARGET_64BIT
5298 && flag_pic
5299 && (!decl || !targetm.binds_local_p (decl)))
5300 return false;
5301
5302 /* If we need to align the outgoing stack, then sibcalling would
5303 unalign the stack, which may break the called function. */
5304 if (ix86_minimum_incoming_stack_boundary (true)
5305 < PREFERRED_STACK_BOUNDARY)
5306 return false;
5307
5308 if (decl)
5309 {
5310 decl_or_type = decl;
5311 type = TREE_TYPE (decl);
5312 }
5313 else
5314 {
5315 /* We're looking at the CALL_EXPR, we need the type of the function. */
5316 type = CALL_EXPR_FN (exp); /* pointer expression */
5317 type = TREE_TYPE (type); /* pointer type */
5318 type = TREE_TYPE (type); /* function type */
5319 decl_or_type = type;
5320 }
5321
5322 /* Check that the return value locations are the same. For example,
5323 if we are returning floats on the 80387 register stack, we cannot
5324 make a sibcall from a function that doesn't return a float to a
5325 function that does or, conversely, from a function that does return
5326 a float to a function that doesn't; the necessary stack adjustment
5327 would not be executed. This is also the place we notice
5328 differences in the return value ABI. Note that it is ok for one
5329 of the functions to have void return type as long as the return
5330 value of the other is passed in a register. */
5331 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5332 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5333 cfun->decl, false);
5334 if (STACK_REG_P (a) || STACK_REG_P (b))
5335 {
5336 if (!rtx_equal_p (a, b))
5337 return false;
5338 }
5339 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5340 ;
5341 else if (!rtx_equal_p (a, b))
5342 return false;
5343
5344 if (TARGET_64BIT)
5345 {
5346 /* The SYSV ABI has more call-clobbered registers;
5347 disallow sibcalls from MS to SYSV. */
5348 if (cfun->machine->call_abi == MS_ABI
5349 && ix86_function_type_abi (type) == SYSV_ABI)
5350 return false;
5351 }
5352 else
5353 {
5354 /* If this call is indirect, we'll need to be able to use a
5355 call-clobbered register for the address of the target function.
5356 Make sure that all such registers are not used for passing
5357 parameters. Note that DLLIMPORT functions are indirect. */
5358 if (!decl
5359 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5360 {
5361 if (ix86_function_regparm (type, NULL) >= 3)
5362 {
5363 /* ??? Need to count the actual number of registers to be used,
5364 not the possible number of registers. Fix later. */
5365 return false;
5366 }
5367 }
5368 }
5369
5370 /* Otherwise okay. That also includes certain types of indirect calls. */
5371 return true;
5372 }
5373
5374 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5375 and "sseregparm" calling convention attributes;
5376 arguments as in struct attribute_spec.handler. */
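/* For background (general 32-bit convention facts, not specific to this
   handler): "fastcall" passes the first two integer arguments in ECX and
   EDX, "thiscall" passes `this' in ECX, "regparm (N)" passes up to N
   integer arguments in EAX, EDX and ECX, and "stdcall" makes the callee
   pop its stack arguments.  */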
5377
5378 static tree
5379 ix86_handle_cconv_attribute (tree *node, tree name,
5380 tree args,
5381 int,
5382 bool *no_add_attrs)
5383 {
5384 if (TREE_CODE (*node) != FUNCTION_TYPE
5385 && TREE_CODE (*node) != METHOD_TYPE
5386 && TREE_CODE (*node) != FIELD_DECL
5387 && TREE_CODE (*node) != TYPE_DECL)
5388 {
5389 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5390 name);
5391 *no_add_attrs = true;
5392 return NULL_TREE;
5393 }
5394
5395 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5396 if (is_attribute_p ("regparm", name))
5397 {
5398 tree cst;
5399
5400 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5401 {
5402 error ("fastcall and regparm attributes are not compatible");
5403 }
5404
5405 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5406 {
5407 error ("regparam and thiscall attributes are not compatible");
5408 }
5409
5410 cst = TREE_VALUE (args);
5411 if (TREE_CODE (cst) != INTEGER_CST)
5412 {
5413 warning (OPT_Wattributes,
5414 "%qE attribute requires an integer constant argument",
5415 name);
5416 *no_add_attrs = true;
5417 }
5418 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5419 {
5420 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5421 name, REGPARM_MAX);
5422 *no_add_attrs = true;
5423 }
5424
5425 return NULL_TREE;
5426 }
5427
5428 if (TARGET_64BIT)
5429 {
5430 /* Do not warn when emulating the MS ABI. */
5431 if ((TREE_CODE (*node) != FUNCTION_TYPE
5432 && TREE_CODE (*node) != METHOD_TYPE)
5433 || ix86_function_type_abi (*node) != MS_ABI)
5434 warning (OPT_Wattributes, "%qE attribute ignored",
5435 name);
5436 *no_add_attrs = true;
5437 return NULL_TREE;
5438 }
5439
5440 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5441 if (is_attribute_p ("fastcall", name))
5442 {
5443 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5444 {
5445 error ("fastcall and cdecl attributes are not compatible");
5446 }
5447 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5448 {
5449 error ("fastcall and stdcall attributes are not compatible");
5450 }
5451 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5452 {
5453 error ("fastcall and regparm attributes are not compatible");
5454 }
5455 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5456 {
5457 error ("fastcall and thiscall attributes are not compatible");
5458 }
5459 }
5460
5461 /* Can combine stdcall with fastcall (redundant), regparm and
5462 sseregparm. */
5463 else if (is_attribute_p ("stdcall", name))
5464 {
5465 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5466 {
5467 error ("stdcall and cdecl attributes are not compatible");
5468 }
5469 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5470 {
5471 error ("stdcall and fastcall attributes are not compatible");
5472 }
5473 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5474 {
5475 error ("stdcall and thiscall attributes are not compatible");
5476 }
5477 }
5478
5479 /* Can combine cdecl with regparm and sseregparm. */
5480 else if (is_attribute_p ("cdecl", name))
5481 {
5482 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5483 {
5484 error ("stdcall and cdecl attributes are not compatible");
5485 }
5486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5487 {
5488 error ("fastcall and cdecl attributes are not compatible");
5489 }
5490 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5491 {
5492 error ("cdecl and thiscall attributes are not compatible");
5493 }
5494 }
5495 else if (is_attribute_p ("thiscall", name))
5496 {
5497 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5498 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5499 name);
5500 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5501 {
5502 error ("stdcall and thiscall attributes are not compatible");
5503 }
5504 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5505 {
5506 error ("fastcall and thiscall attributes are not compatible");
5507 }
5508 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5509 {
5510 error ("cdecl and thiscall attributes are not compatible");
5511 }
5512 }
5513
5514 /* Can combine sseregparm with all attributes. */
5515
5516 return NULL_TREE;
5517 }
5518
5519 /* The transactional memory builtins are implicitly regparm or fastcall
5520 depending on the ABI. Override the generic do-nothing attribute that
5521 these builtins were declared with, and replace it with one of the two
5522 attributes that we expect elsewhere. */
5523
5524 static tree
5525 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5526 int flags, bool *no_add_attrs)
5527 {
5528 tree alt;
5529
5530 /* In no case do we want to add the placeholder attribute. */
5531 *no_add_attrs = true;
5532
5533 /* The 64-bit ABI is unchanged for transactional memory. */
5534 if (TARGET_64BIT)
5535 return NULL_TREE;
5536
5537 /* ??? Is there a better way to validate 32-bit Windows? We have
5538 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5539 if (CHECK_STACK_LIMIT > 0)
5540 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5541 else
5542 {
5543 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5544 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5545 }
5546 decl_attributes (node, alt, flags);
5547
5548 return NULL_TREE;
5549 }
5550
5551 /* This function determines from TYPE the calling-convention. */
5552
5553 unsigned int
5554 ix86_get_callcvt (const_tree type)
5555 {
5556 unsigned int ret = 0;
5557 bool is_stdarg;
5558 tree attrs;
5559
5560 if (TARGET_64BIT)
5561 return IX86_CALLCVT_CDECL;
5562
5563 attrs = TYPE_ATTRIBUTES (type);
5564 if (attrs != NULL_TREE)
5565 {
5566 if (lookup_attribute ("cdecl", attrs))
5567 ret |= IX86_CALLCVT_CDECL;
5568 else if (lookup_attribute ("stdcall", attrs))
5569 ret |= IX86_CALLCVT_STDCALL;
5570 else if (lookup_attribute ("fastcall", attrs))
5571 ret |= IX86_CALLCVT_FASTCALL;
5572 else if (lookup_attribute ("thiscall", attrs))
5573 ret |= IX86_CALLCVT_THISCALL;
5574
5575 /* Regparm isn't allowed for thiscall and fastcall. */
5576 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5577 {
5578 if (lookup_attribute ("regparm", attrs))
5579 ret |= IX86_CALLCVT_REGPARM;
5580 if (lookup_attribute ("sseregparm", attrs))
5581 ret |= IX86_CALLCVT_SSEREGPARM;
5582 }
5583
5584 if (IX86_BASE_CALLCVT(ret) != 0)
5585 return ret;
5586 }
5587
5588 is_stdarg = stdarg_p (type);
5589 if (TARGET_RTD && !is_stdarg)
5590 return IX86_CALLCVT_STDCALL | ret;
5591
5592 if (ret != 0
5593 || is_stdarg
5594 || TREE_CODE (type) != METHOD_TYPE
5595 || ix86_function_type_abi (type) != MS_ABI)
5596 return IX86_CALLCVT_CDECL | ret;
5597
5598 return IX86_CALLCVT_THISCALL;
5599 }
5600
5601 /* Return 0 if the attributes for two types are incompatible, 1 if they
5602 are compatible, and 2 if they are nearly compatible (which causes a
5603 warning to be generated). */
5604
5605 static int
5606 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5607 {
5608 unsigned int ccvt1, ccvt2;
5609
5610 if (TREE_CODE (type1) != FUNCTION_TYPE
5611 && TREE_CODE (type1) != METHOD_TYPE)
5612 return 1;
5613
5614 ccvt1 = ix86_get_callcvt (type1);
5615 ccvt2 = ix86_get_callcvt (type2);
5616 if (ccvt1 != ccvt2)
5617 return 0;
5618 if (ix86_function_regparm (type1, NULL)
5619 != ix86_function_regparm (type2, NULL))
5620 return 0;
5621
5622 return 1;
5623 }
5624 \f
5625 /* Return the regparm value for a function with the indicated TYPE and DECL.
5626 DECL may be NULL when calling function indirectly
5627 or considering a libcall. */
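/* For example, a hypothetical declaration
     void f (int, int, int) __attribute__ ((regparm (3)));
   yields 3 here, while a "fastcall" type yields 2 and a "thiscall"
   type yields 1, matching the number of registers those conventions
   use for arguments.  */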
5628
5629 static int
5630 ix86_function_regparm (const_tree type, const_tree decl)
5631 {
5632 tree attr;
5633 int regparm;
5634 unsigned int ccvt;
5635
5636 if (TARGET_64BIT)
5637 return (ix86_function_type_abi (type) == SYSV_ABI
5638 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5639 ccvt = ix86_get_callcvt (type);
5640 regparm = ix86_regparm;
5641
5642 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5643 {
5644 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5645 if (attr)
5646 {
5647 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5648 return regparm;
5649 }
5650 }
5651 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5652 return 2;
5653 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5654 return 1;
5655
5656 /* Use register calling convention for local functions when possible. */
5657 if (decl
5658 && TREE_CODE (decl) == FUNCTION_DECL
5659 /* Caller and callee must agree on the calling convention, so
5660 checking just the global optimize flag here would mean that with
5661 __attribute__((optimize (...))) the caller could use the regparm
5662 convention and the callee not, or vice versa. Instead look at
5663 whether the callee itself is optimized. */
5664 && opt_for_fn (decl, optimize)
5665 && !(profile_flag && !flag_fentry))
5666 {
5667 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5668 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5669 if (i && i->local && i->can_change_signature)
5670 {
5671 int local_regparm, globals = 0, regno;
5672
5673 /* Make sure no regparm register is taken by a
5674 fixed register variable. */
5675 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5676 if (fixed_regs[local_regparm])
5677 break;
5678
5679 /* We don't want to use regparm(3) for nested functions as
5680 these use a static chain pointer in the third argument. */
5681 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5682 local_regparm = 2;
5683
5684 /* In 32-bit mode save a register for the split stack. */
5685 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5686 local_regparm = 2;
5687
5688 /* Each fixed register usage increases register pressure,
5689 so fewer registers should be used for argument passing.
5690 This functionality can be overridden by an explicit
5691 regparm value. */
5692 for (regno = AX_REG; regno <= DI_REG; regno++)
5693 if (fixed_regs[regno])
5694 globals++;
5695
5696 local_regparm
5697 = globals < local_regparm ? local_regparm - globals : 0;
5698
5699 if (local_regparm > regparm)
5700 regparm = local_regparm;
5701 }
5702 }
5703
5704 return regparm;
5705 }
5706
5707 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5708 DFmode (2) arguments in SSE registers for a function with the
5709 indicated TYPE and DECL. DECL may be NULL when calling function
5710 indirectly or considering a libcall. Otherwise return 0. */
5711
5712 static int
5713 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5714 {
5715 gcc_assert (!TARGET_64BIT);
5716
5717 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5718 by the sseregparm attribute. */
5719 if (TARGET_SSEREGPARM
5720 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5721 {
5722 if (!TARGET_SSE)
5723 {
5724 if (warn)
5725 {
5726 if (decl)
5727 error ("calling %qD with attribute sseregparm without "
5728 "SSE/SSE2 enabled", decl);
5729 else
5730 error ("calling %qT with attribute sseregparm without "
5731 "SSE/SSE2 enabled", type);
5732 }
5733 return 0;
5734 }
5735
5736 return 2;
5737 }
5738
5739 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5740 (and DFmode for SSE2) arguments in SSE registers. */
5741 if (decl && TARGET_SSE_MATH && optimize
5742 && !(profile_flag && !flag_fentry))
5743 {
5744 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5745 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5746 if (i && i->local && i->can_change_signature)
5747 return TARGET_SSE2 ? 2 : 1;
5748 }
5749
5750 return 0;
5751 }
5752
5753 /* Return true if EAX is live at the start of the function. Used by
5754 ix86_expand_prologue to determine if we need special help before
5755 calling allocate_stack_worker. */
5756
5757 static bool
5758 ix86_eax_live_at_start_p (void)
5759 {
5760 /* Cheat. Don't bother working forward from ix86_function_regparm
5761 to the function type to whether an actual argument is located in
5762 eax. Instead just look at cfg info, which is still close enough
5763 to correct at this point. This gives false positives for broken
5764 functions that might use uninitialized data that happens to be
5765 allocated in eax, but who cares? */
5766 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5767 }
5768
5769 static bool
5770 ix86_keep_aggregate_return_pointer (tree fntype)
5771 {
5772 tree attr;
5773
5774 if (!TARGET_64BIT)
5775 {
5776 attr = lookup_attribute ("callee_pop_aggregate_return",
5777 TYPE_ATTRIBUTES (fntype));
5778 if (attr)
5779 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5780
5781 /* For 32-bit MS-ABI the default is to keep aggregate
5782 return pointer. */
5783 if (ix86_function_type_abi (fntype) == MS_ABI)
5784 return true;
5785 }
5786 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5787 }
5788
5789 /* Value is the number of bytes of arguments automatically
5790 popped when returning from a subroutine call.
5791 FUNDECL is the declaration node of the function (as a tree),
5792 FUNTYPE is the data type of the function (as a tree),
5793 or for a library call it is an identifier node for the subroutine name.
5794 SIZE is the number of bytes of arguments passed on the stack.
5795
5796 On the 80386, the RTD insn may be used to pop them if the number
5797 of args is fixed, but if the number is variable then the caller
5798 must pop them all. RTD can't be used for library calls now
5799 because the library is compiled with the Unix compiler.
5800 Use of RTD is a selectable option, since it is incompatible with
5801 standard Unix calling sequences. If the option is not selected,
5802 the caller must always pop the args.
5803
5804 The attribute stdcall is equivalent to RTD on a per module basis. */
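/* For example (illustration only), a 32-bit "stdcall" function taking
   two int arguments returns with `ret $8', so this hook reports 8 for
   it, whereas a "cdecl" function leaves those 8 bytes for the caller to
   pop and this hook reports 0.  */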
5805
5806 static int
5807 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5808 {
5809 unsigned int ccvt;
5810
5811 /* None of the 64-bit ABIs pop arguments. */
5812 if (TARGET_64BIT)
5813 return 0;
5814
5815 ccvt = ix86_get_callcvt (funtype);
5816
5817 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5818 | IX86_CALLCVT_THISCALL)) != 0
5819 && ! stdarg_p (funtype))
5820 return size;
5821
5822 /* Lose any fake structure return argument if it is passed on the stack. */
5823 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5824 && !ix86_keep_aggregate_return_pointer (funtype))
5825 {
5826 int nregs = ix86_function_regparm (funtype, fundecl);
5827 if (nregs == 0)
5828 return GET_MODE_SIZE (Pmode);
5829 }
5830
5831 return 0;
5832 }
5833
5834 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5835
5836 static bool
5837 ix86_legitimate_combined_insn (rtx_insn *insn)
5838 {
5839 /* Check operand constraints in case hard registers were propagated
5840 into insn pattern. This check prevents combine pass from
5841 generating insn patterns with invalid hard register operands.
5842 These invalid insns can eventually confuse reload to error out
5843 with a spill failure. See also PRs 46829 and 46843. */
5844 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5845 {
5846 int i;
5847
5848 extract_insn (insn);
5849 preprocess_constraints (insn);
5850
5851 int n_operands = recog_data.n_operands;
5852 int n_alternatives = recog_data.n_alternatives;
5853 for (i = 0; i < n_operands; i++)
5854 {
5855 rtx op = recog_data.operand[i];
5856 enum machine_mode mode = GET_MODE (op);
5857 const operand_alternative *op_alt;
5858 int offset = 0;
5859 bool win;
5860 int j;
5861
5862 /* For pre-AVX disallow unaligned loads/stores where the
5863 instructions don't support it. */
5864 if (!TARGET_AVX
5865 && VECTOR_MODE_P (GET_MODE (op))
5866 && misaligned_operand (op, GET_MODE (op)))
5867 {
5868 int min_align = get_attr_ssememalign (insn);
5869 if (min_align == 0)
5870 return false;
5871 }
5872
5873 /* A unary operator may be accepted by the predicate, but it
5874 is irrelevant for matching constraints. */
5875 if (UNARY_P (op))
5876 op = XEXP (op, 0);
5877
5878 if (GET_CODE (op) == SUBREG)
5879 {
5880 if (REG_P (SUBREG_REG (op))
5881 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5882 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5883 GET_MODE (SUBREG_REG (op)),
5884 SUBREG_BYTE (op),
5885 GET_MODE (op));
5886 op = SUBREG_REG (op);
5887 }
5888
5889 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5890 continue;
5891
5892 op_alt = recog_op_alt;
5893
5894 /* Operand has no constraints, anything is OK. */
5895 win = !n_alternatives;
5896
5897 alternative_mask enabled = recog_data.enabled_alternatives;
5898 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5899 {
5900 if (!TEST_BIT (enabled, j))
5901 continue;
5902 if (op_alt[i].anything_ok
5903 || (op_alt[i].matches != -1
5904 && operands_match_p
5905 (recog_data.operand[i],
5906 recog_data.operand[op_alt[i].matches]))
5907 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5908 {
5909 win = true;
5910 break;
5911 }
5912 }
5913
5914 if (!win)
5915 return false;
5916 }
5917 }
5918
5919 return true;
5920 }
5921 \f
5922 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
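/* AddressSanitizer forms a shadow address as (ADDR >> 3) + OFFSET; the
   constants below are the offsets used for x86 targets, placed so the
   shadow region stays clear of the application's address space.  */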
5923
5924 static unsigned HOST_WIDE_INT
5925 ix86_asan_shadow_offset (void)
5926 {
5927 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5928 : HOST_WIDE_INT_C (0x7fff8000))
5929 : (HOST_WIDE_INT_1 << 29);
5930 }
5931 \f
5932 /* Argument support functions. */
5933
5934 /* Return true when register REGNO may be used to pass function parameters. */
5935 bool
5936 ix86_function_arg_regno_p (int regno)
5937 {
5938 int i;
5939 const int *parm_regs;
5940
5941 if (!TARGET_64BIT)
5942 {
5943 if (TARGET_MACHO)
5944 return (regno < REGPARM_MAX
5945 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5946 else
5947 return (regno < REGPARM_MAX
5948 || (TARGET_MMX && MMX_REGNO_P (regno)
5949 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5950 || (TARGET_SSE && SSE_REGNO_P (regno)
5951 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5952 }
5953
5954 if (TARGET_SSE && SSE_REGNO_P (regno)
5955 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5956 return true;
5957
5958 /* TODO: The function should depend on current function ABI but
5959 builtins.c would need updating then. Therefore we use the
5960 default ABI. */
5961
5962 /* RAX is used as hidden argument to va_arg functions. */
5963 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5964 return true;
5965
5966 if (ix86_abi == MS_ABI)
5967 parm_regs = x86_64_ms_abi_int_parameter_registers;
5968 else
5969 parm_regs = x86_64_int_parameter_registers;
5970 for (i = 0; i < (ix86_abi == MS_ABI
5971 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5972 if (regno == parm_regs[i])
5973 return true;
5974 return false;
5975 }
5976
5977 /* Return true if we do not know how to pass TYPE solely in registers. */
5978
5979 static bool
5980 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5981 {
5982 if (must_pass_in_stack_var_size_or_pad (mode, type))
5983 return true;
5984
5985 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5986 The layout_type routine is crafty and tries to trick us into passing
5987 currently unsupported vector types on the stack by using TImode. */
5988 return (!TARGET_64BIT && mode == TImode
5989 && type && TREE_CODE (type) != VECTOR_TYPE);
5990 }
5991
5992 /* Return the size, in bytes, of the area reserved for arguments passed
5993 in registers for the function represented by FNDECL, depending on
5994 the ABI used. */
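/* On the 64-bit MS ABI this is the 32-byte "home area" that callers
   reserve for the four register arguments (RCX, RDX, R8 and R9); the
   SYSV ABIs reserve nothing, hence the 0 below.  */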
5995 int
5996 ix86_reg_parm_stack_space (const_tree fndecl)
5997 {
5998 enum calling_abi call_abi = SYSV_ABI;
5999 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6000 call_abi = ix86_function_abi (fndecl);
6001 else
6002 call_abi = ix86_function_type_abi (fndecl);
6003 if (TARGET_64BIT && call_abi == MS_ABI)
6004 return 32;
6005 return 0;
6006 }
6007
6008 /* Return SYSV_ABI or MS_ABI depending on FNTYPE, specifying the
6009 call ABI used. */
6010 enum calling_abi
6011 ix86_function_type_abi (const_tree fntype)
6012 {
6013 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6014 {
6015 enum calling_abi abi = ix86_abi;
6016 if (abi == SYSV_ABI)
6017 {
6018 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6019 abi = MS_ABI;
6020 }
6021 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6022 abi = SYSV_ABI;
6023 return abi;
6024 }
6025 return ix86_abi;
6026 }
6027
6028 /* We add this as a workaround in order to use the libc_has_function
6029 hook in i386.md. */
6030 bool
6031 ix86_libc_has_function (enum function_class fn_class)
6032 {
6033 return targetm.libc_has_function (fn_class);
6034 }
6035
6036 static bool
6037 ix86_function_ms_hook_prologue (const_tree fn)
6038 {
6039 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6040 {
6041 if (decl_function_context (fn) != NULL_TREE)
6042 error_at (DECL_SOURCE_LOCATION (fn),
6043 "ms_hook_prologue is not compatible with nested function");
6044 else
6045 return true;
6046 }
6047 return false;
6048 }
6049
6050 static enum calling_abi
6051 ix86_function_abi (const_tree fndecl)
6052 {
6053 if (! fndecl)
6054 return ix86_abi;
6055 return ix86_function_type_abi (TREE_TYPE (fndecl));
6056 }
6057
6058 /* Return SYSV_ABI or MS_ABI depending on cfun, specifying the
6059 call ABI used. */
6060 enum calling_abi
6061 ix86_cfun_abi (void)
6062 {
6063 if (! cfun)
6064 return ix86_abi;
6065 return cfun->machine->call_abi;
6066 }
6067
6068 /* Write the extra assembler code needed to declare a function properly. */
6069
6070 void
6071 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6072 tree decl)
6073 {
6074 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6075
6076 if (is_ms_hook)
6077 {
6078 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6079 unsigned int filler_cc = 0xcccccccc;
6080
6081 for (i = 0; i < filler_count; i += 4)
6082 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6083 }
6084
6085 #ifdef SUBTARGET_ASM_UNWIND_INIT
6086 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6087 #endif
6088
6089 ASM_OUTPUT_LABEL (asm_out_file, fname);
6090
6091 /* Output magic byte marker, if hot-patch attribute is set. */
6092 if (is_ms_hook)
6093 {
6094 if (TARGET_64BIT)
6095 {
6096 /* leaq [%rsp + 0], %rsp */
6097 asm_fprintf (asm_out_file, ASM_BYTE
6098 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6099 }
6100 else
6101 {
6102 /* movl.s %edi, %edi
6103 push %ebp
6104 movl.s %esp, %ebp */
6105 asm_fprintf (asm_out_file, ASM_BYTE
6106 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6107 }
6108 }
6109 }
6110
6111 /* regclass.c */
6112 extern void init_regs (void);
6113
6114 /* Implementation of the call ABI switching target hook. Set the
6115 call register sets specific to FNDECL. See also
6116 ix86_conditional_register_usage for more details. */
6117 void
6118 ix86_call_abi_override (const_tree fndecl)
6119 {
6120 if (fndecl == NULL_TREE)
6121 cfun->machine->call_abi = ix86_abi;
6122 else
6123 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6124 }
6125
6126 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6127 expensive re-initialization of init_regs each time we switch function context
6128 since this is needed only during RTL expansion. */
6129 static void
6130 ix86_maybe_switch_abi (void)
6131 {
6132 if (TARGET_64BIT &&
6133 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6134 reinit_regs ();
6135 }
6136
6137 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6138 for a call to a function whose data type is FNTYPE.
6139 For a library call, FNTYPE is 0. */
6140
6141 void
6142 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6143 tree fntype, /* tree ptr for function decl */
6144 rtx libname, /* SYMBOL_REF of library name or 0 */
6145 tree fndecl,
6146 int caller)
6147 {
6148 struct cgraph_local_info *i;
6149
6150 memset (cum, 0, sizeof (*cum));
6151
6152 if (fndecl)
6153 {
6154 i = cgraph_node::local_info (fndecl);
6155 cum->call_abi = ix86_function_abi (fndecl);
6156 }
6157 else
6158 {
6159 i = NULL;
6160 cum->call_abi = ix86_function_type_abi (fntype);
6161 }
6162
6163 cum->caller = caller;
6164
6165 /* Set up the number of registers to use for passing arguments. */
6166 cum->nregs = ix86_regparm;
6167 if (TARGET_64BIT)
6168 {
6169 cum->nregs = (cum->call_abi == SYSV_ABI
6170 ? X86_64_REGPARM_MAX
6171 : X86_64_MS_REGPARM_MAX);
6172 }
6173 if (TARGET_SSE)
6174 {
6175 cum->sse_nregs = SSE_REGPARM_MAX;
6176 if (TARGET_64BIT)
6177 {
6178 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6179 ? X86_64_SSE_REGPARM_MAX
6180 : X86_64_MS_SSE_REGPARM_MAX);
6181 }
6182 }
6183 if (TARGET_MMX)
6184 cum->mmx_nregs = MMX_REGPARM_MAX;
6185 cum->warn_avx512f = true;
6186 cum->warn_avx = true;
6187 cum->warn_sse = true;
6188 cum->warn_mmx = true;
6189
6190 /* Because the type might mismatch between caller and callee, we need to
6191 use the actual type of the function for local calls.
6192 FIXME: cgraph_analyze can be told to record whether a function uses
6193 va_start, so for local functions maybe_vaarg could be made more
6194 aggressive, helping K&R code.
6195 FIXME: once the type system is fixed, we won't need this code anymore. */
6196 if (i && i->local && i->can_change_signature)
6197 fntype = TREE_TYPE (fndecl);
6198 cum->maybe_vaarg = (fntype
6199 ? (!prototype_p (fntype) || stdarg_p (fntype))
6200 : !libname);
6201
6202 if (!TARGET_64BIT)
6203 {
6204 /* If there are variable arguments, then we won't pass anything
6205 in registers in 32-bit mode. */
6206 if (stdarg_p (fntype))
6207 {
6208 cum->nregs = 0;
6209 cum->sse_nregs = 0;
6210 cum->mmx_nregs = 0;
6211 cum->warn_avx512f = false;
6212 cum->warn_avx = false;
6213 cum->warn_sse = false;
6214 cum->warn_mmx = false;
6215 return;
6216 }
6217
6218 /* Use ecx and edx registers if function has fastcall attribute,
6219 else look for regparm information. */
6220 if (fntype)
6221 {
6222 unsigned int ccvt = ix86_get_callcvt (fntype);
6223 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6224 {
6225 cum->nregs = 1;
6226 cum->fastcall = 1; /* Same first register as in fastcall. */
6227 }
6228 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6229 {
6230 cum->nregs = 2;
6231 cum->fastcall = 1;
6232 }
6233 else
6234 cum->nregs = ix86_function_regparm (fntype, fndecl);
6235 }
6236
6237 /* Set up the number of SSE registers used for passing SFmode
6238 and DFmode arguments. Warn for mismatching ABI. */
6239 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6240 }
6241 }
6242
6243 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6244 But in the case of vector types, it is some vector mode.
6245
6246 When we have only some of our vector isa extensions enabled, then there
6247 are some modes for which vector_mode_supported_p is false. For these
6248 modes, the generic vector support in gcc will choose some non-vector mode
6249 in order to implement the type. By computing the natural mode, we'll
6250 select the proper ABI location for the operand and not depend on whatever
6251 the middle-end decides to do with these vector types.
6252
6253 The middle-end can't deal with vector types larger than 16 bytes. In
6254 this case, we return the original mode and warn about the ABI change if
6255 CUM isn't NULL.
6256
6257 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6258 available for the function return value. */
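/* As an illustration (not part of the implementation): with AVX disabled, a
hypothetical
typedef float v8sf __attribute__ ((vector_size (32)));
parameter has a non-vector TYPE_MODE, so the loop below searches for the
V8SFmode matching its element type and unit count. Because the size is
32 bytes and !TARGET_AVX, we warn once with -Wpsabi and fall back to
TYPE_MODE, i.e. the value is not passed in a YMM register. */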
6259
6260 static enum machine_mode
6261 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6262 bool in_return)
6263 {
6264 enum machine_mode mode = TYPE_MODE (type);
6265
6266 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6267 {
6268 HOST_WIDE_INT size = int_size_in_bytes (type);
6269 if ((size == 8 || size == 16 || size == 32 || size == 64)
6270 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6271 && TYPE_VECTOR_SUBPARTS (type) > 1)
6272 {
6273 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6274
6275 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6276 mode = MIN_MODE_VECTOR_FLOAT;
6277 else
6278 mode = MIN_MODE_VECTOR_INT;
6279
6280 /* Get the mode which has this inner mode and number of units. */
6281 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6282 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6283 && GET_MODE_INNER (mode) == innermode)
6284 {
6285 if (size == 64 && !TARGET_AVX512F)
6286 {
6287 static bool warnedavx512f;
6288 static bool warnedavx512f_ret;
6289
6290 if (cum && cum->warn_avx512f && !warnedavx512f)
6291 {
6292 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6293 "without AVX512F enabled changes the ABI"))
6294 warnedavx512f = true;
6295 }
6296 else if (in_return && !warnedavx512f_ret)
6297 {
6298 if (warning (OPT_Wpsabi, "AVX512F vector return "
6299 "without AVX512F enabled changes the ABI"))
6300 warnedavx512f_ret = true;
6301 }
6302
6303 return TYPE_MODE (type);
6304 }
6305 else if (size == 32 && !TARGET_AVX)
6306 {
6307 static bool warnedavx;
6308 static bool warnedavx_ret;
6309
6310 if (cum && cum->warn_avx && !warnedavx)
6311 {
6312 if (warning (OPT_Wpsabi, "AVX vector argument "
6313 "without AVX enabled changes the ABI"))
6314 warnedavx = true;
6315 }
6316 else if (in_return && !warnedavx_ret)
6317 {
6318 if (warning (OPT_Wpsabi, "AVX vector return "
6319 "without AVX enabled changes the ABI"))
6320 warnedavx_ret = true;
6321 }
6322
6323 return TYPE_MODE (type);
6324 }
6325 else if (((size == 8 && TARGET_64BIT) || size == 16)
6326 && !TARGET_SSE)
6327 {
6328 static bool warnedsse;
6329 static bool warnedsse_ret;
6330
6331 if (cum && cum->warn_sse && !warnedsse)
6332 {
6333 if (warning (OPT_Wpsabi, "SSE vector argument "
6334 "without SSE enabled changes the ABI"))
6335 warnedsse = true;
6336 }
6337 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6338 {
6339 if (warning (OPT_Wpsabi, "SSE vector return "
6340 "without SSE enabled changes the ABI"))
6341 warnedsse_ret = true;
6342 }
6343 }
6344 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6345 {
6346 static bool warnedmmx;
6347 static bool warnedmmx_ret;
6348
6349 if (cum && cum->warn_mmx && !warnedmmx)
6350 {
6351 if (warning (OPT_Wpsabi, "MMX vector argument "
6352 "without MMX enabled changes the ABI"))
6353 warnedmmx = true;
6354 }
6355 else if (in_return && !warnedmmx_ret)
6356 {
6357 if (warning (OPT_Wpsabi, "MMX vector return "
6358 "without MMX enabled changes the ABI"))
6359 warnedmmx_ret = true;
6360 }
6361 }
6362 return mode;
6363 }
6364
6365 gcc_unreachable ();
6366 }
6367 }
6368
6369 return mode;
6370 }
6371
6372 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6373 this may not agree with the mode that the type system has chosen for the
6374 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6375 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
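/* A sketch of the BLKmode case (illustrative only): the value is wrapped as
(parallel:BLK [(expr_list (reg:MODE regno) (const_int 0))])
so the register and its natural mode are recorded even though the type's
own mode is BLKmode. */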
6376
6377 static rtx
6378 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6379 unsigned int regno)
6380 {
6381 rtx tmp;
6382
6383 if (orig_mode != BLKmode)
6384 tmp = gen_rtx_REG (orig_mode, regno);
6385 else
6386 {
6387 tmp = gen_rtx_REG (mode, regno);
6388 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6389 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6390 }
6391
6392 return tmp;
6393 }
6394
6395 /* x86-64 register passing implementation. See the x86-64 ABI for details.
6396 The goal of this code is to classify each eightbyte of an incoming argument
6397 by register class and assign registers accordingly. */
6398
6399 /* Return the union class of CLASS1 and CLASS2.
6400 See the x86-64 PS ABI for details. */
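/* For instance (illustrative only): in a hypothetical
struct { int i; float f; }
the int contributes X86_64_INTEGERSI_CLASS and the float, at a
non-64-bit-aligned offset, contributes X86_64_SSE_CLASS; rule #4 merges
them to X86_64_INTEGER_CLASS, so the whole 8-byte struct travels in a
single integer register. */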
6401
6402 static enum x86_64_reg_class
6403 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6404 {
6405 /* Rule #1: If both classes are equal, this is the resulting class. */
6406 if (class1 == class2)
6407 return class1;
6408
6409 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6410 the other class. */
6411 if (class1 == X86_64_NO_CLASS)
6412 return class2;
6413 if (class2 == X86_64_NO_CLASS)
6414 return class1;
6415
6416 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6417 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6418 return X86_64_MEMORY_CLASS;
6419
6420 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6421 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6422 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6423 return X86_64_INTEGERSI_CLASS;
6424 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6425 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6426 return X86_64_INTEGER_CLASS;
6427
6428 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6429 MEMORY is used. */
6430 if (class1 == X86_64_X87_CLASS
6431 || class1 == X86_64_X87UP_CLASS
6432 || class1 == X86_64_COMPLEX_X87_CLASS
6433 || class2 == X86_64_X87_CLASS
6434 || class2 == X86_64_X87UP_CLASS
6435 || class2 == X86_64_COMPLEX_X87_CLASS)
6436 return X86_64_MEMORY_CLASS;
6437
6438 /* Rule #6: Otherwise class SSE is used. */
6439 return X86_64_SSE_CLASS;
6440 }
6441
6442 /* Classify the argument of type TYPE and mode MODE.
6443 CLASSES will be filled by the register class used to pass each word
6444 of the operand. The number of words is returned. In case the parameter
6445 should be passed in memory, 0 is returned. As a special case for zero
6446 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6447
6448 BIT_OFFSET is used internally for handling records and specifies the
6449 offset in bits modulo 512 to avoid overflow cases.
6450
6451 See the x86-64 PS ABI for details.
6452 */
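/* Worked example (illustrative only): a hypothetical
struct s { double d; long l; };
spans two eightbytes; the double classifies the first as
X86_64_SSEDF_CLASS and the long classifies the second as
X86_64_INTEGER_CLASS, so 2 is returned and the struct is passed partly in
an SSE register and partly in an integer register. */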
6453
6454 static int
6455 classify_argument (enum machine_mode mode, const_tree type,
6456 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6457 {
6458 HOST_WIDE_INT bytes =
6459 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6460 int words
6461 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6462
6463 /* Variable sized entities are always passed/returned in memory. */
6464 if (bytes < 0)
6465 return 0;
6466
6467 if (mode != VOIDmode
6468 && targetm.calls.must_pass_in_stack (mode, type))
6469 return 0;
6470
6471 if (type && AGGREGATE_TYPE_P (type))
6472 {
6473 int i;
6474 tree field;
6475 enum x86_64_reg_class subclasses[MAX_CLASSES];
6476
6477 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6478 if (bytes > 64)
6479 return 0;
6480
6481 for (i = 0; i < words; i++)
6482 classes[i] = X86_64_NO_CLASS;
6483
6484 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
6485 signal the memory class, so handle it as a special case. */
6486 if (!words)
6487 {
6488 classes[0] = X86_64_NO_CLASS;
6489 return 1;
6490 }
6491
6492 /* Classify each field of record and merge classes. */
6493 switch (TREE_CODE (type))
6494 {
6495 case RECORD_TYPE:
6496 /* And now merge the fields of structure. */
6497 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6498 {
6499 if (TREE_CODE (field) == FIELD_DECL)
6500 {
6501 int num;
6502
6503 if (TREE_TYPE (field) == error_mark_node)
6504 continue;
6505
6506 /* Bitfields are always classified as integer. Handle them
6507 early, since later code would consider them to be
6508 misaligned integers. */
6509 if (DECL_BIT_FIELD (field))
6510 {
6511 for (i = (int_bit_position (field)
6512 + (bit_offset % 64)) / 8 / 8;
6513 i < ((int_bit_position (field) + (bit_offset % 64))
6514 + tree_to_shwi (DECL_SIZE (field))
6515 + 63) / 8 / 8; i++)
6516 classes[i] =
6517 merge_classes (X86_64_INTEGER_CLASS,
6518 classes[i]);
6519 }
6520 else
6521 {
6522 int pos;
6523
6524 type = TREE_TYPE (field);
6525
6526 /* Flexible array member is ignored. */
6527 if (TYPE_MODE (type) == BLKmode
6528 && TREE_CODE (type) == ARRAY_TYPE
6529 && TYPE_SIZE (type) == NULL_TREE
6530 && TYPE_DOMAIN (type) != NULL_TREE
6531 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6532 == NULL_TREE))
6533 {
6534 static bool warned;
6535
6536 if (!warned && warn_psabi)
6537 {
6538 warned = true;
6539 inform (input_location,
6540 "the ABI of passing struct with"
6541 " a flexible array member has"
6542 " changed in GCC 4.4");
6543 }
6544 continue;
6545 }
6546 num = classify_argument (TYPE_MODE (type), type,
6547 subclasses,
6548 (int_bit_position (field)
6549 + bit_offset) % 512);
6550 if (!num)
6551 return 0;
6552 pos = (int_bit_position (field)
6553 + (bit_offset % 64)) / 8 / 8;
6554 for (i = 0; i < num && (i + pos) < words; i++)
6555 classes[i + pos] =
6556 merge_classes (subclasses[i], classes[i + pos]);
6557 }
6558 }
6559 }
6560 break;
6561
6562 case ARRAY_TYPE:
6563 /* Arrays are handled as small records. */
6564 {
6565 int num;
6566 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6567 TREE_TYPE (type), subclasses, bit_offset);
6568 if (!num)
6569 return 0;
6570
6571 /* The partial classes are now full classes. */
6572 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6573 subclasses[0] = X86_64_SSE_CLASS;
6574 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6575 && !((bit_offset % 64) == 0 && bytes == 4))
6576 subclasses[0] = X86_64_INTEGER_CLASS;
6577
6578 for (i = 0; i < words; i++)
6579 classes[i] = subclasses[i % num];
6580
6581 break;
6582 }
6583 case UNION_TYPE:
6584 case QUAL_UNION_TYPE:
6585 /* Unions are similar to RECORD_TYPE but offset is always 0.
6586 */
6587 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6588 {
6589 if (TREE_CODE (field) == FIELD_DECL)
6590 {
6591 int num;
6592
6593 if (TREE_TYPE (field) == error_mark_node)
6594 continue;
6595
6596 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6597 TREE_TYPE (field), subclasses,
6598 bit_offset);
6599 if (!num)
6600 return 0;
6601 for (i = 0; i < num && i < words; i++)
6602 classes[i] = merge_classes (subclasses[i], classes[i]);
6603 }
6604 }
6605 break;
6606
6607 default:
6608 gcc_unreachable ();
6609 }
6610
6611 if (words > 2)
6612 {
6613 /* When the size exceeds 16 bytes, everything is passed
6614 in memory unless the first eightbyte is
6615 X86_64_SSE_CLASS and all the remaining ones are
6616 X86_64_SSEUP_CLASS. */
6617 if (classes[0] != X86_64_SSE_CLASS)
6618 return 0;
6619
6620 for (i = 1; i < words; i++)
6621 if (classes[i] != X86_64_SSEUP_CLASS)
6622 return 0;
6623 }
6624
6625 /* Final merger cleanup. */
6626 for (i = 0; i < words; i++)
6627 {
6628 /* If one class is MEMORY, everything should be passed in
6629 memory. */
6630 if (classes[i] == X86_64_MEMORY_CLASS)
6631 return 0;
6632
6633 /* X86_64_SSEUP_CLASS should always be preceded by
6634 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6635 if (classes[i] == X86_64_SSEUP_CLASS
6636 && classes[i - 1] != X86_64_SSE_CLASS
6637 && classes[i - 1] != X86_64_SSEUP_CLASS)
6638 {
6639 /* The first one should never be X86_64_SSEUP_CLASS. */
6640 gcc_assert (i != 0);
6641 classes[i] = X86_64_SSE_CLASS;
6642 }
6643
6644 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6645 everything should be passed in memory. */
6646 if (classes[i] == X86_64_X87UP_CLASS
6647 && (classes[i - 1] != X86_64_X87_CLASS))
6648 {
6649 static bool warned;
6650
6651 /* The first one should never be X86_64_X87UP_CLASS. */
6652 gcc_assert (i != 0);
6653 if (!warned && warn_psabi)
6654 {
6655 warned = true;
6656 inform (input_location,
6657 "the ABI of passing union with long double"
6658 " has changed in GCC 4.4");
6659 }
6660 return 0;
6661 }
6662 }
6663 return words;
6664 }
6665
6666 /* Compute the alignment needed. We align all types to their natural
6667 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6668 if (mode != VOIDmode && mode != BLKmode)
6669 {
6670 int mode_alignment = GET_MODE_BITSIZE (mode);
6671
6672 if (mode == XFmode)
6673 mode_alignment = 128;
6674 else if (mode == XCmode)
6675 mode_alignment = 256;
6676 if (COMPLEX_MODE_P (mode))
6677 mode_alignment /= 2;
6678 /* Misaligned fields are always returned in memory. */
6679 if (bit_offset % mode_alignment)
6680 return 0;
6681 }
6682
6683 /* For V1xx modes, just use the base mode. */
6684 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6685 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6686 mode = GET_MODE_INNER (mode);
6687
6688 /* Classification of atomic types. */
6689 switch (mode)
6690 {
6691 case SDmode:
6692 case DDmode:
6693 classes[0] = X86_64_SSE_CLASS;
6694 return 1;
6695 case TDmode:
6696 classes[0] = X86_64_SSE_CLASS;
6697 classes[1] = X86_64_SSEUP_CLASS;
6698 return 2;
6699 case DImode:
6700 case SImode:
6701 case HImode:
6702 case QImode:
6703 case CSImode:
6704 case CHImode:
6705 case CQImode:
6706 {
6707 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6708
6709 /* Analyze last 128 bits only. */
6710 size = (size - 1) & 0x7f;
6711
6712 if (size < 32)
6713 {
6714 classes[0] = X86_64_INTEGERSI_CLASS;
6715 return 1;
6716 }
6717 else if (size < 64)
6718 {
6719 classes[0] = X86_64_INTEGER_CLASS;
6720 return 1;
6721 }
6722 else if (size < 64+32)
6723 {
6724 classes[0] = X86_64_INTEGER_CLASS;
6725 classes[1] = X86_64_INTEGERSI_CLASS;
6726 return 2;
6727 }
6728 else if (size < 64+64)
6729 {
6730 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6731 return 2;
6732 }
6733 else
6734 gcc_unreachable ();
6735 }
6736 case CDImode:
6737 case TImode:
6738 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6739 return 2;
6740 case COImode:
6741 case OImode:
6742 /* OImode shouldn't be used directly. */
6743 gcc_unreachable ();
6744 case CTImode:
6745 return 0;
6746 case SFmode:
6747 if (!(bit_offset % 64))
6748 classes[0] = X86_64_SSESF_CLASS;
6749 else
6750 classes[0] = X86_64_SSE_CLASS;
6751 return 1;
6752 case DFmode:
6753 classes[0] = X86_64_SSEDF_CLASS;
6754 return 1;
6755 case XFmode:
6756 classes[0] = X86_64_X87_CLASS;
6757 classes[1] = X86_64_X87UP_CLASS;
6758 return 2;
6759 case TFmode:
6760 classes[0] = X86_64_SSE_CLASS;
6761 classes[1] = X86_64_SSEUP_CLASS;
6762 return 2;
6763 case SCmode:
6764 classes[0] = X86_64_SSE_CLASS;
6765 if (!(bit_offset % 64))
6766 return 1;
6767 else
6768 {
6769 static bool warned;
6770
6771 if (!warned && warn_psabi)
6772 {
6773 warned = true;
6774 inform (input_location,
6775 "the ABI of passing structure with complex float"
6776 " member has changed in GCC 4.4");
6777 }
6778 classes[1] = X86_64_SSESF_CLASS;
6779 return 2;
6780 }
6781 case DCmode:
6782 classes[0] = X86_64_SSEDF_CLASS;
6783 classes[1] = X86_64_SSEDF_CLASS;
6784 return 2;
6785 case XCmode:
6786 classes[0] = X86_64_COMPLEX_X87_CLASS;
6787 return 1;
6788 case TCmode:
6789 /* This mode is larger than 16 bytes. */
6790 return 0;
6791 case V8SFmode:
6792 case V8SImode:
6793 case V32QImode:
6794 case V16HImode:
6795 case V4DFmode:
6796 case V4DImode:
6797 classes[0] = X86_64_SSE_CLASS;
6798 classes[1] = X86_64_SSEUP_CLASS;
6799 classes[2] = X86_64_SSEUP_CLASS;
6800 classes[3] = X86_64_SSEUP_CLASS;
6801 return 4;
6802 case V8DFmode:
6803 case V16SFmode:
6804 case V8DImode:
6805 case V16SImode:
6806 case V32HImode:
6807 case V64QImode:
6808 classes[0] = X86_64_SSE_CLASS;
6809 classes[1] = X86_64_SSEUP_CLASS;
6810 classes[2] = X86_64_SSEUP_CLASS;
6811 classes[3] = X86_64_SSEUP_CLASS;
6812 classes[4] = X86_64_SSEUP_CLASS;
6813 classes[5] = X86_64_SSEUP_CLASS;
6814 classes[6] = X86_64_SSEUP_CLASS;
6815 classes[7] = X86_64_SSEUP_CLASS;
6816 return 8;
6817 case V4SFmode:
6818 case V4SImode:
6819 case V16QImode:
6820 case V8HImode:
6821 case V2DFmode:
6822 case V2DImode:
6823 classes[0] = X86_64_SSE_CLASS;
6824 classes[1] = X86_64_SSEUP_CLASS;
6825 return 2;
6826 case V1TImode:
6827 case V1DImode:
6828 case V2SFmode:
6829 case V2SImode:
6830 case V4HImode:
6831 case V8QImode:
6832 classes[0] = X86_64_SSE_CLASS;
6833 return 1;
6834 case BLKmode:
6835 case VOIDmode:
6836 return 0;
6837 default:
6838 gcc_assert (VECTOR_MODE_P (mode));
6839
6840 if (bytes > 16)
6841 return 0;
6842
6843 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6844
6845 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6846 classes[0] = X86_64_INTEGERSI_CLASS;
6847 else
6848 classes[0] = X86_64_INTEGER_CLASS;
6849 classes[1] = X86_64_INTEGER_CLASS;
6850 return 1 + (bytes > 8);
6851 }
6852 }
6853
6854 /* Examine the argument and set the number of registers required in each
6855 class. Return true iff the parameter should be passed in memory. */
6856
6857 static bool
6858 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6859 int *int_nregs, int *sse_nregs)
6860 {
6861 enum x86_64_reg_class regclass[MAX_CLASSES];
6862 int n = classify_argument (mode, type, regclass, 0);
6863
6864 *int_nregs = 0;
6865 *sse_nregs = 0;
6866
6867 if (!n)
6868 return true;
6869 for (n--; n >= 0; n--)
6870 switch (regclass[n])
6871 {
6872 case X86_64_INTEGER_CLASS:
6873 case X86_64_INTEGERSI_CLASS:
6874 (*int_nregs)++;
6875 break;
6876 case X86_64_SSE_CLASS:
6877 case X86_64_SSESF_CLASS:
6878 case X86_64_SSEDF_CLASS:
6879 (*sse_nregs)++;
6880 break;
6881 case X86_64_NO_CLASS:
6882 case X86_64_SSEUP_CLASS:
6883 break;
6884 case X86_64_X87_CLASS:
6885 case X86_64_X87UP_CLASS:
6886 case X86_64_COMPLEX_X87_CLASS:
6887 if (!in_return)
6888 return true;
6889 break;
6890 case X86_64_MEMORY_CLASS:
6891 gcc_unreachable ();
6892 }
6893
6894 return false;
6895 }
6896
6897 /* Construct container for the argument used by GCC interface. See
6898 FUNCTION_ARG for the detailed description. */
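/* Continuing the classify_argument example above (purely illustrative):
for struct s { double d; long l; } the container is a two-element
PARALLEL, roughly
(parallel [(expr_list (reg:DF xmm0) (const_int 0))
(expr_list (reg:DI di) (const_int 8))])
i.e. the first eightbyte travels in an SSE register and the second in an
integer register. */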
6899
6900 static rtx
6901 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6902 const_tree type, int in_return, int nintregs, int nsseregs,
6903 const int *intreg, int sse_regno)
6904 {
6905 /* The following variables hold the static issued_error state. */
6906 static bool issued_sse_arg_error;
6907 static bool issued_sse_ret_error;
6908 static bool issued_x87_ret_error;
6909
6910 enum machine_mode tmpmode;
6911 int bytes =
6912 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6913 enum x86_64_reg_class regclass[MAX_CLASSES];
6914 int n;
6915 int i;
6916 int nexps = 0;
6917 int needed_sseregs, needed_intregs;
6918 rtx exp[MAX_CLASSES];
6919 rtx ret;
6920
6921 n = classify_argument (mode, type, regclass, 0);
6922 if (!n)
6923 return NULL;
6924 if (examine_argument (mode, type, in_return, &needed_intregs,
6925 &needed_sseregs))
6926 return NULL;
6927 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6928 return NULL;
6929
6930 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6931 some less clueful developer tries to use floating-point anyway. */
6932 if (needed_sseregs && !TARGET_SSE)
6933 {
6934 if (in_return)
6935 {
6936 if (!issued_sse_ret_error)
6937 {
6938 error ("SSE register return with SSE disabled");
6939 issued_sse_ret_error = true;
6940 }
6941 }
6942 else if (!issued_sse_arg_error)
6943 {
6944 error ("SSE register argument with SSE disabled");
6945 issued_sse_arg_error = true;
6946 }
6947 return NULL;
6948 }
6949
6950 /* Likewise, error if the ABI requires us to return values in the
6951 x87 registers and the user specified -mno-80387. */
6952 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6953 for (i = 0; i < n; i++)
6954 if (regclass[i] == X86_64_X87_CLASS
6955 || regclass[i] == X86_64_X87UP_CLASS
6956 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6957 {
6958 if (!issued_x87_ret_error)
6959 {
6960 error ("x87 register return with x87 disabled");
6961 issued_x87_ret_error = true;
6962 }
6963 return NULL;
6964 }
6965
6966 /* First construct the simple cases. Avoid SCmode, since we want to use a
6967 single register to pass this type. */
6968 if (n == 1 && mode != SCmode)
6969 switch (regclass[0])
6970 {
6971 case X86_64_INTEGER_CLASS:
6972 case X86_64_INTEGERSI_CLASS:
6973 return gen_rtx_REG (mode, intreg[0]);
6974 case X86_64_SSE_CLASS:
6975 case X86_64_SSESF_CLASS:
6976 case X86_64_SSEDF_CLASS:
6977 if (mode != BLKmode)
6978 return gen_reg_or_parallel (mode, orig_mode,
6979 SSE_REGNO (sse_regno));
6980 break;
6981 case X86_64_X87_CLASS:
6982 case X86_64_COMPLEX_X87_CLASS:
6983 return gen_rtx_REG (mode, FIRST_STACK_REG);
6984 case X86_64_NO_CLASS:
6985 /* Zero sized array, struct or class. */
6986 return NULL;
6987 default:
6988 gcc_unreachable ();
6989 }
6990 if (n == 2
6991 && regclass[0] == X86_64_SSE_CLASS
6992 && regclass[1] == X86_64_SSEUP_CLASS
6993 && mode != BLKmode)
6994 return gen_reg_or_parallel (mode, orig_mode,
6995 SSE_REGNO (sse_regno));
6996 if (n == 4
6997 && regclass[0] == X86_64_SSE_CLASS
6998 && regclass[1] == X86_64_SSEUP_CLASS
6999 && regclass[2] == X86_64_SSEUP_CLASS
7000 && regclass[3] == X86_64_SSEUP_CLASS
7001 && mode != BLKmode)
7002 return gen_reg_or_parallel (mode, orig_mode,
7003 SSE_REGNO (sse_regno));
7004 if (n == 8
7005 && regclass[0] == X86_64_SSE_CLASS
7006 && regclass[1] == X86_64_SSEUP_CLASS
7007 && regclass[2] == X86_64_SSEUP_CLASS
7008 && regclass[3] == X86_64_SSEUP_CLASS
7009 && regclass[4] == X86_64_SSEUP_CLASS
7010 && regclass[5] == X86_64_SSEUP_CLASS
7011 && regclass[6] == X86_64_SSEUP_CLASS
7012 && regclass[7] == X86_64_SSEUP_CLASS
7013 && mode != BLKmode)
7014 return gen_reg_or_parallel (mode, orig_mode,
7015 SSE_REGNO (sse_regno));
7016 if (n == 2
7017 && regclass[0] == X86_64_X87_CLASS
7018 && regclass[1] == X86_64_X87UP_CLASS)
7019 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7020
7021 if (n == 2
7022 && regclass[0] == X86_64_INTEGER_CLASS
7023 && regclass[1] == X86_64_INTEGER_CLASS
7024 && (mode == CDImode || mode == TImode)
7025 && intreg[0] + 1 == intreg[1])
7026 return gen_rtx_REG (mode, intreg[0]);
7027
7028 /* Otherwise figure out the entries of the PARALLEL. */
7029 for (i = 0; i < n; i++)
7030 {
7031 int pos;
7032
7033 switch (regclass[i])
7034 {
7035 case X86_64_NO_CLASS:
7036 break;
7037 case X86_64_INTEGER_CLASS:
7038 case X86_64_INTEGERSI_CLASS:
7039 /* Merge TImodes on aligned occasions here too. */
7040 if (i * 8 + 8 > bytes)
7041 tmpmode
7042 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7043 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7044 tmpmode = SImode;
7045 else
7046 tmpmode = DImode;
7047 /* We've requested 24 bytes for which we
7048 don't have a mode. Use DImode. */
7049 if (tmpmode == BLKmode)
7050 tmpmode = DImode;
7051 exp [nexps++]
7052 = gen_rtx_EXPR_LIST (VOIDmode,
7053 gen_rtx_REG (tmpmode, *intreg),
7054 GEN_INT (i*8));
7055 intreg++;
7056 break;
7057 case X86_64_SSESF_CLASS:
7058 exp [nexps++]
7059 = gen_rtx_EXPR_LIST (VOIDmode,
7060 gen_rtx_REG (SFmode,
7061 SSE_REGNO (sse_regno)),
7062 GEN_INT (i*8));
7063 sse_regno++;
7064 break;
7065 case X86_64_SSEDF_CLASS:
7066 exp [nexps++]
7067 = gen_rtx_EXPR_LIST (VOIDmode,
7068 gen_rtx_REG (DFmode,
7069 SSE_REGNO (sse_regno)),
7070 GEN_INT (i*8));
7071 sse_regno++;
7072 break;
7073 case X86_64_SSE_CLASS:
7074 pos = i;
7075 switch (n)
7076 {
7077 case 1:
7078 tmpmode = DImode;
7079 break;
7080 case 2:
7081 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7082 {
7083 tmpmode = TImode;
7084 i++;
7085 }
7086 else
7087 tmpmode = DImode;
7088 break;
7089 case 4:
7090 gcc_assert (i == 0
7091 && regclass[1] == X86_64_SSEUP_CLASS
7092 && regclass[2] == X86_64_SSEUP_CLASS
7093 && regclass[3] == X86_64_SSEUP_CLASS);
7094 tmpmode = OImode;
7095 i += 3;
7096 break;
7097 case 8:
7098 gcc_assert (i == 0
7099 && regclass[1] == X86_64_SSEUP_CLASS
7100 && regclass[2] == X86_64_SSEUP_CLASS
7101 && regclass[3] == X86_64_SSEUP_CLASS
7102 && regclass[4] == X86_64_SSEUP_CLASS
7103 && regclass[5] == X86_64_SSEUP_CLASS
7104 && regclass[6] == X86_64_SSEUP_CLASS
7105 && regclass[7] == X86_64_SSEUP_CLASS);
7106 tmpmode = XImode;
7107 i += 7;
7108 break;
7109 default:
7110 gcc_unreachable ();
7111 }
7112 exp [nexps++]
7113 = gen_rtx_EXPR_LIST (VOIDmode,
7114 gen_rtx_REG (tmpmode,
7115 SSE_REGNO (sse_regno)),
7116 GEN_INT (pos*8));
7117 sse_regno++;
7118 break;
7119 default:
7120 gcc_unreachable ();
7121 }
7122 }
7123
7124 /* Empty aligned struct, union or class. */
7125 if (nexps == 0)
7126 return NULL;
7127
7128 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7129 for (i = 0; i < nexps; i++)
7130 XVECEXP (ret, 0, i) = exp [i];
7131 return ret;
7132 }
7133
7134 /* Update the data in CUM to advance over an argument of mode MODE
7135 and data type TYPE. (TYPE is null for libcalls where that information
7136 may not be available.) */
7137
7138 static void
7139 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7140 const_tree type, HOST_WIDE_INT bytes,
7141 HOST_WIDE_INT words)
7142 {
7143 switch (mode)
7144 {
7145 default:
7146 break;
7147
7148 case BLKmode:
7149 if (bytes < 0)
7150 break;
7151 /* FALLTHRU */
7152
7153 case DImode:
7154 case SImode:
7155 case HImode:
7156 case QImode:
7157 cum->words += words;
7158 cum->nregs -= words;
7159 cum->regno += words;
7160
7161 if (cum->nregs <= 0)
7162 {
7163 cum->nregs = 0;
7164 cum->regno = 0;
7165 }
7166 break;
7167
7168 case OImode:
7169 /* OImode shouldn't be used directly. */
7170 gcc_unreachable ();
7171
7172 case DFmode:
7173 if (cum->float_in_sse < 2)
7174 break;
7175 case SFmode:
7176 if (cum->float_in_sse < 1)
7177 break;
7178 /* FALLTHRU */
7179
7180 case V8SFmode:
7181 case V8SImode:
7182 case V64QImode:
7183 case V32HImode:
7184 case V16SImode:
7185 case V8DImode:
7186 case V16SFmode:
7187 case V8DFmode:
7188 case V32QImode:
7189 case V16HImode:
7190 case V4DFmode:
7191 case V4DImode:
7192 case TImode:
7193 case V16QImode:
7194 case V8HImode:
7195 case V4SImode:
7196 case V2DImode:
7197 case V4SFmode:
7198 case V2DFmode:
7199 if (!type || !AGGREGATE_TYPE_P (type))
7200 {
7201 cum->sse_words += words;
7202 cum->sse_nregs -= 1;
7203 cum->sse_regno += 1;
7204 if (cum->sse_nregs <= 0)
7205 {
7206 cum->sse_nregs = 0;
7207 cum->sse_regno = 0;
7208 }
7209 }
7210 break;
7211
7212 case V8QImode:
7213 case V4HImode:
7214 case V2SImode:
7215 case V2SFmode:
7216 case V1TImode:
7217 case V1DImode:
7218 if (!type || !AGGREGATE_TYPE_P (type))
7219 {
7220 cum->mmx_words += words;
7221 cum->mmx_nregs -= 1;
7222 cum->mmx_regno += 1;
7223 if (cum->mmx_nregs <= 0)
7224 {
7225 cum->mmx_nregs = 0;
7226 cum->mmx_regno = 0;
7227 }
7228 }
7229 break;
7230 }
7231 }
7232
7233 static void
7234 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7235 const_tree type, HOST_WIDE_INT words, bool named)
7236 {
7237 int int_nregs, sse_nregs;
7238
7239 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7240 if (!named && (VALID_AVX512F_REG_MODE (mode)
7241 || VALID_AVX256_REG_MODE (mode)))
7242 return;
7243
7244 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7245 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7246 {
7247 cum->nregs -= int_nregs;
7248 cum->sse_nregs -= sse_nregs;
7249 cum->regno += int_nregs;
7250 cum->sse_regno += sse_nregs;
7251 }
7252 else
7253 {
7254 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7255 cum->words = (cum->words + align - 1) & ~(align - 1);
7256 cum->words += words;
7257 }
7258 }
7259
7260 static void
7261 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7262 HOST_WIDE_INT words)
7263 {
7264 /* Anything else should have been passed indirectly. */
7265 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7266
7267 cum->words += words;
7268 if (cum->nregs > 0)
7269 {
7270 cum->nregs -= 1;
7271 cum->regno += 1;
7272 }
7273 }
7274
7275 /* Update the data in CUM to advance over an argument of mode MODE and
7276 data type TYPE. (TYPE is null for libcalls where that information
7277 may not be available.) */
7278
7279 static void
7280 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7281 const_tree type, bool named)
7282 {
7283 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7284 HOST_WIDE_INT bytes, words;
7285
7286 if (mode == BLKmode)
7287 bytes = int_size_in_bytes (type);
7288 else
7289 bytes = GET_MODE_SIZE (mode);
7290 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7291
7292 if (type)
7293 mode = type_natural_mode (type, NULL, false);
7294
7295 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7296 function_arg_advance_ms_64 (cum, bytes, words);
7297 else if (TARGET_64BIT)
7298 function_arg_advance_64 (cum, mode, type, words, named);
7299 else
7300 function_arg_advance_32 (cum, mode, type, bytes, words);
7301 }
7302
7303 /* Define where to put the arguments to a function.
7304 Value is zero to push the argument on the stack,
7305 or a hard register in which to store the argument.
7306
7307 MODE is the argument's machine mode.
7308 TYPE is the data type of the argument (as a tree).
7309 This is null for libcalls where that information may
7310 not be available.
7311 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7312 the preceding args and about the function being called.
7313 NAMED is nonzero if this argument is a named parameter
7314 (otherwise it is an extra parameter matching an ellipsis). */
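/* For instance (illustrative): with the fastcall convention the code below
assigns the first two integer arguments no larger than SImode to %ecx and
%edx, so for a hypothetical
void __attribute__ ((fastcall)) f (int a, int b, int c);
A is passed in %ecx, B in %edx and C on the stack. */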
7315
7316 static rtx
7317 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7318 enum machine_mode orig_mode, const_tree type,
7319 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7320 {
7321 /* Avoid the AL settings for the Unix64 ABI. */
7322 if (mode == VOIDmode)
7323 return constm1_rtx;
7324
7325 switch (mode)
7326 {
7327 default:
7328 break;
7329
7330 case BLKmode:
7331 if (bytes < 0)
7332 break;
7333 /* FALLTHRU */
7334 case DImode:
7335 case SImode:
7336 case HImode:
7337 case QImode:
7338 if (words <= cum->nregs)
7339 {
7340 int regno = cum->regno;
7341
7342 /* Fastcall allocates the first two DWORD (SImode) or
7343 smaller arguments to ECX and EDX if they aren't
7344 aggregate types. */
7345 if (cum->fastcall)
7346 {
7347 if (mode == BLKmode
7348 || mode == DImode
7349 || (type && AGGREGATE_TYPE_P (type)))
7350 break;
7351
7352 /* ECX, not EAX, is the first allocated register. */
7353 if (regno == AX_REG)
7354 regno = CX_REG;
7355 }
7356 return gen_rtx_REG (mode, regno);
7357 }
7358 break;
7359
7360 case DFmode:
7361 if (cum->float_in_sse < 2)
7362 break;
7363 case SFmode:
7364 if (cum->float_in_sse < 1)
7365 break;
7366 /* FALLTHRU */
7367 case TImode:
7368 /* In 32bit, we pass TImode in xmm registers. */
7369 case V16QImode:
7370 case V8HImode:
7371 case V4SImode:
7372 case V2DImode:
7373 case V4SFmode:
7374 case V2DFmode:
7375 if (!type || !AGGREGATE_TYPE_P (type))
7376 {
7377 if (cum->sse_nregs)
7378 return gen_reg_or_parallel (mode, orig_mode,
7379 cum->sse_regno + FIRST_SSE_REG);
7380 }
7381 break;
7382
7383 case OImode:
7384 case XImode:
7385 /* OImode and XImode shouldn't be used directly. */
7386 gcc_unreachable ();
7387
7388 case V64QImode:
7389 case V32HImode:
7390 case V16SImode:
7391 case V8DImode:
7392 case V16SFmode:
7393 case V8DFmode:
7394 case V8SFmode:
7395 case V8SImode:
7396 case V32QImode:
7397 case V16HImode:
7398 case V4DFmode:
7399 case V4DImode:
7400 if (!type || !AGGREGATE_TYPE_P (type))
7401 {
7402 if (cum->sse_nregs)
7403 return gen_reg_or_parallel (mode, orig_mode,
7404 cum->sse_regno + FIRST_SSE_REG);
7405 }
7406 break;
7407
7408 case V8QImode:
7409 case V4HImode:
7410 case V2SImode:
7411 case V2SFmode:
7412 case V1TImode:
7413 case V1DImode:
7414 if (!type || !AGGREGATE_TYPE_P (type))
7415 {
7416 if (cum->mmx_nregs)
7417 return gen_reg_or_parallel (mode, orig_mode,
7418 cum->mmx_regno + FIRST_MMX_REG);
7419 }
7420 break;
7421 }
7422
7423 return NULL_RTX;
7424 }
7425
7426 static rtx
7427 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7428 enum machine_mode orig_mode, const_tree type, bool named)
7429 {
7430 /* Handle a hidden AL argument containing number of registers
7431 for varargs x86-64 functions. */
7432 if (mode == VOIDmode)
7433 return GEN_INT (cum->maybe_vaarg
7434 ? (cum->sse_nregs < 0
7435 ? X86_64_SSE_REGPARM_MAX
7436 : cum->sse_regno)
7437 : -1);
7438
7439 switch (mode)
7440 {
7441 default:
7442 break;
7443
7444 case V8SFmode:
7445 case V8SImode:
7446 case V32QImode:
7447 case V16HImode:
7448 case V4DFmode:
7449 case V4DImode:
7450 case V16SFmode:
7451 case V16SImode:
7452 case V64QImode:
7453 case V32HImode:
7454 case V8DFmode:
7455 case V8DImode:
7456 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7457 if (!named)
7458 return NULL;
7459 break;
7460 }
7461
7462 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7463 cum->sse_nregs,
7464 &x86_64_int_parameter_registers [cum->regno],
7465 cum->sse_regno);
7466 }
7467
7468 static rtx
7469 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7470 enum machine_mode orig_mode, bool named,
7471 HOST_WIDE_INT bytes)
7472 {
7473 unsigned int regno;
7474
7475 /* We need to add a clobber for MS_ABI -> SYSV ABI calls in expand_call.
7476 We use the value -2 to specify that the current function call is MSABI. */
7477 if (mode == VOIDmode)
7478 return GEN_INT (-2);
7479
7480 /* If we've run out of registers, it goes on the stack. */
7481 if (cum->nregs == 0)
7482 return NULL_RTX;
7483
7484 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7485
7486 /* Only floating point modes are passed in anything but integer regs. */
7487 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7488 {
7489 if (named)
7490 regno = cum->regno + FIRST_SSE_REG;
7491 else
7492 {
7493 rtx t1, t2;
7494
7495 /* Unnamed floating parameters are passed in both the
7496 SSE and integer registers. */
7497 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7498 t2 = gen_rtx_REG (mode, regno);
7499 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7500 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7501 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7502 }
7503 }
7504 /* Handle aggregate types passed in a register. */
7505 if (orig_mode == BLKmode)
7506 {
7507 if (bytes > 0 && bytes <= 8)
7508 mode = (bytes > 4 ? DImode : SImode);
7509 if (mode == BLKmode)
7510 mode = DImode;
7511 }
7512
7513 return gen_reg_or_parallel (mode, orig_mode, regno);
7514 }
7515
7516 /* Return where to put the arguments to a function.
7517 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7518
7519 MODE is the argument's machine mode. TYPE is the data type of the
7520 argument. It is null for libcalls where that information may not be
7521 available. CUM gives information about the preceding args and about
7522 the function being called. NAMED is nonzero if this argument is a
7523 named parameter (otherwise it is an extra parameter matching an
7524 ellipsis). */
7525
7526 static rtx
7527 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7528 const_tree type, bool named)
7529 {
7530 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7531 enum machine_mode mode = omode;
7532 HOST_WIDE_INT bytes, words;
7533 rtx arg;
7534
7535 if (mode == BLKmode)
7536 bytes = int_size_in_bytes (type);
7537 else
7538 bytes = GET_MODE_SIZE (mode);
7539 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7540
7541 /* To simplify the code below, represent vector types with a vector mode
7542 even if MMX/SSE are not active. */
7543 if (type && TREE_CODE (type) == VECTOR_TYPE)
7544 mode = type_natural_mode (type, cum, false);
7545
7546 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7547 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7548 else if (TARGET_64BIT)
7549 arg = function_arg_64 (cum, mode, omode, type, named);
7550 else
7551 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7552
7553 return arg;
7554 }
7555
7556 /* A C expression that indicates when an argument must be passed by
7557 reference. If nonzero for an argument, a copy of that argument is
7558 made in memory and a pointer to the argument is passed instead of
7559 the argument itself. The pointer is passed in whatever way is
7560 appropriate for passing a pointer to that type. */
7561
7562 static bool
7563 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7564 const_tree type, bool)
7565 {
7566 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7567
7568 /* See Windows x64 Software Convention. */
7569 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7570 {
7571 int msize = (int) GET_MODE_SIZE (mode);
7572 if (type)
7573 {
7574 /* Arrays are passed by reference. */
7575 if (TREE_CODE (type) == ARRAY_TYPE)
7576 return true;
7577
7578 if (AGGREGATE_TYPE_P (type))
7579 {
7580 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7581 are passed by reference. */
7582 msize = int_size_in_bytes (type);
7583 }
7584 }
7585
7586 /* __m128 is passed by reference. */
7587 switch (msize) {
7588 case 1: case 2: case 4: case 8:
7589 break;
7590 default:
7591 return true;
7592 }
7593 }
7594 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7595 return true;
7596
7597 return false;
7598 }
7599
7600 /* Return true when TYPE should be 128bit aligned for 32bit argument
7601 passing ABI. XXX: This function is obsolete and is only used for
7602 checking psABI compatibility with previous versions of GCC. */
7603
7604 static bool
7605 ix86_compat_aligned_value_p (const_tree type)
7606 {
7607 enum machine_mode mode = TYPE_MODE (type);
7608 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7609 || mode == TDmode
7610 || mode == TFmode
7611 || mode == TCmode)
7612 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7613 return true;
7614 if (TYPE_ALIGN (type) < 128)
7615 return false;
7616
7617 if (AGGREGATE_TYPE_P (type))
7618 {
7619 /* Walk the aggregates recursively. */
7620 switch (TREE_CODE (type))
7621 {
7622 case RECORD_TYPE:
7623 case UNION_TYPE:
7624 case QUAL_UNION_TYPE:
7625 {
7626 tree field;
7627
7628 /* Walk all the structure fields. */
7629 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7630 {
7631 if (TREE_CODE (field) == FIELD_DECL
7632 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7633 return true;
7634 }
7635 break;
7636 }
7637
7638 case ARRAY_TYPE:
7639 /* Just in case some language passes arrays by value. */
7640 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7641 return true;
7642 break;
7643
7644 default:
7645 gcc_unreachable ();
7646 }
7647 }
7648 return false;
7649 }
7650
7651 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7652 XXX: This function is obsolete and is only used for checking psABI
7653 compatibility with previous versions of GCC. */
7654
7655 static unsigned int
7656 ix86_compat_function_arg_boundary (enum machine_mode mode,
7657 const_tree type, unsigned int align)
7658 {
7659 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7660 natural boundaries. */
7661 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7662 {
7663 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7664 make an exception for SSE modes since these require 128bit
7665 alignment.
7666
7667 The handling here differs from field_alignment. ICC aligns MMX
7668 arguments to 4 byte boundaries, while structure fields are aligned
7669 to 8 byte boundaries. */
7670 if (!type)
7671 {
7672 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7673 align = PARM_BOUNDARY;
7674 }
7675 else
7676 {
7677 if (!ix86_compat_aligned_value_p (type))
7678 align = PARM_BOUNDARY;
7679 }
7680 }
7681 if (align > BIGGEST_ALIGNMENT)
7682 align = BIGGEST_ALIGNMENT;
7683 return align;
7684 }
7685
7686 /* Return true when TYPE should be 128bit aligned for 32bit argument
7687 passing ABI. */
7688
7689 static bool
7690 ix86_contains_aligned_value_p (const_tree type)
7691 {
7692 enum machine_mode mode = TYPE_MODE (type);
7693
7694 if (mode == XFmode || mode == XCmode)
7695 return false;
7696
7697 if (TYPE_ALIGN (type) < 128)
7698 return false;
7699
7700 if (AGGREGATE_TYPE_P (type))
7701 {
7702 /* Walk the aggregates recursively. */
7703 switch (TREE_CODE (type))
7704 {
7705 case RECORD_TYPE:
7706 case UNION_TYPE:
7707 case QUAL_UNION_TYPE:
7708 {
7709 tree field;
7710
7711 /* Walk all the structure fields. */
7712 for (field = TYPE_FIELDS (type);
7713 field;
7714 field = DECL_CHAIN (field))
7715 {
7716 if (TREE_CODE (field) == FIELD_DECL
7717 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7718 return true;
7719 }
7720 break;
7721 }
7722
7723 case ARRAY_TYPE:
7724 /* Just in case some language passes arrays by value. */
7725 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7726 return true;
7727 break;
7728
7729 default:
7730 gcc_unreachable ();
7731 }
7732 }
7733 else
7734 return TYPE_ALIGN (type) >= 128;
7735
7736 return false;
7737 }
7738
7739 /* Gives the alignment boundary, in bits, of an argument with the
7740 specified mode and type. */
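/* For example (illustrative): on 64-bit targets the natural alignment is
used, so a double argument is aligned to 64 bits; on 32-bit targets only
types containing a 128-bit-aligned (SSE) value keep an alignment above
PARM_BOUNDARY, and everything else, including double, is aligned to 32
bits. */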
7741
7742 static unsigned int
7743 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7744 {
7745 unsigned int align;
7746 if (type)
7747 {
7748 /* Since the main variant type is used for the call, convert the type
7749 to its main variant. */
7750 type = TYPE_MAIN_VARIANT (type);
7751 align = TYPE_ALIGN (type);
7752 }
7753 else
7754 align = GET_MODE_ALIGNMENT (mode);
7755 if (align < PARM_BOUNDARY)
7756 align = PARM_BOUNDARY;
7757 else
7758 {
7759 static bool warned;
7760 unsigned int saved_align = align;
7761
7762 if (!TARGET_64BIT)
7763 {
7764 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7765 if (!type)
7766 {
7767 if (mode == XFmode || mode == XCmode)
7768 align = PARM_BOUNDARY;
7769 }
7770 else if (!ix86_contains_aligned_value_p (type))
7771 align = PARM_BOUNDARY;
7772
7773 if (align < 128)
7774 align = PARM_BOUNDARY;
7775 }
7776
7777 if (warn_psabi
7778 && !warned
7779 && align != ix86_compat_function_arg_boundary (mode, type,
7780 saved_align))
7781 {
7782 warned = true;
7783 inform (input_location,
7784 "The ABI for passing parameters with %d-byte"
7785 " alignment has changed in GCC 4.6",
7786 align / BITS_PER_UNIT);
7787 }
7788 }
7789
7790 return align;
7791 }
7792
7793 /* Return true if N is a possible register number of function value. */
7794
7795 static bool
7796 ix86_function_value_regno_p (const unsigned int regno)
7797 {
7798 switch (regno)
7799 {
7800 case AX_REG:
7801 return true;
7802 case DX_REG:
7803 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7804 case DI_REG:
7805 case SI_REG:
7806 return TARGET_64BIT && ix86_abi != MS_ABI;
7807
7808 /* Complex values are returned in %st(0)/%st(1) pair. */
7809 case ST0_REG:
7810 case ST1_REG:
7811 /* TODO: The function should depend on current function ABI but
7812 builtins.c would need updating then. Therefore we use the
7813 default ABI. */
7814 if (TARGET_64BIT && ix86_abi == MS_ABI)
7815 return false;
7816 return TARGET_FLOAT_RETURNS_IN_80387;
7817
7818 /* Complex values are returned in %xmm0/%xmm1 pair. */
7819 case XMM0_REG:
7820 case XMM1_REG:
7821 return TARGET_SSE;
7822
7823 case MM0_REG:
7824 if (TARGET_MACHO || TARGET_64BIT)
7825 return false;
7826 return TARGET_MMX;
7827 }
7828
7829 return false;
7830 }
7831
7832 /* Define how to find the value returned by a function.
7833 VALTYPE is the data type of the value (as a tree).
7834 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7835 otherwise, FUNC is 0. */
7836
7837 static rtx
7838 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7839 const_tree fntype, const_tree fn)
7840 {
7841 unsigned int regno;
7842
7843 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7844 we normally prevent this case when mmx is not available. However
7845 some ABIs may require the result to be returned like DImode. */
7846 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7847 regno = FIRST_MMX_REG;
7848
7849 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7850 we prevent this case when sse is not available. However some ABIs
7851 may require the result to be returned like integer TImode. */
7852 else if (mode == TImode
7853 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7854 regno = FIRST_SSE_REG;
7855
7856 /* 32-byte vector modes in %ymm0. */
7857 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7858 regno = FIRST_SSE_REG;
7859
7860 /* 64-byte vector modes in %zmm0. */
7861 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7862 regno = FIRST_SSE_REG;
7863
7864 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7865 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7866 regno = FIRST_FLOAT_REG;
7867 else
7868 /* Most things go in %eax. */
7869 regno = AX_REG;
7870
7871 /* Override FP return register with %xmm0 for local functions when
7872 SSE math is enabled or for functions with sseregparm attribute. */
7873 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7874 {
7875 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7876 if ((sse_level >= 1 && mode == SFmode)
7877 || (sse_level == 2 && mode == DFmode))
7878 regno = FIRST_SSE_REG;
7879 }
7880
7881 /* OImode shouldn't be used directly. */
7882 gcc_assert (mode != OImode);
7883
7884 return gen_rtx_REG (orig_mode, regno);
7885 }
7886
7887 static rtx
7888 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7889 const_tree valtype)
7890 {
7891 rtx ret;
7892
7893 /* Handle libcalls, which don't provide a type node. */
7894 if (valtype == NULL)
7895 {
7896 unsigned int regno;
7897
7898 switch (mode)
7899 {
7900 case SFmode:
7901 case SCmode:
7902 case DFmode:
7903 case DCmode:
7904 case TFmode:
7905 case SDmode:
7906 case DDmode:
7907 case TDmode:
7908 regno = FIRST_SSE_REG;
7909 break;
7910 case XFmode:
7911 case XCmode:
7912 regno = FIRST_FLOAT_REG;
7913 break;
7914 case TCmode:
7915 return NULL;
7916 default:
7917 regno = AX_REG;
7918 }
7919
7920 return gen_rtx_REG (mode, regno);
7921 }
7922 else if (POINTER_TYPE_P (valtype))
7923 {
7924 /* Pointers are always returned in word_mode. */
7925 mode = word_mode;
7926 }
7927
7928 ret = construct_container (mode, orig_mode, valtype, 1,
7929 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7930 x86_64_int_return_registers, 0);
7931
7932 /* For zero-sized structures, construct_container returns NULL, but we
7933 need to keep the rest of the compiler happy by returning a meaningful value. */
7934 if (!ret)
7935 ret = gen_rtx_REG (orig_mode, AX_REG);
7936
7937 return ret;
7938 }
7939
7940 static rtx
7941 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7942 const_tree valtype)
7943 {
7944 unsigned int regno = AX_REG;
7945
7946 if (TARGET_SSE)
7947 {
7948 switch (GET_MODE_SIZE (mode))
7949 {
7950 case 16:
7951 if (valtype != NULL_TREE
7952 && !VECTOR_INTEGER_TYPE_P (valtype)
7954 && !INTEGRAL_TYPE_P (valtype)
7955 && !VECTOR_FLOAT_TYPE_P (valtype))
7956 break;
7957 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7958 && !COMPLEX_MODE_P (mode))
7959 regno = FIRST_SSE_REG;
7960 break;
7961 case 8:
7962 case 4:
7963 if (mode == SFmode || mode == DFmode)
7964 regno = FIRST_SSE_REG;
7965 break;
7966 default:
7967 break;
7968 }
7969 }
7970 return gen_rtx_REG (orig_mode, regno);
7971 }
7972
7973 static rtx
7974 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7975 enum machine_mode orig_mode, enum machine_mode mode)
7976 {
7977 const_tree fn, fntype;
7978
7979 fn = NULL_TREE;
7980 if (fntype_or_decl && DECL_P (fntype_or_decl))
7981 fn = fntype_or_decl;
7982 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7983
7984 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7985 return function_value_ms_64 (orig_mode, mode, valtype);
7986 else if (TARGET_64BIT)
7987 return function_value_64 (orig_mode, mode, valtype);
7988 else
7989 return function_value_32 (orig_mode, mode, fntype, fn);
7990 }
7991
7992 static rtx
7993 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
7994 {
7995 enum machine_mode mode, orig_mode;
7996
7997 orig_mode = TYPE_MODE (valtype);
7998 mode = type_natural_mode (valtype, NULL, true);
7999 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8000 }
8001
8002 /* Pointer function arguments and return values are promoted to
8003 word_mode. */
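/* For example (illustrative): on a 64-bit target a char * argument or
return value is promoted to DImode (word_mode) and marked for
zero-extension via POINTERS_EXTEND_UNSIGNED; non-pointer types fall
through to the default promotion rules. */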
8004
8005 static enum machine_mode
8006 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8007 int *punsignedp, const_tree fntype,
8008 int for_return)
8009 {
8010 if (type != NULL_TREE && POINTER_TYPE_P (type))
8011 {
8012 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8013 return word_mode;
8014 }
8015 return default_promote_function_mode (type, mode, punsignedp, fntype,
8016 for_return);
8017 }
8018
8019 /* Return true if a structure, union or array with MODE containing FIELD
8020 should be accessed using BLKmode. */
8021
8022 static bool
8023 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8024 {
8025 /* Union with XFmode must be in BLKmode. */
8026 return (mode == XFmode
8027 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8028 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8029 }
8030
8031 rtx
8032 ix86_libcall_value (enum machine_mode mode)
8033 {
8034 return ix86_function_value_1 (NULL, NULL, mode, mode);
8035 }
8036
8037 /* Return true iff type is returned in memory. */
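/* Illustrative cases handled below: on 32-bit targets a 16-byte vector such
as __m128 is returned in %xmm0 when SSE is enabled but in memory with
-mno-sse, an XFmode long double is returned in %st(0), and any other value
larger than 12 bytes goes in memory; under the 64-bit MS ABI only values of
size 1, 2, 4 or 8 bytes, or 16-byte vectors, are returned in registers. */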
8038
8039 static bool
8040 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8041 {
8042 #ifdef SUBTARGET_RETURN_IN_MEMORY
8043 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8044 #else
8045 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8046 HOST_WIDE_INT size;
8047
8048 if (TARGET_64BIT)
8049 {
8050 if (ix86_function_type_abi (fntype) == MS_ABI)
8051 {
8052 size = int_size_in_bytes (type);
8053
8054 /* __m128 is returned in xmm0. */
8055 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8056 || INTEGRAL_TYPE_P (type)
8057 || VECTOR_FLOAT_TYPE_P (type))
8058 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8059 && !COMPLEX_MODE_P (mode)
8060 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8061 return false;
8062
8063 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8064 return size != 1 && size != 2 && size != 4 && size != 8;
8065 }
8066 else
8067 {
8068 int needed_intregs, needed_sseregs;
8069
8070 return examine_argument (mode, type, 1,
8071 &needed_intregs, &needed_sseregs);
8072 }
8073 }
8074 else
8075 {
8076 if (mode == BLKmode)
8077 return true;
8078
8079 size = int_size_in_bytes (type);
8080
8081 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8082 return false;
8083
8084 if (VECTOR_MODE_P (mode) || mode == TImode)
8085 {
8086 /* User-created vectors small enough to fit in EAX. */
8087 if (size < 8)
8088 return false;
8089
8090 /* Unless the ABI prescribes otherwise,
8091 MMX/3dNow values are returned in MM0 if available. */
8092
8093 if (size == 8)
8094 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8095
8096 /* SSE values are returned in XMM0 if available. */
8097 if (size == 16)
8098 return !TARGET_SSE;
8099
8100 /* AVX values are returned in YMM0 if available. */
8101 if (size == 32)
8102 return !TARGET_AVX;
8103
8104 /* AVX512F values are returned in ZMM0 if available. */
8105 if (size == 64)
8106 return !TARGET_AVX512F;
8107 }
8108
8109 if (mode == XFmode)
8110 return false;
8111
8112 if (size > 12)
8113 return true;
8114
8115 /* OImode shouldn't be used directly. */
8116 gcc_assert (mode != OImode);
8117
8118 return false;
8119 }
8120 #endif
8121 }
8122
8123 \f
8124 /* Create the va_list data type. */
8125
8126 /* Returns the calling convention specific va_list data type.
8127 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
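/* The 64-bit SYSV record built below corresponds to the familiar C-level
layout (shown for illustration only; the fields are created
programmatically just after this comment):
typedef struct __va_list_tag {
unsigned int gp_offset;
unsigned int fp_offset;
void *overflow_arg_area;
void *reg_save_area;
} __builtin_va_list[1];
*/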
8128
8129 static tree
8130 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8131 {
8132 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8133
8134 /* For i386 we use a plain pointer to the argument area. */
8135 if (!TARGET_64BIT || abi == MS_ABI)
8136 return build_pointer_type (char_type_node);
8137
8138 record = lang_hooks.types.make_type (RECORD_TYPE);
8139 type_decl = build_decl (BUILTINS_LOCATION,
8140 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8141
8142 f_gpr = build_decl (BUILTINS_LOCATION,
8143 FIELD_DECL, get_identifier ("gp_offset"),
8144 unsigned_type_node);
8145 f_fpr = build_decl (BUILTINS_LOCATION,
8146 FIELD_DECL, get_identifier ("fp_offset"),
8147 unsigned_type_node);
8148 f_ovf = build_decl (BUILTINS_LOCATION,
8149 FIELD_DECL, get_identifier ("overflow_arg_area"),
8150 ptr_type_node);
8151 f_sav = build_decl (BUILTINS_LOCATION,
8152 FIELD_DECL, get_identifier ("reg_save_area"),
8153 ptr_type_node);
8154
8155 va_list_gpr_counter_field = f_gpr;
8156 va_list_fpr_counter_field = f_fpr;
8157
8158 DECL_FIELD_CONTEXT (f_gpr) = record;
8159 DECL_FIELD_CONTEXT (f_fpr) = record;
8160 DECL_FIELD_CONTEXT (f_ovf) = record;
8161 DECL_FIELD_CONTEXT (f_sav) = record;
8162
8163 TYPE_STUB_DECL (record) = type_decl;
8164 TYPE_NAME (record) = type_decl;
8165 TYPE_FIELDS (record) = f_gpr;
8166 DECL_CHAIN (f_gpr) = f_fpr;
8167 DECL_CHAIN (f_fpr) = f_ovf;
8168 DECL_CHAIN (f_ovf) = f_sav;
8169
8170 layout_type (record);
8171
8172 /* The correct type is an array type of one element. */
8173 return build_array_type (record, build_index_type (size_zero_node));
8174 }
8175
8176 /* Set up the builtin va_list data type and, for 64-bit, the additional
8177 calling convention specific va_list data types. */
8178
8179 static tree
8180 ix86_build_builtin_va_list (void)
8181 {
8182 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8183
8184 /* Initialize abi specific va_list builtin types. */
8185 if (TARGET_64BIT)
8186 {
8187 tree t;
8188 if (ix86_abi == MS_ABI)
8189 {
8190 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8191 if (TREE_CODE (t) != RECORD_TYPE)
8192 t = build_variant_type_copy (t);
8193 sysv_va_list_type_node = t;
8194 }
8195 else
8196 {
8197 t = ret;
8198 if (TREE_CODE (t) != RECORD_TYPE)
8199 t = build_variant_type_copy (t);
8200 sysv_va_list_type_node = t;
8201 }
8202 if (ix86_abi != MS_ABI)
8203 {
8204 t = ix86_build_builtin_va_list_abi (MS_ABI);
8205 if (TREE_CODE (t) != RECORD_TYPE)
8206 t = build_variant_type_copy (t);
8207 ms_va_list_type_node = t;
8208 }
8209 else
8210 {
8211 t = ret;
8212 if (TREE_CODE (t) != RECORD_TYPE)
8213 t = build_variant_type_copy (t);
8214 ms_va_list_type_node = t;
8215 }
8216 }
8217
8218 return ret;
8219 }
8220
8221 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8222
8223 static void
8224 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8225 {
8226 rtx save_area, mem;
8227 alias_set_type set;
8228 int i, max;
8229
8230 /* GPR size of varargs save area. */
8231 if (cfun->va_list_gpr_size)
8232 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8233 else
8234 ix86_varargs_gpr_size = 0;
8235
8236 /* FPR size of varargs save area. We don't need it if we don't pass
8237 anything in SSE registers. */
8238 if (TARGET_SSE && cfun->va_list_fpr_size)
8239 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8240 else
8241 ix86_varargs_fpr_size = 0;
8242
8243 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8244 return;
8245
8246 save_area = frame_pointer_rtx;
8247 set = get_varargs_alias_set ();
8248
8249 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8250 if (max > X86_64_REGPARM_MAX)
8251 max = X86_64_REGPARM_MAX;
8252
8253 for (i = cum->regno; i < max; i++)
8254 {
8255 mem = gen_rtx_MEM (word_mode,
8256 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8257 MEM_NOTRAP_P (mem) = 1;
8258 set_mem_alias_set (mem, set);
8259 emit_move_insn (mem,
8260 gen_rtx_REG (word_mode,
8261 x86_64_int_parameter_registers[i]));
8262 }
8263
8264 if (ix86_varargs_fpr_size)
8265 {
8266 enum machine_mode smode;
8267 rtx_code_label *label;
8268 rtx test;
8269
8270       /* Now emit code to save SSE registers.  The AX parameter contains the
8271 	 number of SSE parameter registers used to call this function, though
8272 	 all we actually check here is its zero/non-zero status.  */
8273
8274 label = gen_label_rtx ();
8275 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8276 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8277 label));
8278
8279 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8280 we used movdqa (i.e. TImode) instead? Perhaps even better would
8281 be if we could determine the real mode of the data, via a hook
8282 into pass_stdarg. Ignore all that for now. */
8283 smode = V4SFmode;
8284 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8285 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8286
8287 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8288 if (max > X86_64_SSE_REGPARM_MAX)
8289 max = X86_64_SSE_REGPARM_MAX;
8290
8291 for (i = cum->sse_regno; i < max; ++i)
8292 {
8293 mem = plus_constant (Pmode, save_area,
8294 i * 16 + ix86_varargs_gpr_size);
8295 mem = gen_rtx_MEM (smode, mem);
8296 MEM_NOTRAP_P (mem) = 1;
8297 set_mem_alias_set (mem, set);
8298 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8299
8300 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8301 }
8302
8303 emit_label (label);
8304 }
8305 }
8306
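/* A sketch (illustrative only) of the save area laid out above, assuming
   X86_64_REGPARM_MAX == 6, X86_64_SSE_REGPARM_MAX == 8 and that both areas
   are needed:

       save_area +   0 .. +  47    rdi, rsi, rdx, rcx, r8, r9  (8 bytes each)
       save_area +  48 .. + 175    xmm0 .. xmm7                (16 bytes each)

   Only the registers from CUM->regno (resp. CUM->sse_regno) upwards are
   stored, and the SSE block is skipped at run time when %al is zero.  */
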
8307 static void
8308 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8309 {
8310 alias_set_type set = get_varargs_alias_set ();
8311 int i;
8312
8313   /* Reset to zero, as there might have been a sysv va_arg used
8314      before.  */
8315 ix86_varargs_gpr_size = 0;
8316 ix86_varargs_fpr_size = 0;
8317
8318 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8319 {
8320 rtx reg, mem;
8321
8322 mem = gen_rtx_MEM (Pmode,
8323 plus_constant (Pmode, virtual_incoming_args_rtx,
8324 i * UNITS_PER_WORD));
8325 MEM_NOTRAP_P (mem) = 1;
8326 set_mem_alias_set (mem, set);
8327
8328 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8329 emit_move_insn (mem, reg);
8330 }
8331 }
8332
8333 static void
8334 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8335 tree type, int *, int no_rtl)
8336 {
8337 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8338 CUMULATIVE_ARGS next_cum;
8339 tree fntype;
8340
8341   /* This argument doesn't appear to be used anymore, which is good,
8342      because the old code here didn't suppress rtl generation.  */
8343 gcc_assert (!no_rtl);
8344
8345 if (!TARGET_64BIT)
8346 return;
8347
8348 fntype = TREE_TYPE (current_function_decl);
8349
8350 /* For varargs, we do not want to skip the dummy va_dcl argument.
8351 For stdargs, we do want to skip the last named argument. */
8352 next_cum = *cum;
8353 if (stdarg_p (fntype))
8354 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8355 true);
8356
8357 if (cum->call_abi == MS_ABI)
8358 setup_incoming_varargs_ms_64 (&next_cum);
8359 else
8360 setup_incoming_varargs_64 (&next_cum);
8361 }
8362
8363 /* Checks if TYPE is of kind va_list char *. */
8364
8365 static bool
8366 is_va_list_char_pointer (tree type)
8367 {
8368 tree canonic;
8369
8370 /* For 32-bit it is always true. */
8371 if (!TARGET_64BIT)
8372 return true;
8373 canonic = ix86_canonical_va_list_type (type);
8374 return (canonic == ms_va_list_type_node
8375 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8376 }
8377
8378 /* Implement va_start. */
8379
8380 static void
8381 ix86_va_start (tree valist, rtx nextarg)
8382 {
8383 HOST_WIDE_INT words, n_gpr, n_fpr;
8384 tree f_gpr, f_fpr, f_ovf, f_sav;
8385 tree gpr, fpr, ovf, sav, t;
8386 tree type;
8387 rtx ovf_rtx;
8388
8389 if (flag_split_stack
8390 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8391 {
8392 unsigned int scratch_regno;
8393
8394 /* When we are splitting the stack, we can't refer to the stack
8395 arguments using internal_arg_pointer, because they may be on
8396 the old stack. The split stack prologue will arrange to
8397 leave a pointer to the old stack arguments in a scratch
8398 register, which we here copy to a pseudo-register. The split
8399 stack prologue can't set the pseudo-register directly because
8400 it (the prologue) runs before any registers have been saved. */
8401
8402 scratch_regno = split_stack_prologue_scratch_regno ();
8403 if (scratch_regno != INVALID_REGNUM)
8404 {
8405 rtx reg;
8406 rtx_insn *seq;
8407
8408 reg = gen_reg_rtx (Pmode);
8409 cfun->machine->split_stack_varargs_pointer = reg;
8410
8411 start_sequence ();
8412 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8413 seq = get_insns ();
8414 end_sequence ();
8415
8416 push_topmost_sequence ();
8417 emit_insn_after (seq, entry_of_function ());
8418 pop_topmost_sequence ();
8419 }
8420 }
8421
8422   /* Only the 64-bit target needs something special.  */
8423 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8424 {
8425 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8426 std_expand_builtin_va_start (valist, nextarg);
8427 else
8428 {
8429 rtx va_r, next;
8430
8431 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8432 next = expand_binop (ptr_mode, add_optab,
8433 cfun->machine->split_stack_varargs_pointer,
8434 crtl->args.arg_offset_rtx,
8435 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8436 convert_move (va_r, next, 0);
8437 }
8438 return;
8439 }
8440
8441 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8442 f_fpr = DECL_CHAIN (f_gpr);
8443 f_ovf = DECL_CHAIN (f_fpr);
8444 f_sav = DECL_CHAIN (f_ovf);
8445
8446 valist = build_simple_mem_ref (valist);
8447 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8448 /* The following should be folded into the MEM_REF offset. */
8449 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8450 f_gpr, NULL_TREE);
8451 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8452 f_fpr, NULL_TREE);
8453 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8454 f_ovf, NULL_TREE);
8455 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8456 f_sav, NULL_TREE);
8457
8458 /* Count number of gp and fp argument registers used. */
8459 words = crtl->args.info.words;
8460 n_gpr = crtl->args.info.regno;
8461 n_fpr = crtl->args.info.sse_regno;
8462
8463 if (cfun->va_list_gpr_size)
8464 {
8465 type = TREE_TYPE (gpr);
8466 t = build2 (MODIFY_EXPR, type,
8467 gpr, build_int_cst (type, n_gpr * 8));
8468 TREE_SIDE_EFFECTS (t) = 1;
8469 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8470 }
8471
8472 if (TARGET_SSE && cfun->va_list_fpr_size)
8473 {
8474 type = TREE_TYPE (fpr);
8475 t = build2 (MODIFY_EXPR, type, fpr,
8476 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8477 TREE_SIDE_EFFECTS (t) = 1;
8478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8479 }
8480
8481 /* Find the overflow area. */
8482 type = TREE_TYPE (ovf);
8483 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8484 ovf_rtx = crtl->args.internal_arg_pointer;
8485 else
8486 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8487 t = make_tree (type, ovf_rtx);
8488 if (words != 0)
8489 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8490 t = build2 (MODIFY_EXPR, type, ovf, t);
8491 TREE_SIDE_EFFECTS (t) = 1;
8492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8493
8494 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8495 {
8496 /* Find the register save area.
8497 	 The function prologue saves it right above the stack frame.  */
8498 type = TREE_TYPE (sav);
8499 t = make_tree (type, frame_pointer_rtx);
8500 if (!ix86_varargs_gpr_size)
8501 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8502 t = build2 (MODIFY_EXPR, type, sav, t);
8503 TREE_SIDE_EFFECTS (t) = 1;
8504 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8505 }
8506 }
8507
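/* In C terms the expansion above amounts to the following sketch
   (illustrative only, using the field names of the va_list record
   built earlier):

       ap->gp_offset          = n_gpr * 8;
       ap->fp_offset          = X86_64_REGPARM_MAX * 8 + n_fpr * 16;
       ap->overflow_arg_area  = incoming argument pointer + words * UNITS_PER_WORD;
       ap->reg_save_area      = register save area written by the prologue;  */
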
8508 /* Implement va_arg. */
8509
8510 static tree
8511 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8512 gimple_seq *post_p)
8513 {
8514 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8515 tree f_gpr, f_fpr, f_ovf, f_sav;
8516 tree gpr, fpr, ovf, sav, t;
8517 int size, rsize;
8518 tree lab_false, lab_over = NULL_TREE;
8519 tree addr, t2;
8520 rtx container;
8521 int indirect_p = 0;
8522 tree ptrtype;
8523 enum machine_mode nat_mode;
8524 unsigned int arg_boundary;
8525
8526   /* Only the 64-bit target needs something special.  */
8527 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8528 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8529
8530 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8531 f_fpr = DECL_CHAIN (f_gpr);
8532 f_ovf = DECL_CHAIN (f_fpr);
8533 f_sav = DECL_CHAIN (f_ovf);
8534
8535 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8536 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8537 valist = build_va_arg_indirect_ref (valist);
8538 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8539 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8540 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8541
8542 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8543 if (indirect_p)
8544 type = build_pointer_type (type);
8545 size = int_size_in_bytes (type);
8546 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8547
8548 nat_mode = type_natural_mode (type, NULL, false);
8549 switch (nat_mode)
8550 {
8551 case V8SFmode:
8552 case V8SImode:
8553 case V32QImode:
8554 case V16HImode:
8555 case V4DFmode:
8556 case V4DImode:
8557 case V16SFmode:
8558 case V16SImode:
8559 case V64QImode:
8560 case V32HImode:
8561 case V8DFmode:
8562 case V8DImode:
8563       /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack.  */
8564 if (!TARGET_64BIT_MS_ABI)
8565 {
8566 container = NULL;
8567 break;
8568 }
8569
8570 default:
8571 container = construct_container (nat_mode, TYPE_MODE (type),
8572 type, 0, X86_64_REGPARM_MAX,
8573 X86_64_SSE_REGPARM_MAX, intreg,
8574 0);
8575 break;
8576 }
8577
8578 /* Pull the value out of the saved registers. */
8579
8580 addr = create_tmp_var (ptr_type_node, "addr");
8581
8582 if (container)
8583 {
8584 int needed_intregs, needed_sseregs;
8585 bool need_temp;
8586 tree int_addr, sse_addr;
8587
8588 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8589 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8590
8591 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8592
8593 need_temp = (!REG_P (container)
8594 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8595 || TYPE_ALIGN (type) > 128));
8596
8597       /* In case we are passing a structure, verify that it is a consecutive block
8598          in the register save area.  If not, we need to do moves.  */
8599 if (!need_temp && !REG_P (container))
8600 {
8601 	  /* Verify that all registers are strictly consecutive.  */
8602 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8603 {
8604 int i;
8605
8606 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8607 {
8608 rtx slot = XVECEXP (container, 0, i);
8609 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8610 || INTVAL (XEXP (slot, 1)) != i * 16)
8611 need_temp = 1;
8612 }
8613 }
8614 else
8615 {
8616 int i;
8617
8618 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8619 {
8620 rtx slot = XVECEXP (container, 0, i);
8621 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8622 || INTVAL (XEXP (slot, 1)) != i * 8)
8623 need_temp = 1;
8624 }
8625 }
8626 }
8627 if (!need_temp)
8628 {
8629 int_addr = addr;
8630 sse_addr = addr;
8631 }
8632 else
8633 {
8634 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8635 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8636 }
8637
8638 /* First ensure that we fit completely in registers. */
8639 if (needed_intregs)
8640 {
8641 t = build_int_cst (TREE_TYPE (gpr),
8642 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8643 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8644 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8645 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8646 gimplify_and_add (t, pre_p);
8647 }
8648 if (needed_sseregs)
8649 {
8650 t = build_int_cst (TREE_TYPE (fpr),
8651 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8652 + X86_64_REGPARM_MAX * 8);
8653 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8654 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8655 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8656 gimplify_and_add (t, pre_p);
8657 }
8658
8659 /* Compute index to start of area used for integer regs. */
8660 if (needed_intregs)
8661 {
8662 /* int_addr = gpr + sav; */
8663 t = fold_build_pointer_plus (sav, gpr);
8664 gimplify_assign (int_addr, t, pre_p);
8665 }
8666 if (needed_sseregs)
8667 {
8668 /* sse_addr = fpr + sav; */
8669 t = fold_build_pointer_plus (sav, fpr);
8670 gimplify_assign (sse_addr, t, pre_p);
8671 }
8672 if (need_temp)
8673 {
8674 int i, prev_size = 0;
8675 tree temp = create_tmp_var (type, "va_arg_tmp");
8676
8677 /* addr = &temp; */
8678 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8679 gimplify_assign (addr, t, pre_p);
8680
8681 for (i = 0; i < XVECLEN (container, 0); i++)
8682 {
8683 rtx slot = XVECEXP (container, 0, i);
8684 rtx reg = XEXP (slot, 0);
8685 enum machine_mode mode = GET_MODE (reg);
8686 tree piece_type;
8687 tree addr_type;
8688 tree daddr_type;
8689 tree src_addr, src;
8690 int src_offset;
8691 tree dest_addr, dest;
8692 int cur_size = GET_MODE_SIZE (mode);
8693
8694 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8695 prev_size = INTVAL (XEXP (slot, 1));
8696 if (prev_size + cur_size > size)
8697 {
8698 cur_size = size - prev_size;
8699 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8700 if (mode == BLKmode)
8701 mode = QImode;
8702 }
8703 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8704 if (mode == GET_MODE (reg))
8705 addr_type = build_pointer_type (piece_type);
8706 else
8707 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8708 true);
8709 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8710 true);
8711
8712 if (SSE_REGNO_P (REGNO (reg)))
8713 {
8714 src_addr = sse_addr;
8715 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8716 }
8717 else
8718 {
8719 src_addr = int_addr;
8720 src_offset = REGNO (reg) * 8;
8721 }
8722 src_addr = fold_convert (addr_type, src_addr);
8723 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8724
8725 dest_addr = fold_convert (daddr_type, addr);
8726 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8727 if (cur_size == GET_MODE_SIZE (mode))
8728 {
8729 src = build_va_arg_indirect_ref (src_addr);
8730 dest = build_va_arg_indirect_ref (dest_addr);
8731
8732 gimplify_assign (dest, src, pre_p);
8733 }
8734 else
8735 {
8736 tree copy
8737 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8738 3, dest_addr, src_addr,
8739 size_int (cur_size));
8740 gimplify_and_add (copy, pre_p);
8741 }
8742 prev_size += cur_size;
8743 }
8744 }
8745
8746 if (needed_intregs)
8747 {
8748 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8749 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8750 gimplify_assign (gpr, t, pre_p);
8751 }
8752
8753 if (needed_sseregs)
8754 {
8755 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8756 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8757 gimplify_assign (fpr, t, pre_p);
8758 }
8759
8760 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8761
8762 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8763 }
8764
8765 /* ... otherwise out of the overflow area. */
8766
8767 /* When we align parameter on stack for caller, if the parameter
8768 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
8769 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
8770 here with caller. */
8771 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8772 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8773 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8774
8775 /* Care for on-stack alignment if needed. */
8776 if (arg_boundary <= 64 || size == 0)
8777 t = ovf;
8778 else
8779 {
8780 HOST_WIDE_INT align = arg_boundary / 8;
8781 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8782 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8783 build_int_cst (TREE_TYPE (t), -align));
8784 }
8785
8786 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8787 gimplify_assign (addr, t, pre_p);
8788
8789 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8790 gimplify_assign (unshare_expr (ovf), t, pre_p);
8791
8792 if (container)
8793 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8794
8795 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8796 addr = fold_convert (ptrtype, addr);
8797
8798 if (indirect_p)
8799 addr = build_va_arg_indirect_ref (addr);
8800 return build_va_arg_indirect_ref (addr);
8801 }
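
/* For a scalar integer argument the GIMPLE generated above corresponds to
   this C sketch (illustrative only; 48 == X86_64_REGPARM_MAX * 8):

       if (ap->gp_offset >= 48)                 -- no GP register slot left
         {
           addr = ap->overflow_arg_area;
           ap->overflow_arg_area = (char *) addr + 8;
         }
       else
         {
           addr = (char *) ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8;
         }
       result = *(int *) addr;

   Aggregates whose pieces are not laid out consecutively in the save area
   (e.g. mixed GP/SSE members) are additionally copied piecewise into a
   temporary via the need_temp path above.  */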
8802 \f
8803 /* Return true if OPNUM's MEM should be matched
8804 in movabs* patterns. */
8805
8806 bool
8807 ix86_check_movabs (rtx insn, int opnum)
8808 {
8809 rtx set, mem;
8810
8811 set = PATTERN (insn);
8812 if (GET_CODE (set) == PARALLEL)
8813 set = XVECEXP (set, 0, 0);
8814 gcc_assert (GET_CODE (set) == SET);
8815 mem = XEXP (set, opnum);
8816 while (GET_CODE (mem) == SUBREG)
8817 mem = SUBREG_REG (mem);
8818 gcc_assert (MEM_P (mem));
8819 return volatile_ok || !MEM_VOLATILE_P (mem);
8820 }
8821 \f
8822 /* Initialize the table of extra 80387 mathematical constants. */
8823
8824 static void
8825 init_ext_80387_constants (void)
8826 {
8827 static const char * cst[5] =
8828 {
8829 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8830 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8831 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8832 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8833 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8834 };
8835 int i;
8836
8837 for (i = 0; i < 5; i++)
8838 {
8839 real_from_string (&ext_80387_constants_table[i], cst[i]);
8840 /* Ensure each constant is rounded to XFmode precision. */
8841 real_convert (&ext_80387_constants_table[i],
8842 XFmode, &ext_80387_constants_table[i]);
8843 }
8844
8845 ext_80387_constants_init = 1;
8846 }
8847
8848 /* Return non-zero if the constant is something that
8849 can be loaded with a special instruction. */
8850
8851 int
8852 standard_80387_constant_p (rtx x)
8853 {
8854 enum machine_mode mode = GET_MODE (x);
8855
8856 REAL_VALUE_TYPE r;
8857
8858 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8859 return -1;
8860
8861 if (x == CONST0_RTX (mode))
8862 return 1;
8863 if (x == CONST1_RTX (mode))
8864 return 2;
8865
8866 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8867
8868 /* For XFmode constants, try to find a special 80387 instruction when
8869 optimizing for size or on those CPUs that benefit from them. */
8870 if (mode == XFmode
8871 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8872 {
8873 int i;
8874
8875 if (! ext_80387_constants_init)
8876 init_ext_80387_constants ();
8877
8878 for (i = 0; i < 5; i++)
8879 if (real_identical (&r, &ext_80387_constants_table[i]))
8880 return i + 3;
8881 }
8882
8883 /* Load of the constant -0.0 or -1.0 will be split as
8884 fldz;fchs or fld1;fchs sequence. */
8885 if (real_isnegzero (&r))
8886 return 8;
8887 if (real_identical (&r, &dconstm1))
8888 return 9;
8889
8890 return 0;
8891 }
8892
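/* For reference, the return codes of standard_80387_constant_p map to the
   following loads (see standard_80387_constant_opcode below):

      -1  not an 80387 constant        5  fldl2e  (log2 e)
       0  not a special constant       6  fldl2t  (log2 10)
       1  fldz    (+0.0)               7  fldpi   (pi)
       2  fld1    (+1.0)               8  -0.0    (fldz; fchs)
       3  fldlg2  (log10 2)            9  -1.0    (fld1; fchs)
       4  fldln2  (ln 2)  */
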
8893 /* Return the opcode of the special instruction to be used to load
8894 the constant X. */
8895
8896 const char *
8897 standard_80387_constant_opcode (rtx x)
8898 {
8899 switch (standard_80387_constant_p (x))
8900 {
8901 case 1:
8902 return "fldz";
8903 case 2:
8904 return "fld1";
8905 case 3:
8906 return "fldlg2";
8907 case 4:
8908 return "fldln2";
8909 case 5:
8910 return "fldl2e";
8911 case 6:
8912 return "fldl2t";
8913 case 7:
8914 return "fldpi";
8915 case 8:
8916 case 9:
8917 return "#";
8918 default:
8919 gcc_unreachable ();
8920 }
8921 }
8922
8923 /* Return the CONST_DOUBLE representing the 80387 constant that is
8924 loaded by the specified special instruction. The argument IDX
8925 matches the return value from standard_80387_constant_p. */
8926
8927 rtx
8928 standard_80387_constant_rtx (int idx)
8929 {
8930 int i;
8931
8932 if (! ext_80387_constants_init)
8933 init_ext_80387_constants ();
8934
8935 switch (idx)
8936 {
8937 case 3:
8938 case 4:
8939 case 5:
8940 case 6:
8941 case 7:
8942 i = idx - 3;
8943 break;
8944
8945 default:
8946 gcc_unreachable ();
8947 }
8948
8949 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8950 XFmode);
8951 }
8952
8953 /* Return 1 if X is all 0s and 2 if X is all 1s
8954    in a supported SSE/AVX vector mode.  */
8955
8956 int
8957 standard_sse_constant_p (rtx x)
8958 {
8959 enum machine_mode mode = GET_MODE (x);
8960
8961 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8962 return 1;
8963 if (vector_all_ones_operand (x, mode))
8964 switch (mode)
8965 {
8966 case V16QImode:
8967 case V8HImode:
8968 case V4SImode:
8969 case V2DImode:
8970 if (TARGET_SSE2)
8971 return 2;
8972 case V32QImode:
8973 case V16HImode:
8974 case V8SImode:
8975 case V4DImode:
8976 if (TARGET_AVX2)
8977 return 2;
8978 case V64QImode:
8979 case V32HImode:
8980 case V16SImode:
8981 case V8DImode:
8982 if (TARGET_AVX512F)
8983 return 2;
8984 default:
8985 break;
8986 }
8987
8988 return 0;
8989 }
8990
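/* Two illustrative calls (not exhaustive):

       standard_sse_constant_p (CONST0_RTX (V4SFmode))   == 1   -- emitted as a self-XOR
       standard_sse_constant_p (CONSTM1_RTX (V4SImode))  == 2   -- pcmpeqd, needs SSE2  */
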
8991 /* Return the opcode of the special instruction to be used to load
8992 the constant X. */
8993
8994 const char *
8995 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
8996 {
8997 switch (standard_sse_constant_p (x))
8998 {
8999 case 1:
9000 switch (get_attr_mode (insn))
9001 {
9002 case MODE_XI:
9003 return "vpxord\t%g0, %g0, %g0";
9004 case MODE_V16SF:
9005 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9006 : "vpxord\t%g0, %g0, %g0";
9007 case MODE_V8DF:
9008 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9009 : "vpxorq\t%g0, %g0, %g0";
9010 case MODE_TI:
9011 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9012 : "%vpxor\t%0, %d0";
9013 case MODE_V2DF:
9014 return "%vxorpd\t%0, %d0";
9015 case MODE_V4SF:
9016 return "%vxorps\t%0, %d0";
9017
9018 case MODE_OI:
9019 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9020 : "vpxor\t%x0, %x0, %x0";
9021 case MODE_V4DF:
9022 return "vxorpd\t%x0, %x0, %x0";
9023 case MODE_V8SF:
9024 return "vxorps\t%x0, %x0, %x0";
9025
9026 default:
9027 break;
9028 }
9029
9030 case 2:
9031 if (TARGET_AVX512VL
9032 || get_attr_mode (insn) == MODE_XI
9033 || get_attr_mode (insn) == MODE_V8DF
9034 || get_attr_mode (insn) == MODE_V16SF)
9035 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9036 if (TARGET_AVX)
9037 return "vpcmpeqd\t%0, %0, %0";
9038 else
9039 return "pcmpeqd\t%0, %0";
9040
9041 default:
9042 break;
9043 }
9044 gcc_unreachable ();
9045 }
9046
9047 /* Returns true if OP contains a symbol reference */
9048
9049 bool
9050 symbolic_reference_mentioned_p (rtx op)
9051 {
9052 const char *fmt;
9053 int i;
9054
9055 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9056 return true;
9057
9058 fmt = GET_RTX_FORMAT (GET_CODE (op));
9059 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9060 {
9061 if (fmt[i] == 'E')
9062 {
9063 int j;
9064
9065 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9066 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9067 return true;
9068 }
9069
9070 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9071 return true;
9072 }
9073
9074 return false;
9075 }
9076
9077 /* Return true if it is appropriate to emit `ret' instructions in the
9078 body of a function. Do this only if the epilogue is simple, needing a
9079 couple of insns. Prior to reloading, we can't tell how many registers
9080 must be saved, so return false then. Return false if there is no frame
9081 marker to de-allocate. */
9082
9083 bool
9084 ix86_can_use_return_insn_p (void)
9085 {
9086 struct ix86_frame frame;
9087
9088 if (! reload_completed || frame_pointer_needed)
9089 return 0;
9090
9091 /* Don't allow more than 32k pop, since that's all we can do
9092 with one instruction. */
9093 if (crtl->args.pops_args && crtl->args.size >= 32768)
9094 return 0;
9095
9096 ix86_compute_frame_layout (&frame);
9097 return (frame.stack_pointer_offset == UNITS_PER_WORD
9098 && (frame.nregs + frame.nsseregs) == 0);
9099 }
9100 \f
9101 /* Value should be nonzero if functions must have frame pointers.
9102 Zero means the frame pointer need not be set up (and parms may
9103 be accessed via the stack pointer) in functions that seem suitable. */
9104
9105 static bool
9106 ix86_frame_pointer_required (void)
9107 {
9108 /* If we accessed previous frames, then the generated code expects
9109 to be able to access the saved ebp value in our frame. */
9110 if (cfun->machine->accesses_prev_frame)
9111 return true;
9112
9113   /* Several x86 OSes need a frame pointer for other reasons,
9114 usually pertaining to setjmp. */
9115 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9116 return true;
9117
9118 /* For older 32-bit runtimes setjmp requires valid frame-pointer. */
9119 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9120 return true;
9121
9122   /* Win64 SEH: very large frames need a frame pointer, as the maximum stack
9123      allocation is 4GB.  */
9124 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9125 return true;
9126
9127 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9128 turns off the frame pointer by default. Turn it back on now if
9129 we've not got a leaf function. */
9130 if (TARGET_OMIT_LEAF_FRAME_POINTER
9131 && (!crtl->is_leaf
9132 || ix86_current_function_calls_tls_descriptor))
9133 return true;
9134
9135 if (crtl->profile && !flag_fentry)
9136 return true;
9137
9138 return false;
9139 }
9140
9141 /* Record that the current function accesses previous call frames. */
9142
9143 void
9144 ix86_setup_frame_addresses (void)
9145 {
9146 cfun->machine->accesses_prev_frame = 1;
9147 }
9148 \f
9149 #ifndef USE_HIDDEN_LINKONCE
9150 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9151 # define USE_HIDDEN_LINKONCE 1
9152 # else
9153 # define USE_HIDDEN_LINKONCE 0
9154 # endif
9155 #endif
9156
9157 static int pic_labels_used;
9158
9159 /* Fills in the label name that should be used for a pc thunk for
9160 the given register. */
9161
9162 static void
9163 get_pc_thunk_name (char name[32], unsigned int regno)
9164 {
9165 gcc_assert (!TARGET_64BIT);
9166
9167 if (USE_HIDDEN_LINKONCE)
9168 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9169 else
9170 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9171 }
9172
9173
9174 /* This function generates the pc thunks used for -fpic; each thunk loads
9175    its register with the return address of the caller and then returns.  */
9176
9177 static void
9178 ix86_code_end (void)
9179 {
9180 rtx xops[2];
9181 int regno;
9182
9183 for (regno = AX_REG; regno <= SP_REG; regno++)
9184 {
9185 char name[32];
9186 tree decl;
9187
9188 if (!(pic_labels_used & (1 << regno)))
9189 continue;
9190
9191 get_pc_thunk_name (name, regno);
9192
9193 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9194 get_identifier (name),
9195 build_function_type_list (void_type_node, NULL_TREE));
9196 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9197 NULL_TREE, void_type_node);
9198 TREE_PUBLIC (decl) = 1;
9199 TREE_STATIC (decl) = 1;
9200 DECL_IGNORED_P (decl) = 1;
9201
9202 #if TARGET_MACHO
9203 if (TARGET_MACHO)
9204 {
9205 switch_to_section (darwin_sections[text_coal_section]);
9206 fputs ("\t.weak_definition\t", asm_out_file);
9207 assemble_name (asm_out_file, name);
9208 fputs ("\n\t.private_extern\t", asm_out_file);
9209 assemble_name (asm_out_file, name);
9210 putc ('\n', asm_out_file);
9211 ASM_OUTPUT_LABEL (asm_out_file, name);
9212 DECL_WEAK (decl) = 1;
9213 }
9214 else
9215 #endif
9216 if (USE_HIDDEN_LINKONCE)
9217 {
9218 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9219
9220 targetm.asm_out.unique_section (decl, 0);
9221 switch_to_section (get_named_section (decl, NULL, 0));
9222
9223 targetm.asm_out.globalize_label (asm_out_file, name);
9224 fputs ("\t.hidden\t", asm_out_file);
9225 assemble_name (asm_out_file, name);
9226 putc ('\n', asm_out_file);
9227 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9228 }
9229 else
9230 {
9231 switch_to_section (text_section);
9232 ASM_OUTPUT_LABEL (asm_out_file, name);
9233 }
9234
9235 DECL_INITIAL (decl) = make_node (BLOCK);
9236 current_function_decl = decl;
9237 init_function_start (decl);
9238 first_function_block_is_cold = false;
9239 /* Make sure unwind info is emitted for the thunk if needed. */
9240 final_start_function (emit_barrier (), asm_out_file, 1);
9241
9242 /* Pad stack IP move with 4 instructions (two NOPs count
9243 as one instruction). */
9244 if (TARGET_PAD_SHORT_FUNCTION)
9245 {
9246 int i = 8;
9247
9248 while (i--)
9249 fputs ("\tnop\n", asm_out_file);
9250 }
9251
9252 xops[0] = gen_rtx_REG (Pmode, regno);
9253 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9254 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9255 fputs ("\tret\n", asm_out_file);
9256 final_end_function ();
9257 init_insn_lengths ();
9258 free_after_compilation (cfun);
9259 set_cfun (NULL);
9260 current_function_decl = NULL;
9261 }
9262
9263 if (flag_split_stack)
9264 file_end_indicate_split_stack ();
9265 }
9266
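/* Assuming USE_HIDDEN_LINKONCE and regno == BX_REG, the thunk emitted above
   looks roughly like this (the section and label directives come from the
   target hooks and may differ):

       .globl  __x86.get_pc_thunk.bx
       .hidden __x86.get_pc_thunk.bx
   __x86.get_pc_thunk.bx:
       movl    (%esp), %ebx
       ret  */
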
9267 /* Emit code for the SET_GOT patterns. */
9268
9269 const char *
9270 output_set_got (rtx dest, rtx label)
9271 {
9272 rtx xops[3];
9273
9274 xops[0] = dest;
9275
9276 if (TARGET_VXWORKS_RTP && flag_pic)
9277 {
9278 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9279 xops[2] = gen_rtx_MEM (Pmode,
9280 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9281 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9282
9283 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9284 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9285 an unadorned address. */
9286 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9287 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9288 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9289 return "";
9290 }
9291
9292 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9293
9294 if (!flag_pic)
9295 {
9296 if (TARGET_MACHO)
9297 /* We don't need a pic base, we're not producing pic. */
9298 gcc_unreachable ();
9299
9300 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9301 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9302 targetm.asm_out.internal_label (asm_out_file, "L",
9303 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9304 }
9305 else
9306 {
9307 char name[32];
9308 get_pc_thunk_name (name, REGNO (dest));
9309 pic_labels_used |= 1 << REGNO (dest);
9310
9311 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9312 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9313 output_asm_insn ("call\t%X2", xops);
9314
9315 #if TARGET_MACHO
9316 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9317 This is what will be referenced by the Mach-O PIC subsystem. */
9318 if (machopic_should_output_picbase_label () || !label)
9319 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9320
9321 /* When we are restoring the pic base at the site of a nonlocal label,
9322 and we decided to emit the pic base above, we will still output a
9323 local label used for calculating the correction offset (even though
9324 the offset will be 0 in that case). */
9325 if (label)
9326 targetm.asm_out.internal_label (asm_out_file, "L",
9327 CODE_LABEL_NUMBER (label));
9328 #endif
9329 }
9330
9331 if (!TARGET_MACHO)
9332 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9333
9334 return "";
9335 }
9336
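/* For the common -fpic case (no VxWorks RTP, not Mach-O), with %ebx as
   DEST, the sequence printed above is:

       call    __x86.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   relying on the assembler's special handling of _GLOBAL_OFFSET_TABLE_
   when used as an immediate operand.  */
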
9337 /* Generate a "push" pattern for input ARG.  */
9338
9339 static rtx
9340 gen_push (rtx arg)
9341 {
9342 struct machine_function *m = cfun->machine;
9343
9344 if (m->fs.cfa_reg == stack_pointer_rtx)
9345 m->fs.cfa_offset += UNITS_PER_WORD;
9346 m->fs.sp_offset += UNITS_PER_WORD;
9347
9348 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9349 arg = gen_rtx_REG (word_mode, REGNO (arg));
9350
9351 return gen_rtx_SET (VOIDmode,
9352 gen_rtx_MEM (word_mode,
9353 gen_rtx_PRE_DEC (Pmode,
9354 stack_pointer_rtx)),
9355 arg);
9356 }
9357
9358 /* Generate a "pop" pattern for input ARG.  */
9359
9360 static rtx
9361 gen_pop (rtx arg)
9362 {
9363 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9364 arg = gen_rtx_REG (word_mode, REGNO (arg));
9365
9366 return gen_rtx_SET (VOIDmode,
9367 arg,
9368 gen_rtx_MEM (word_mode,
9369 gen_rtx_POST_INC (Pmode,
9370 stack_pointer_rtx)));
9371 }
9372
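/* On a 64-bit target, gen_push and gen_pop for %rdi produce RTL of the
   form (sketch):

       (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))
       (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))

   which the move patterns match as plain push/pop instructions.  */
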
9373 /* Return >= 0 if there is an unused call-clobbered register available
9374 for the entire function. */
9375
9376 static unsigned int
9377 ix86_select_alt_pic_regnum (void)
9378 {
9379 if (crtl->is_leaf
9380 && !crtl->profile
9381 && !ix86_current_function_calls_tls_descriptor)
9382 {
9383 int i, drap;
9384 /* Can't use the same register for both PIC and DRAP. */
9385 if (crtl->drap_reg)
9386 drap = REGNO (crtl->drap_reg);
9387 else
9388 drap = -1;
9389 for (i = 2; i >= 0; --i)
9390 if (i != drap && !df_regs_ever_live_p (i))
9391 return i;
9392 }
9393
9394 return INVALID_REGNUM;
9395 }
9396
9397 /* Return TRUE if we need to save REGNO. */
9398
9399 static bool
9400 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9401 {
9402 if (pic_offset_table_rtx
9403 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9404 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9405 || crtl->profile
9406 || crtl->calls_eh_return
9407 || crtl->uses_const_pool
9408 || cfun->has_nonlocal_label))
9409 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9410
9411 if (crtl->calls_eh_return && maybe_eh_return)
9412 {
9413 unsigned i;
9414 for (i = 0; ; i++)
9415 {
9416 unsigned test = EH_RETURN_DATA_REGNO (i);
9417 if (test == INVALID_REGNUM)
9418 break;
9419 if (test == regno)
9420 return true;
9421 }
9422 }
9423
9424 if (crtl->drap_reg
9425 && regno == REGNO (crtl->drap_reg)
9426 && !cfun->machine->no_drap_save_restore)
9427 return true;
9428
9429 return (df_regs_ever_live_p (regno)
9430 && !call_used_regs[regno]
9431 && !fixed_regs[regno]
9432 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9433 }
9434
9435 /* Return the number of saved general purpose registers.  */
9436
9437 static int
9438 ix86_nsaved_regs (void)
9439 {
9440 int nregs = 0;
9441 int regno;
9442
9443 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9444 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9445 nregs ++;
9446 return nregs;
9447 }
9448
9449 /* Return the number of saved SSE registers.  */
9450
9451 static int
9452 ix86_nsaved_sseregs (void)
9453 {
9454 int nregs = 0;
9455 int regno;
9456
9457 if (!TARGET_64BIT_MS_ABI)
9458 return 0;
9459 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9460 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9461 nregs ++;
9462 return nregs;
9463 }
9464
9465 /* Given FROM and TO register numbers, say whether this elimination is
9466 allowed. If stack alignment is needed, we can only replace argument
9467 pointer with hard frame pointer, or replace frame pointer with stack
9468 pointer. Otherwise, frame pointer elimination is automatically
9469 handled and all other eliminations are valid. */
9470
9471 static bool
9472 ix86_can_eliminate (const int from, const int to)
9473 {
9474 if (stack_realign_fp)
9475 return ((from == ARG_POINTER_REGNUM
9476 && to == HARD_FRAME_POINTER_REGNUM)
9477 || (from == FRAME_POINTER_REGNUM
9478 && to == STACK_POINTER_REGNUM));
9479 else
9480 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9481 }
9482
9483 /* Return the offset between two registers, one to be eliminated, and the other
9484 its replacement, at the start of a routine. */
9485
9486 HOST_WIDE_INT
9487 ix86_initial_elimination_offset (int from, int to)
9488 {
9489 struct ix86_frame frame;
9490 ix86_compute_frame_layout (&frame);
9491
9492 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9493 return frame.hard_frame_pointer_offset;
9494 else if (from == FRAME_POINTER_REGNUM
9495 && to == HARD_FRAME_POINTER_REGNUM)
9496 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9497 else
9498 {
9499 gcc_assert (to == STACK_POINTER_REGNUM);
9500
9501 if (from == ARG_POINTER_REGNUM)
9502 return frame.stack_pointer_offset;
9503
9504 gcc_assert (from == FRAME_POINTER_REGNUM);
9505 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9506 }
9507 }
9508
9509 /* In a dynamically-aligned function, we can't know the offset from
9510 stack pointer to frame pointer, so we must ensure that setjmp
9511 eliminates fp against the hard fp (%ebp) rather than trying to
9512 index from %esp up to the top of the frame across a gap that is
9513 of unknown (at compile-time) size. */
9514 static rtx
9515 ix86_builtin_setjmp_frame_value (void)
9516 {
9517 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9518 }
9519
9520 /* When using -fsplit-stack, the allocation routines set a field in
9521 the TCB to the bottom of the stack plus this much space, measured
9522 in bytes. */
9523
9524 #define SPLIT_STACK_AVAILABLE 256
9525
9526 /* Fill the structure ix86_frame describing the frame of the currently compiled function.  */
9527
9528 static void
9529 ix86_compute_frame_layout (struct ix86_frame *frame)
9530 {
9531 unsigned HOST_WIDE_INT stack_alignment_needed;
9532 HOST_WIDE_INT offset;
9533 unsigned HOST_WIDE_INT preferred_alignment;
9534 HOST_WIDE_INT size = get_frame_size ();
9535 HOST_WIDE_INT to_allocate;
9536
9537 frame->nregs = ix86_nsaved_regs ();
9538 frame->nsseregs = ix86_nsaved_sseregs ();
9539
9540   /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9541      in function prologues and leaf functions.  */
9542 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9543 && (!crtl->is_leaf || cfun->calls_alloca != 0
9544 || ix86_current_function_calls_tls_descriptor))
9545 {
9546 crtl->preferred_stack_boundary = 128;
9547 crtl->stack_alignment_needed = 128;
9548 }
9549 /* preferred_stack_boundary is never updated for call
9550 expanded from tls descriptor. Update it here. We don't update it in
9551 expand stage because according to the comments before
9552 ix86_current_function_calls_tls_descriptor, tls calls may be optimized
9553 away. */
9554 else if (ix86_current_function_calls_tls_descriptor
9555 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9556 {
9557 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9558 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9559 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9560 }
9561
9562 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9563 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9564
9565 gcc_assert (!size || stack_alignment_needed);
9566 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9567 gcc_assert (preferred_alignment <= stack_alignment_needed);
9568
9569 /* For SEH we have to limit the amount of code movement into the prologue.
9570 At present we do this via a BLOCKAGE, at which point there's very little
9571 scheduling that can be done, which means that there's very little point
9572 in doing anything except PUSHs. */
9573 if (TARGET_SEH)
9574 cfun->machine->use_fast_prologue_epilogue = false;
9575
9576   /* During reload iteration the number of registers saved can change.
9577      Recompute the value as needed.  Do not recompute when the number of registers
9578      didn't change, as reload does multiple calls to the function and does not
9579      expect the decision to change within a single iteration.  */
9580 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9581 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9582 {
9583 int count = frame->nregs;
9584 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9585
9586 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9587
9588 /* The fast prologue uses move instead of push to save registers. This
9589 is significantly longer, but also executes faster as modern hardware
9590 can execute the moves in parallel, but can't do that for push/pop.
9591
9592 	 Be careful about choosing which prologue to emit: when the function takes
9593 	 many instructions to execute, we may use the slow version, as well as when
9594 	 the function is known to be outside a hot spot (this is known with
9595 	 feedback only).  Weight the size of the function by the number of registers
9596 	 to save, as it is cheap to use one or two push instructions but very
9597 	 slow to use many of them.  */
9598 if (count)
9599 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9600 if (node->frequency < NODE_FREQUENCY_NORMAL
9601 || (flag_branch_probabilities
9602 && node->frequency < NODE_FREQUENCY_HOT))
9603 cfun->machine->use_fast_prologue_epilogue = false;
9604 else
9605 cfun->machine->use_fast_prologue_epilogue
9606 = !expensive_function_p (count);
9607 }
9608
9609 frame->save_regs_using_mov
9610 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9611 /* If static stack checking is enabled and done with probes,
9612 the registers need to be saved before allocating the frame. */
9613 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9614
9615 /* Skip return address. */
9616 offset = UNITS_PER_WORD;
9617
9618 /* Skip pushed static chain. */
9619 if (ix86_static_chain_on_stack)
9620 offset += UNITS_PER_WORD;
9621
9622 /* Skip saved base pointer. */
9623 if (frame_pointer_needed)
9624 offset += UNITS_PER_WORD;
9625 frame->hfp_save_offset = offset;
9626
9627 /* The traditional frame pointer location is at the top of the frame. */
9628 frame->hard_frame_pointer_offset = offset;
9629
9630 /* Register save area */
9631 offset += frame->nregs * UNITS_PER_WORD;
9632 frame->reg_save_offset = offset;
9633
9634 /* On SEH target, registers are pushed just before the frame pointer
9635 location. */
9636 if (TARGET_SEH)
9637 frame->hard_frame_pointer_offset = offset;
9638
9639 /* Align and set SSE register save area. */
9640 if (frame->nsseregs)
9641 {
9642 /* The only ABI that has saved SSE registers (Win64) also has a
9643 16-byte aligned default stack, and thus we don't need to be
9644 within the re-aligned local stack frame to save them. */
9645 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9646 offset = (offset + 16 - 1) & -16;
9647 offset += frame->nsseregs * 16;
9648 }
9649 frame->sse_reg_save_offset = offset;
9650
9651 /* The re-aligned stack starts here. Values before this point are not
9652 directly comparable with values below this point. In order to make
9653 sure that no value happens to be the same before and after, force
9654 the alignment computation below to add a non-zero value. */
9655 if (stack_realign_fp)
9656 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9657
9658 /* Va-arg area */
9659 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9660 offset += frame->va_arg_size;
9661
9662 /* Align start of frame for local function. */
9663 if (stack_realign_fp
9664 || offset != frame->sse_reg_save_offset
9665 || size != 0
9666 || !crtl->is_leaf
9667 || cfun->calls_alloca
9668 || ix86_current_function_calls_tls_descriptor)
9669 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9670
9671 /* Frame pointer points here. */
9672 frame->frame_pointer_offset = offset;
9673
9674 offset += size;
9675
9676 /* Add outgoing arguments area. Can be skipped if we eliminated
9677 all the function calls as dead code.
9678      Skipping is however impossible when the function calls alloca, as the alloca
9679      expander assumes that the last crtl->outgoing_args_size bytes
9680      of the stack frame are unused.  */
9681 if (ACCUMULATE_OUTGOING_ARGS
9682 && (!crtl->is_leaf || cfun->calls_alloca
9683 || ix86_current_function_calls_tls_descriptor))
9684 {
9685 offset += crtl->outgoing_args_size;
9686 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9687 }
9688 else
9689 frame->outgoing_arguments_size = 0;
9690
9691 /* Align stack boundary. Only needed if we're calling another function
9692 or using alloca. */
9693 if (!crtl->is_leaf || cfun->calls_alloca
9694 || ix86_current_function_calls_tls_descriptor)
9695 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9696
9697 /* We've reached end of stack frame. */
9698 frame->stack_pointer_offset = offset;
9699
9700 /* Size prologue needs to allocate. */
9701 to_allocate = offset - frame->sse_reg_save_offset;
9702
9703 if ((!to_allocate && frame->nregs <= 1)
9704 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9705 frame->save_regs_using_mov = false;
9706
9707 if (ix86_using_red_zone ()
9708 && crtl->sp_is_unchanging
9709 && crtl->is_leaf
9710 && !ix86_current_function_calls_tls_descriptor)
9711 {
9712 frame->red_zone_size = to_allocate;
9713 if (frame->save_regs_using_mov)
9714 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9715 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9716 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9717 }
9718 else
9719 frame->red_zone_size = 0;
9720 frame->stack_pointer_offset -= frame->red_zone_size;
9721
9722 /* The SEH frame pointer location is near the bottom of the frame.
9723 This is enforced by the fact that the difference between the
9724 stack pointer and the frame pointer is limited to 240 bytes in
9725 the unwind data structure. */
9726 if (TARGET_SEH)
9727 {
9728 HOST_WIDE_INT diff;
9729
9730       /* If we can leave the frame pointer where it is, do so; this also returns
9731 	 the establisher frame for __builtin_frame_address (0).  */
9732 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9733 if (diff <= SEH_MAX_FRAME_SIZE
9734 && (diff > 240 || (diff & 15) != 0)
9735 && !crtl->accesses_prior_frames)
9736 {
9737 /* Ideally we'd determine what portion of the local stack frame
9738 (within the constraint of the lowest 240) is most heavily used.
9739 But without that complication, simply bias the frame pointer
9740 by 128 bytes so as to maximize the amount of the local stack
9741 frame that is addressable with 8-bit offsets. */
9742 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9743 }
9744 }
9745 }
9746
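/* A rough picture of the frame laid out above, from higher to lower
   addresses (illustrative sketch: 64-bit SysV, frame pointer needed, no
   DRAP or stack realignment; SEH and realigned frames differ):

       return address
       saved %rbp
                                   <- hard_frame_pointer_offset
       pushed GP registers
                                   <- reg_save_offset
       SSE register save area (Win64 only)
                                   <- sse_reg_save_offset
       va_arg register save area
                                   <- frame_pointer_offset
       local variables
       outgoing arguments
                                   <- stack_pointer_offset

   Each *_offset field is the number of bytes from the slot just above the
   return address down to the marked boundary.  */
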
9747 /* This is semi-inlined memory_address_length, but simplified
9748 since we know that we're always dealing with reg+offset, and
9749 to avoid having to create and discard all that rtl. */
9750
9751 static inline int
9752 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9753 {
9754 int len = 4;
9755
9756 if (offset == 0)
9757 {
9758 /* EBP and R13 cannot be encoded without an offset. */
9759 len = (regno == BP_REG || regno == R13_REG);
9760 }
9761 else if (IN_RANGE (offset, -128, 127))
9762 len = 1;
9763
9764 /* ESP and R12 must be encoded with a SIB byte. */
9765 if (regno == SP_REG || regno == R12_REG)
9766 len++;
9767
9768 return len;
9769 }
9770
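/* A few illustrative values:

       choose_baseaddr_len (AX_REG,   0) == 0   -- no displacement byte needed
       choose_baseaddr_len (BP_REG,   0) == 1   -- EBP always takes a disp8
       choose_baseaddr_len (BX_REG,   8) == 1   -- disp8
       choose_baseaddr_len (SP_REG,   8) == 2   -- disp8 + SIB byte
       choose_baseaddr_len (BX_REG, 512) == 4   -- disp32  */
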
9771 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9772 The valid base registers are taken from CFUN->MACHINE->FS. */
9773
9774 static rtx
9775 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9776 {
9777 const struct machine_function *m = cfun->machine;
9778 rtx base_reg = NULL;
9779 HOST_WIDE_INT base_offset = 0;
9780
9781 if (m->use_fast_prologue_epilogue)
9782 {
9783 /* Choose the base register most likely to allow the most scheduling
9784 opportunities. Generally FP is valid throughout the function,
9785 while DRAP must be reloaded within the epilogue. But choose either
9786 over the SP due to increased encoding size. */
9787
9788 if (m->fs.fp_valid)
9789 {
9790 base_reg = hard_frame_pointer_rtx;
9791 base_offset = m->fs.fp_offset - cfa_offset;
9792 }
9793 else if (m->fs.drap_valid)
9794 {
9795 base_reg = crtl->drap_reg;
9796 base_offset = 0 - cfa_offset;
9797 }
9798 else if (m->fs.sp_valid)
9799 {
9800 base_reg = stack_pointer_rtx;
9801 base_offset = m->fs.sp_offset - cfa_offset;
9802 }
9803 }
9804 else
9805 {
9806 HOST_WIDE_INT toffset;
9807 int len = 16, tlen;
9808
9809 /* Choose the base register with the smallest address encoding.
9810 With a tie, choose FP > DRAP > SP. */
9811 if (m->fs.sp_valid)
9812 {
9813 base_reg = stack_pointer_rtx;
9814 base_offset = m->fs.sp_offset - cfa_offset;
9815 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9816 }
9817 if (m->fs.drap_valid)
9818 {
9819 toffset = 0 - cfa_offset;
9820 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9821 if (tlen <= len)
9822 {
9823 base_reg = crtl->drap_reg;
9824 base_offset = toffset;
9825 len = tlen;
9826 }
9827 }
9828 if (m->fs.fp_valid)
9829 {
9830 toffset = m->fs.fp_offset - cfa_offset;
9831 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9832 if (tlen <= len)
9833 {
9834 base_reg = hard_frame_pointer_rtx;
9835 base_offset = toffset;
9836 len = tlen;
9837 }
9838 }
9839 }
9840 gcc_assert (base_reg != NULL);
9841
9842 return plus_constant (Pmode, base_reg, base_offset);
9843 }
9844
9845 /* Emit code to save registers in the prologue. */
9846
9847 static void
9848 ix86_emit_save_regs (void)
9849 {
9850 unsigned int regno;
9851 rtx insn;
9852
9853 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9854 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9855 {
9856 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9857 RTX_FRAME_RELATED_P (insn) = 1;
9858 }
9859 }
9860
9861 /* Emit a single register save at CFA - CFA_OFFSET. */
9862
9863 static void
9864 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9865 HOST_WIDE_INT cfa_offset)
9866 {
9867 struct machine_function *m = cfun->machine;
9868 rtx reg = gen_rtx_REG (mode, regno);
9869 rtx mem, addr, base, insn;
9870
9871 addr = choose_baseaddr (cfa_offset);
9872 mem = gen_frame_mem (mode, addr);
9873
9874 /* For SSE saves, we need to indicate the 128-bit alignment. */
9875 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9876
9877 insn = emit_move_insn (mem, reg);
9878 RTX_FRAME_RELATED_P (insn) = 1;
9879
9880 base = addr;
9881 if (GET_CODE (base) == PLUS)
9882 base = XEXP (base, 0);
9883 gcc_checking_assert (REG_P (base));
9884
9885 /* When saving registers into a re-aligned local stack frame, avoid
9886 any tricky guessing by dwarf2out. */
9887 if (m->fs.realigned)
9888 {
9889 gcc_checking_assert (stack_realign_drap);
9890
9891 if (regno == REGNO (crtl->drap_reg))
9892 {
9893 /* A bit of a hack. We force the DRAP register to be saved in
9894 the re-aligned stack frame, which provides us with a copy
9895 of the CFA that will last past the prologue. Install it. */
9896 gcc_checking_assert (cfun->machine->fs.fp_valid);
9897 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9898 cfun->machine->fs.fp_offset - cfa_offset);
9899 mem = gen_rtx_MEM (mode, addr);
9900 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9901 }
9902 else
9903 {
9904 /* The frame pointer is a stable reference within the
9905 aligned frame. Use it. */
9906 gcc_checking_assert (cfun->machine->fs.fp_valid);
9907 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9908 cfun->machine->fs.fp_offset - cfa_offset);
9909 mem = gen_rtx_MEM (mode, addr);
9910 add_reg_note (insn, REG_CFA_EXPRESSION,
9911 gen_rtx_SET (VOIDmode, mem, reg));
9912 }
9913 }
9914
9915 /* The memory may not be relative to the current CFA register,
9916 which means that we may need to generate a new pattern for
9917 use by the unwind info. */
9918 else if (base != m->fs.cfa_reg)
9919 {
9920 addr = plus_constant (Pmode, m->fs.cfa_reg,
9921 m->fs.cfa_offset - cfa_offset);
9922 mem = gen_rtx_MEM (mode, addr);
9923 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9924 }
9925 }
9926
9927 /* Emit code to save registers using MOV insns.
9928 First register is stored at CFA - CFA_OFFSET. */
9929 static void
9930 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9931 {
9932 unsigned int regno;
9933
9934 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9935 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9936 {
9937 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9938 cfa_offset -= UNITS_PER_WORD;
9939 }
9940 }
9941
9942 /* Emit code to save SSE registers using MOV insns.
9943 First register is stored at CFA - CFA_OFFSET. */
9944 static void
9945 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9946 {
9947 unsigned int regno;
9948
9949 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9950 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9951 {
9952 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9953 cfa_offset -= 16;
9954 }
9955 }
9956
9957 static GTY(()) rtx queued_cfa_restores;
9958
9959 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
9960 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9961 Don't add the note if the previously saved value will be left untouched
9962 within stack red-zone till return, as unwinders can find the same value
9963 in the register and on the stack. */
9964
9965 static void
9966 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9967 {
9968 if (!crtl->shrink_wrapped
9969 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9970 return;
9971
9972 if (insn)
9973 {
9974 add_reg_note (insn, REG_CFA_RESTORE, reg);
9975 RTX_FRAME_RELATED_P (insn) = 1;
9976 }
9977 else
9978 queued_cfa_restores
9979 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9980 }
9981
9982 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9983
9984 static void
9985 ix86_add_queued_cfa_restore_notes (rtx insn)
9986 {
9987 rtx last;
9988 if (!queued_cfa_restores)
9989 return;
9990 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9991 ;
9992 XEXP (last, 1) = REG_NOTES (insn);
9993 REG_NOTES (insn) = queued_cfa_restores;
9994 queued_cfa_restores = NULL_RTX;
9995 RTX_FRAME_RELATED_P (insn) = 1;
9996 }
9997
9998 /* Expand prologue or epilogue stack adjustment.
9999    The pattern exists to put a dependency on all ebp-based memory accesses.
10000 STYLE should be negative if instructions should be marked as frame related,
10001 zero if %r11 register is live and cannot be freely used and positive
10002 otherwise. */
10003
10004 static void
10005 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10006 int style, bool set_cfa)
10007 {
10008 struct machine_function *m = cfun->machine;
10009 rtx insn;
10010 bool add_frame_related_expr = false;
10011
10012 if (Pmode == SImode)
10013 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10014 else if (x86_64_immediate_operand (offset, DImode))
10015 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10016 else
10017 {
10018 rtx tmp;
10019 /* r11 is used by indirect sibcall return as well, set before the
10020 epilogue and used after the epilogue. */
10021 if (style)
10022 tmp = gen_rtx_REG (DImode, R11_REG);
10023 else
10024 {
10025 gcc_assert (src != hard_frame_pointer_rtx
10026 && dest != hard_frame_pointer_rtx);
10027 tmp = hard_frame_pointer_rtx;
10028 }
10029 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10030 if (style < 0)
10031 add_frame_related_expr = true;
10032
10033 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10034 }
10035
10036 insn = emit_insn (insn);
10037 if (style >= 0)
10038 ix86_add_queued_cfa_restore_notes (insn);
10039
10040 if (set_cfa)
10041 {
10042 rtx r;
10043
10044 gcc_assert (m->fs.cfa_reg == src);
10045 m->fs.cfa_offset += INTVAL (offset);
10046 m->fs.cfa_reg = dest;
10047
10048 r = gen_rtx_PLUS (Pmode, src, offset);
10049 r = gen_rtx_SET (VOIDmode, dest, r);
10050 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10051 RTX_FRAME_RELATED_P (insn) = 1;
10052 }
10053 else if (style < 0)
10054 {
10055 RTX_FRAME_RELATED_P (insn) = 1;
10056 if (add_frame_related_expr)
10057 {
10058 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10059 r = gen_rtx_SET (VOIDmode, dest, r);
10060 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10061 }
10062 }
10063
10064 if (dest == stack_pointer_rtx)
10065 {
10066 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10067 bool valid = m->fs.sp_valid;
10068
10069 if (src == hard_frame_pointer_rtx)
10070 {
10071 valid = m->fs.fp_valid;
10072 ooffset = m->fs.fp_offset;
10073 }
10074 else if (src == crtl->drap_reg)
10075 {
10076 valid = m->fs.drap_valid;
10077 ooffset = 0;
10078 }
10079 else
10080 {
10081 /* Else there are two possibilities: SP itself, which we set
10082 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
10083 taken care of by hand along the eh_return path. */
10084 gcc_checking_assert (src == stack_pointer_rtx
10085 || offset == const0_rtx);
10086 }
10087
10088 m->fs.sp_offset = ooffset - INTVAL (offset);
10089 m->fs.sp_valid = valid;
10090 }
10091 }
10092
10093 /* Find an available register to be used as a dynamic realign argument
10094 pointer register. Such a register will be written in the prologue and
10095 used at the beginning of the body, so it must not be
10096 1. a parameter passing register.
10097 2. the GOT pointer.
10098 We reuse the static-chain register if it is available. Otherwise, we
10099 use DI for i386 and R13 for x86-64. We chose R13 since it has a
10100 shorter encoding.
10101
10102 Return: the regno of the chosen register. */
10103
10104 static unsigned int
10105 find_drap_reg (void)
10106 {
10107 tree decl = cfun->decl;
10108
10109 if (TARGET_64BIT)
10110 {
10111 /* Use R13 for a nested function or a function that needs a static
10112 chain. Since a function with a tail call may use any caller-saved
10113 register in the epilogue, DRAP must not use a caller-saved
10114 register in such a case. */
10115 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10116 return R13_REG;
10117
10118 return R10_REG;
10119 }
10120 else
10121 {
10122 /* Use DI for a nested function or a function that needs a static
10123 chain. Since a function with a tail call may use any caller-saved
10124 register in the epilogue, DRAP must not use a caller-saved
10125 register in such a case. */
10126 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10127 return DI_REG;
10128
10129 /* Reuse static chain register if it isn't used for parameter
10130 passing. */
10131 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10132 {
10133 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10134 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10135 return CX_REG;
10136 }
10137 return DI_REG;
10138 }
10139 }
10140
10141 /* Return minimum incoming stack alignment. */
10142
10143 static unsigned int
10144 ix86_minimum_incoming_stack_boundary (bool sibcall)
10145 {
10146 unsigned int incoming_stack_boundary;
10147
10148 /* Prefer the one specified at command line. */
10149 if (ix86_user_incoming_stack_boundary)
10150 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10151 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10152 if -mstackrealign is used, this isn't a sibcall check and the
10153 estimated stack alignment is 128 bits. */
10154 else if (!sibcall
10155 && !TARGET_64BIT
10156 && ix86_force_align_arg_pointer
10157 && crtl->stack_alignment_estimated == 128)
10158 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10159 else
10160 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10161
10162 /* Incoming stack alignment can be changed on individual functions
10163 via force_align_arg_pointer attribute. We use the smallest
10164 incoming stack boundary. */
10165 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10166 && lookup_attribute (ix86_force_align_arg_pointer_string,
10167 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10168 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10169
10170 /* The incoming stack frame has to be aligned at least at
10171 parm_stack_boundary. */
10172 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10173 incoming_stack_boundary = crtl->parm_stack_boundary;
10174
10175 /* The stack at the entry of main is aligned by the runtime. We use
10176 the smallest incoming stack boundary. */
10177 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10178 && DECL_NAME (current_function_decl)
10179 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10180 && DECL_FILE_SCOPE_P (current_function_decl))
10181 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10182
10183 return incoming_stack_boundary;
10184 }
10185
10186 /* Update incoming stack boundary and estimated stack alignment. */
10187
10188 static void
10189 ix86_update_stack_boundary (void)
10190 {
10191 ix86_incoming_stack_boundary
10192 = ix86_minimum_incoming_stack_boundary (false);
10193
10194 /* x86_64 varargs need 16-byte stack alignment for the register save
10195 area. */
10196 if (TARGET_64BIT
10197 && cfun->stdarg
10198 && crtl->stack_alignment_estimated < 128)
10199 crtl->stack_alignment_estimated = 128;
10200 }
10201
10202 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10203 needed or an rtx for DRAP otherwise. */
10204
10205 static rtx
10206 ix86_get_drap_rtx (void)
10207 {
10208 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10209 crtl->need_drap = true;
10210
10211 if (stack_realign_drap)
10212 {
10213 /* Assign DRAP to vDRAP and return vDRAP. */
10214 unsigned int regno = find_drap_reg ();
10215 rtx drap_vreg;
10216 rtx arg_ptr;
10217 rtx_insn *seq, *insn;
10218
10219 arg_ptr = gen_rtx_REG (Pmode, regno);
10220 crtl->drap_reg = arg_ptr;
10221
10222 start_sequence ();
10223 drap_vreg = copy_to_reg (arg_ptr);
10224 seq = get_insns ();
10225 end_sequence ();
10226
10227 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10228 if (!optimize)
10229 {
10230 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10231 RTX_FRAME_RELATED_P (insn) = 1;
10232 }
10233 return drap_vreg;
10234 }
10235 else
10236 return NULL;
10237 }
10238
10239 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10240
10241 static rtx
10242 ix86_internal_arg_pointer (void)
10243 {
10244 return virtual_incoming_args_rtx;
10245 }
10246
10247 struct scratch_reg {
10248 rtx reg;
10249 bool saved;
10250 };
10251
10252 /* Return a short-lived scratch register for use on function entry.
10253 In 32-bit mode, it is valid only after the registers are saved
10254 in the prologue. This register must be released by means of
10255 release_scratch_register_on_entry once it is dead. */
10256
10257 static void
10258 get_scratch_register_on_entry (struct scratch_reg *sr)
10259 {
10260 int regno;
10261
10262 sr->saved = false;
10263
10264 if (TARGET_64BIT)
10265 {
10266 /* We always use R11 in 64-bit mode. */
10267 regno = R11_REG;
10268 }
10269 else
10270 {
10271 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10272 bool fastcall_p
10273 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10274 bool thiscall_p
10275 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10276 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10277 int regparm = ix86_function_regparm (fntype, decl);
10278 int drap_regno
10279 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10280
10281 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10282 for the static chain register. */
10283 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10284 && drap_regno != AX_REG)
10285 regno = AX_REG;
10286 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10287 for the static chain register. */
10288 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10289 regno = AX_REG;
10290 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10291 regno = DX_REG;
10292 /* ecx is the static chain register. */
10293 else if (regparm < 3 && !fastcall_p && !thiscall_p
10294 && !static_chain_p
10295 && drap_regno != CX_REG)
10296 regno = CX_REG;
10297 else if (ix86_save_reg (BX_REG, true))
10298 regno = BX_REG;
10299 /* esi is the static chain register. */
10300 else if (!(regparm == 3 && static_chain_p)
10301 && ix86_save_reg (SI_REG, true))
10302 regno = SI_REG;
10303 else if (ix86_save_reg (DI_REG, true))
10304 regno = DI_REG;
10305 else
10306 {
10307 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10308 sr->saved = true;
10309 }
10310 }
10311
10312 sr->reg = gen_rtx_REG (Pmode, regno);
10313 if (sr->saved)
10314 {
10315 rtx insn = emit_insn (gen_push (sr->reg));
10316 RTX_FRAME_RELATED_P (insn) = 1;
10317 }
10318 }
10319
10320 /* Release a scratch register obtained from the preceding function. */
10321
10322 static void
10323 release_scratch_register_on_entry (struct scratch_reg *sr)
10324 {
10325 if (sr->saved)
10326 {
10327 struct machine_function *m = cfun->machine;
10328 rtx x, insn = emit_insn (gen_pop (sr->reg));
10329
10330 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10331 RTX_FRAME_RELATED_P (insn) = 1;
10332 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10333 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10334 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10335 m->fs.sp_offset -= UNITS_PER_WORD;
10336 }
10337 }
10338
10339 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10340
10341 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10342
10343 static void
10344 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10345 {
10346 /* We skip the probe for the first interval + a small dope of 4 words and
10347 probe that many bytes past the specified size to maintain a protection
10348 area at the bottom of the stack. */
10349 const int dope = 4 * UNITS_PER_WORD;
10350 rtx size_rtx = GEN_INT (size), last;
10351
10352 /* See if we have a constant small number of probes to generate. If so,
10353 that's the easy case. The run-time loop is made up of 11 insns in the
10354 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10355 for n # of intervals. */
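/* A worked example of the unrolled case, assuming PROBE_INTERVAL is 4096
   and SIZE is 10000: the code below decrements SP by 2*4096 + dope and
   probes, decrements by another 4096 and probes, decrements by the
   remaining 1808 bytes and probes, and finally adds back 4096 + dope,
   for a net adjustment of exactly 10000 bytes with three probes.  */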
10356 if (size <= 5 * PROBE_INTERVAL)
10357 {
10358 HOST_WIDE_INT i, adjust;
10359 bool first_probe = true;
10360
10361 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10362 values of N from 1 until it exceeds SIZE. If only one probe is
10363 needed, this will not generate any code. Then adjust and probe
10364 to PROBE_INTERVAL + SIZE. */
10365 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10366 {
10367 if (first_probe)
10368 {
10369 adjust = 2 * PROBE_INTERVAL + dope;
10370 first_probe = false;
10371 }
10372 else
10373 adjust = PROBE_INTERVAL;
10374
10375 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10376 plus_constant (Pmode, stack_pointer_rtx,
10377 -adjust)));
10378 emit_stack_probe (stack_pointer_rtx);
10379 }
10380
10381 if (first_probe)
10382 adjust = size + PROBE_INTERVAL + dope;
10383 else
10384 adjust = size + PROBE_INTERVAL - i;
10385
10386 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10387 plus_constant (Pmode, stack_pointer_rtx,
10388 -adjust)));
10389 emit_stack_probe (stack_pointer_rtx);
10390
10391 /* Adjust back to account for the additional first interval. */
10392 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10393 plus_constant (Pmode, stack_pointer_rtx,
10394 PROBE_INTERVAL + dope)));
10395 }
10396
10397 /* Otherwise, do the same as above, but in a loop. Note that we must be
10398 extra careful with variables wrapping around because we might be at
10399 the very top (or the very bottom) of the address space and we have
10400 to be able to handle this case properly; in particular, we use an
10401 equality test for the loop condition. */
10402 else
10403 {
10404 HOST_WIDE_INT rounded_size;
10405 struct scratch_reg sr;
10406
10407 get_scratch_register_on_entry (&sr);
10408
10409
10410 /* Step 1: round SIZE to the previous multiple of the interval. */
10411
10412 rounded_size = size & -PROBE_INTERVAL;
10413
10414
10415 /* Step 2: compute initial and final value of the loop counter. */
10416
10417 /* SP = SP_0 + PROBE_INTERVAL. */
10418 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10419 plus_constant (Pmode, stack_pointer_rtx,
10420 - (PROBE_INTERVAL + dope))));
10421
10422 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10423 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10424 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10425 gen_rtx_PLUS (Pmode, sr.reg,
10426 stack_pointer_rtx)));
10427
10428
10429 /* Step 3: the loop
10430
10431 while (SP != LAST_ADDR)
10432 {
10433 SP = SP + PROBE_INTERVAL
10434 probe at SP
10435 }
10436
10437 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10438 values of N from 1 until it is equal to ROUNDED_SIZE. */
10439
10440 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10441
10442
10443 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10444 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10445
10446 if (size != rounded_size)
10447 {
10448 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10449 plus_constant (Pmode, stack_pointer_rtx,
10450 rounded_size - size)));
10451 emit_stack_probe (stack_pointer_rtx);
10452 }
10453
10454 /* Adjust back to account for the additional first interval. */
10455 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10456 plus_constant (Pmode, stack_pointer_rtx,
10457 PROBE_INTERVAL + dope)));
10458
10459 release_scratch_register_on_entry (&sr);
10460 }
10461
10462 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10463
10464 /* Even if the stack pointer isn't the CFA register, we need to correctly
10465 describe the adjustments made to it, in particular differentiate the
10466 frame-related ones from the frame-unrelated ones. */
10467 if (size > 0)
10468 {
10469 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10470 XVECEXP (expr, 0, 0)
10471 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10472 plus_constant (Pmode, stack_pointer_rtx, -size));
10473 XVECEXP (expr, 0, 1)
10474 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10475 plus_constant (Pmode, stack_pointer_rtx,
10476 PROBE_INTERVAL + dope + size));
10477 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10478 RTX_FRAME_RELATED_P (last) = 1;
10479
10480 cfun->machine->fs.sp_offset += size;
10481 }
10482
10483 /* Make sure nothing is scheduled before we are done. */
10484 emit_insn (gen_blockage ());
10485 }
10486
10487 /* Adjust the stack pointer up to REG while probing it. */
10488
10489 const char *
10490 output_adjust_stack_and_probe (rtx reg)
10491 {
10492 static int labelno = 0;
10493 char loop_lab[32], end_lab[32];
10494 rtx xops[2];
10495
10496 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10497 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10498
10499 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10500
10501 /* Jump to END_LAB if SP == LAST_ADDR. */
10502 xops[0] = stack_pointer_rtx;
10503 xops[1] = reg;
10504 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10505 fputs ("\tje\t", asm_out_file);
10506 assemble_name_raw (asm_out_file, end_lab);
10507 fputc ('\n', asm_out_file);
10508
10509 /* SP = SP + PROBE_INTERVAL. */
10510 xops[1] = GEN_INT (PROBE_INTERVAL);
10511 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10512
10513 /* Probe at SP. */
10514 xops[1] = const0_rtx;
10515 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10516
10517 fprintf (asm_out_file, "\tjmp\t");
10518 assemble_name_raw (asm_out_file, loop_lab);
10519 fputc ('\n', asm_out_file);
10520
10521 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10522
10523 return "";
10524 }
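/* For illustration only: on a 32-bit target, assuming PROBE_INTERVAL is
   4096 and the register holding LAST_ADDR is %ecx, the loop emitted
   above looks roughly like this in AT&T syntax:
       .LPSRL0:
               cmpl    %ecx, %esp
               je      .LPSRE0
               subl    $4096, %esp
               orl     $0, (%esp)
               jmp     .LPSRL0
       .LPSRE0:  */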
10525
10526 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10527 inclusive. These are offsets from the current stack pointer. */
10528
10529 static void
10530 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10531 {
10532 /* See if we have a constant small number of probes to generate. If so,
10533 that's the easy case. The run-time loop is made up of 7 insns in the
10534 generic case while the compile-time loop is made up of n insns for n #
10535 of intervals. */
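/* For illustration, assuming PROBE_INTERVAL is 4096, FIRST is 0 and
   SIZE is 10000, the unrolled case below emits probes at SP - 4096,
   SP - 8192 and finally SP - 10000.  */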
10536 if (size <= 7 * PROBE_INTERVAL)
10537 {
10538 HOST_WIDE_INT i;
10539
10540 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10541 it exceeds SIZE. If only one probe is needed, this will not
10542 generate any code. Then probe at FIRST + SIZE. */
10543 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10544 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10545 -(first + i)));
10546
10547 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10548 -(first + size)));
10549 }
10550
10551 /* Otherwise, do the same as above, but in a loop. Note that we must be
10552 extra careful with variables wrapping around because we might be at
10553 the very top (or the very bottom) of the address space and we have
10554 to be able to handle this case properly; in particular, we use an
10555 equality test for the loop condition. */
10556 else
10557 {
10558 HOST_WIDE_INT rounded_size, last;
10559 struct scratch_reg sr;
10560
10561 get_scratch_register_on_entry (&sr);
10562
10563
10564 /* Step 1: round SIZE to the previous multiple of the interval. */
10565
10566 rounded_size = size & -PROBE_INTERVAL;
10567
10568
10569 /* Step 2: compute initial and final value of the loop counter. */
10570
10571 /* TEST_OFFSET = FIRST. */
10572 emit_move_insn (sr.reg, GEN_INT (-first));
10573
10574 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10575 last = first + rounded_size;
10576
10577
10578 /* Step 3: the loop
10579
10580 while (TEST_ADDR != LAST_ADDR)
10581 {
10582 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10583 probe at TEST_ADDR
10584 }
10585
10586 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10587 until it is equal to ROUNDED_SIZE. */
10588
10589 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10590
10591
10592 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10593 that SIZE is equal to ROUNDED_SIZE. */
10594
10595 if (size != rounded_size)
10596 emit_stack_probe (plus_constant (Pmode,
10597 gen_rtx_PLUS (Pmode,
10598 stack_pointer_rtx,
10599 sr.reg),
10600 rounded_size - size));
10601
10602 release_scratch_register_on_entry (&sr);
10603 }
10604
10605 /* Make sure nothing is scheduled before we are done. */
10606 emit_insn (gen_blockage ());
10607 }
10608
10609 /* Probe a range of stack addresses from REG to END, inclusive. These are
10610 offsets from the current stack pointer. */
10611
10612 const char *
10613 output_probe_stack_range (rtx reg, rtx end)
10614 {
10615 static int labelno = 0;
10616 char loop_lab[32], end_lab[32];
10617 rtx xops[3];
10618
10619 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10620 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10621
10622 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10623
10624 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10625 xops[0] = reg;
10626 xops[1] = end;
10627 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10628 fputs ("\tje\t", asm_out_file);
10629 assemble_name_raw (asm_out_file, end_lab);
10630 fputc ('\n', asm_out_file);
10631
10632 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10633 xops[1] = GEN_INT (PROBE_INTERVAL);
10634 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10635
10636 /* Probe at TEST_ADDR. */
10637 xops[0] = stack_pointer_rtx;
10638 xops[1] = reg;
10639 xops[2] = const0_rtx;
10640 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10641
10642 fprintf (asm_out_file, "\tjmp\t");
10643 assemble_name_raw (asm_out_file, loop_lab);
10644 fputc ('\n', asm_out_file);
10645
10646 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10647
10648 return "";
10649 }
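/* For illustration only: assuming a 32-bit target, a PROBE_INTERVAL of
   4096, the test register %ecx and an immediate LAST_ADDR of -16384,
   the loop emitted above looks roughly like this in AT&T syntax:
       .LPSRL1:
               cmpl    $-16384, %ecx
               je      .LPSRE1
               subl    $4096, %ecx
               orl     $0, (%esp,%ecx)
               jmp     .LPSRL1
       .LPSRE1:  */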
10650
10651 /* Finalize the stack_realign_needed flag, which will guide the
10652 prologue/epilogue to be generated in the correct form. */
10653 static void
10654 ix86_finalize_stack_realign_flags (void)
10655 {
10656 /* Check if stack realign is really needed after reload, and
10657 store the result in cfun. */
10658 unsigned int incoming_stack_boundary
10659 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10660 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10661 unsigned int stack_realign = (incoming_stack_boundary
10662 < (crtl->is_leaf
10663 ? crtl->max_used_stack_slot_alignment
10664 : crtl->stack_alignment_needed));
10665
10666 if (crtl->stack_realign_finalized)
10667 {
10668 /* After stack_realign_needed is finalized, we can no longer
10669 change it. */
10670 gcc_assert (crtl->stack_realign_needed == stack_realign);
10671 return;
10672 }
10673
10674 /* If the only reason for frame_pointer_needed is that we conservatively
10675 assumed stack realignment might be needed, but in the end nothing that
10676 needed the stack alignment had been spilled, clear frame_pointer_needed
10677 and say we don't need stack realignment. */
10678 if (stack_realign
10679 && frame_pointer_needed
10680 && crtl->is_leaf
10681 && flag_omit_frame_pointer
10682 && crtl->sp_is_unchanging
10683 && !ix86_current_function_calls_tls_descriptor
10684 && !crtl->accesses_prior_frames
10685 && !cfun->calls_alloca
10686 && !crtl->calls_eh_return
10687 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10688 && !ix86_frame_pointer_required ()
10689 && get_frame_size () == 0
10690 && ix86_nsaved_sseregs () == 0
10691 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10692 {
10693 HARD_REG_SET set_up_by_prologue, prologue_used;
10694 basic_block bb;
10695
10696 CLEAR_HARD_REG_SET (prologue_used);
10697 CLEAR_HARD_REG_SET (set_up_by_prologue);
10698 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10699 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10700 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10701 HARD_FRAME_POINTER_REGNUM);
10702 FOR_EACH_BB_FN (bb, cfun)
10703 {
10704 rtx_insn *insn;
10705 FOR_BB_INSNS (bb, insn)
10706 if (NONDEBUG_INSN_P (insn)
10707 && requires_stack_frame_p (insn, prologue_used,
10708 set_up_by_prologue))
10709 {
10710 crtl->stack_realign_needed = stack_realign;
10711 crtl->stack_realign_finalized = true;
10712 return;
10713 }
10714 }
10715
10716 /* If drap has been set, but it actually isn't live at the start
10717 of the function, there is no reason to set it up. */
10718 if (crtl->drap_reg)
10719 {
10720 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10721 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10722 {
10723 crtl->drap_reg = NULL_RTX;
10724 crtl->need_drap = false;
10725 }
10726 }
10727 else
10728 cfun->machine->no_drap_save_restore = true;
10729
10730 frame_pointer_needed = false;
10731 stack_realign = false;
10732 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10733 crtl->stack_alignment_needed = incoming_stack_boundary;
10734 crtl->stack_alignment_estimated = incoming_stack_boundary;
10735 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10736 crtl->preferred_stack_boundary = incoming_stack_boundary;
10737 df_finish_pass (true);
10738 df_scan_alloc (NULL);
10739 df_scan_blocks ();
10740 df_compute_regs_ever_live (true);
10741 df_analyze ();
10742 }
10743
10744 crtl->stack_realign_needed = stack_realign;
10745 crtl->stack_realign_finalized = true;
10746 }
10747
10748 /* Expand the prologue into a bunch of separate insns. */
10749
10750 void
10751 ix86_expand_prologue (void)
10752 {
10753 struct machine_function *m = cfun->machine;
10754 rtx insn, t;
10755 bool pic_reg_used;
10756 struct ix86_frame frame;
10757 HOST_WIDE_INT allocate;
10758 bool int_registers_saved;
10759 bool sse_registers_saved;
10760
10761 ix86_finalize_stack_realign_flags ();
10762
10763 /* DRAP should not coexist with stack_realign_fp */
10764 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10765
10766 memset (&m->fs, 0, sizeof (m->fs));
10767
10768 /* Initialize CFA state for before the prologue. */
10769 m->fs.cfa_reg = stack_pointer_rtx;
10770 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10771
10772 /* Track SP offset to the CFA. We continue tracking this after we've
10773 swapped the CFA register away from SP. In the case of re-alignment
10774 this is fudged; we're interested in offsets within the local frame. */
10775 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10776 m->fs.sp_valid = true;
10777
10778 ix86_compute_frame_layout (&frame);
10779
10780 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10781 {
10782 /* We should have already generated an error for any use of
10783 ms_hook on a nested function. */
10784 gcc_checking_assert (!ix86_static_chain_on_stack);
10785
10786 /* Check if profiling is active and we shall use the profiling-before-prologue
10787 variant. If so, sorry. */
10788 if (crtl->profile && flag_fentry != 0)
10789 sorry ("ms_hook_prologue attribute isn%'t compatible "
10790 "with -mfentry for 32-bit");
10791
10792 /* In ix86_asm_output_function_label we emitted:
10793 8b ff movl.s %edi,%edi
10794 55 push %ebp
10795 8b ec movl.s %esp,%ebp
10796
10797 This matches the hookable function prologue in Win32 API
10798 functions in Microsoft Windows XP Service Pack 2 and newer.
10799 Wine uses this to enable Windows apps to hook the Win32 API
10800 functions provided by Wine.
10801
10802 What that means is that we've already set up the frame pointer. */
10803
10804 if (frame_pointer_needed
10805 && !(crtl->drap_reg && crtl->stack_realign_needed))
10806 {
10807 rtx push, mov;
10808
10809 /* We've decided to use the frame pointer already set up.
10810 Describe this to the unwinder by pretending that both
10811 push and mov insns happen right here.
10812
10813 Putting the unwind info here at the end of the ms_hook
10814 is done so that we can make absolutely certain we get
10815 the required byte sequence at the start of the function,
10816 rather than relying on an assembler that can produce
10817 the exact encoding required.
10818
10819 However it does mean (in the unpatched case) that we have
10820 a 1 insn window where the asynchronous unwind info is
10821 incorrect. However, if we placed the unwind info at
10822 its correct location we would have incorrect unwind info
10823 in the patched case. Which is probably all moot since
10824 I don't expect Wine generates dwarf2 unwind info for the
10825 system libraries that use this feature. */
10826
10827 insn = emit_insn (gen_blockage ());
10828
10829 push = gen_push (hard_frame_pointer_rtx);
10830 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10831 stack_pointer_rtx);
10832 RTX_FRAME_RELATED_P (push) = 1;
10833 RTX_FRAME_RELATED_P (mov) = 1;
10834
10835 RTX_FRAME_RELATED_P (insn) = 1;
10836 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10837 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10838
10839 /* Note that gen_push incremented m->fs.cfa_offset, even
10840 though we didn't emit the push insn here. */
10841 m->fs.cfa_reg = hard_frame_pointer_rtx;
10842 m->fs.fp_offset = m->fs.cfa_offset;
10843 m->fs.fp_valid = true;
10844 }
10845 else
10846 {
10847 /* The frame pointer is not needed so pop %ebp again.
10848 This leaves us with a pristine state. */
10849 emit_insn (gen_pop (hard_frame_pointer_rtx));
10850 }
10851 }
10852
10853 /* The first insn of a function that accepts its static chain on the
10854 stack is to push the register that would be filled in by a direct
10855 call. This insn will be skipped by the trampoline. */
10856 else if (ix86_static_chain_on_stack)
10857 {
10858 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10859 emit_insn (gen_blockage ());
10860
10861 /* We don't want to interpret this push insn as a register save,
10862 only as a stack adjustment. The real copy of the register as
10863 a save will be done later, if needed. */
10864 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10865 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10866 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10867 RTX_FRAME_RELATED_P (insn) = 1;
10868 }
10869
10870 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10871 DRAP is needed and stack realignment is really needed after reload. */
10872 if (stack_realign_drap)
10873 {
10874 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10875
10876 /* Only need to push parameter pointer reg if it is caller saved. */
10877 if (!call_used_regs[REGNO (crtl->drap_reg)])
10878 {
10879 /* Push arg pointer reg */
10880 insn = emit_insn (gen_push (crtl->drap_reg));
10881 RTX_FRAME_RELATED_P (insn) = 1;
10882 }
10883
10884 /* Grab the argument pointer. */
10885 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10886 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10887 RTX_FRAME_RELATED_P (insn) = 1;
10888 m->fs.cfa_reg = crtl->drap_reg;
10889 m->fs.cfa_offset = 0;
10890
10891 /* Align the stack. */
10892 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10893 stack_pointer_rtx,
10894 GEN_INT (-align_bytes)));
10895 RTX_FRAME_RELATED_P (insn) = 1;
10896
10897 /* Replicate the return address on the stack so that the return
10898 address can be reached via the (argp - 1) slot. This is needed
10899 to implement the macro RETURN_ADDR_RTX and the intrinsic function
10900 expand_builtin_return_addr, etc. */
10901 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10902 t = gen_frame_mem (word_mode, t);
10903 insn = emit_insn (gen_push (t));
10904 RTX_FRAME_RELATED_P (insn) = 1;
10905
10906 /* For the purposes of frame and register save area addressing,
10907 we've started over with a new frame. */
10908 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10909 m->fs.realigned = true;
10910 }
10911
10912 int_registers_saved = (frame.nregs == 0);
10913 sse_registers_saved = (frame.nsseregs == 0);
10914
10915 if (frame_pointer_needed && !m->fs.fp_valid)
10916 {
10917 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10918 slower on all targets. Also sdb doesn't like it. */
10919 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10920 RTX_FRAME_RELATED_P (insn) = 1;
10921
10922 /* Push registers now, before setting the frame pointer
10923 on SEH target. */
10924 if (!int_registers_saved
10925 && TARGET_SEH
10926 && !frame.save_regs_using_mov)
10927 {
10928 ix86_emit_save_regs ();
10929 int_registers_saved = true;
10930 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10931 }
10932
10933 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10934 {
10935 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10936 RTX_FRAME_RELATED_P (insn) = 1;
10937
10938 if (m->fs.cfa_reg == stack_pointer_rtx)
10939 m->fs.cfa_reg = hard_frame_pointer_rtx;
10940 m->fs.fp_offset = m->fs.sp_offset;
10941 m->fs.fp_valid = true;
10942 }
10943 }
10944
10945 if (!int_registers_saved)
10946 {
10947 /* If saving registers via PUSH, do so now. */
10948 if (!frame.save_regs_using_mov)
10949 {
10950 ix86_emit_save_regs ();
10951 int_registers_saved = true;
10952 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10953 }
10954
10955 /* When using the red zone we may start register saving before allocating
10956 the stack frame, saving one cycle of the prologue. However, avoid
10957 doing this if we have to probe the stack; at least on x86_64 the
10958 stack probe can turn into a call that clobbers a red zone location. */
10959 else if (ix86_using_red_zone ()
10960 && (! TARGET_STACK_PROBE
10961 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10962 {
10963 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10964 int_registers_saved = true;
10965 }
10966 }
10967
10968 if (stack_realign_fp)
10969 {
10970 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10971 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10972
10973 /* The computation of the size of the re-aligned stack frame means
10974 that we must allocate the size of the register save area before
10975 performing the actual alignment. Otherwise we cannot guarantee
10976 that there's enough storage above the realignment point. */
10977 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10978 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10979 GEN_INT (m->fs.sp_offset
10980 - frame.sse_reg_save_offset),
10981 -1, false);
10982
10983 /* Align the stack. */
10984 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10985 stack_pointer_rtx,
10986 GEN_INT (-align_bytes)));
10987
10988 /* For the purposes of register save area addressing, the stack
10989 pointer is no longer valid. As for the value of sp_offset,
10990 see ix86_compute_frame_layout, which we need to match in order
10991 to pass verification of stack_pointer_offset at the end. */
10992 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10993 m->fs.sp_valid = false;
10994 }
10995
10996 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10997
10998 if (flag_stack_usage_info)
10999 {
11000 /* We start to count from ARG_POINTER. */
11001 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11002
11003 /* If it was realigned, take into account the fake frame. */
11004 if (stack_realign_drap)
11005 {
11006 if (ix86_static_chain_on_stack)
11007 stack_size += UNITS_PER_WORD;
11008
11009 if (!call_used_regs[REGNO (crtl->drap_reg)])
11010 stack_size += UNITS_PER_WORD;
11011
11012 /* This over-estimates by 1 minimal-stack-alignment-unit but
11013 mitigates that by counting in the new return address slot. */
11014 current_function_dynamic_stack_size
11015 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11016 }
11017
11018 current_function_static_stack_size = stack_size;
11019 }
11020
11021 /* On SEH target with very large frame size, allocate an area to save
11022 SSE registers (as the very large allocation won't be described). */
11023 if (TARGET_SEH
11024 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11025 && !sse_registers_saved)
11026 {
11027 HOST_WIDE_INT sse_size =
11028 frame.sse_reg_save_offset - frame.reg_save_offset;
11029
11030 gcc_assert (int_registers_saved);
11031
11032 /* No need to do stack checking as the area will be immediately
11033 written. */
11034 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11035 GEN_INT (-sse_size), -1,
11036 m->fs.cfa_reg == stack_pointer_rtx);
11037 allocate -= sse_size;
11038 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11039 sse_registers_saved = true;
11040 }
11041
11042 /* The stack has already been decremented by the instruction calling us,
11043 so probe if the size is non-negative to preserve the protection area. */
11044 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11045 {
11046 /* We expect the registers to be saved when probes are used. */
11047 gcc_assert (int_registers_saved);
11048
11049 if (STACK_CHECK_MOVING_SP)
11050 {
11051 if (!(crtl->is_leaf && !cfun->calls_alloca
11052 && allocate <= PROBE_INTERVAL))
11053 {
11054 ix86_adjust_stack_and_probe (allocate);
11055 allocate = 0;
11056 }
11057 }
11058 else
11059 {
11060 HOST_WIDE_INT size = allocate;
11061
11062 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11063 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11064
11065 if (TARGET_STACK_PROBE)
11066 {
11067 if (crtl->is_leaf && !cfun->calls_alloca)
11068 {
11069 if (size > PROBE_INTERVAL)
11070 ix86_emit_probe_stack_range (0, size);
11071 }
11072 else
11073 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11074 }
11075 else
11076 {
11077 if (crtl->is_leaf && !cfun->calls_alloca)
11078 {
11079 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11080 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11081 size - STACK_CHECK_PROTECT);
11082 }
11083 else
11084 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11085 }
11086 }
11087 }
11088
11089 if (allocate == 0)
11090 ;
11091 else if (!ix86_target_stack_probe ()
11092 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11093 {
11094 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11095 GEN_INT (-allocate), -1,
11096 m->fs.cfa_reg == stack_pointer_rtx);
11097 }
11098 else
11099 {
11100 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11101 rtx r10 = NULL;
11102 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11103 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11104 bool eax_live = ix86_eax_live_at_start_p ();
11105 bool r10_live = false;
11106
11107 if (TARGET_64BIT)
11108 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11109
11110 if (eax_live)
11111 {
11112 insn = emit_insn (gen_push (eax));
11113 allocate -= UNITS_PER_WORD;
11114 /* Note that SEH directives need to continue tracking the stack
11115 pointer even after the frame pointer has been set up. */
11116 if (sp_is_cfa_reg || TARGET_SEH)
11117 {
11118 if (sp_is_cfa_reg)
11119 m->fs.cfa_offset += UNITS_PER_WORD;
11120 RTX_FRAME_RELATED_P (insn) = 1;
11121 }
11122 }
11123
11124 if (r10_live)
11125 {
11126 r10 = gen_rtx_REG (Pmode, R10_REG);
11127 insn = emit_insn (gen_push (r10));
11128 allocate -= UNITS_PER_WORD;
11129 if (sp_is_cfa_reg || TARGET_SEH)
11130 {
11131 if (sp_is_cfa_reg)
11132 m->fs.cfa_offset += UNITS_PER_WORD;
11133 RTX_FRAME_RELATED_P (insn) = 1;
11134 }
11135 }
11136
11137 emit_move_insn (eax, GEN_INT (allocate));
11138 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11139
11140 /* Use the fact that AX still contains ALLOCATE. */
11141 adjust_stack_insn = (Pmode == DImode
11142 ? gen_pro_epilogue_adjust_stack_di_sub
11143 : gen_pro_epilogue_adjust_stack_si_sub);
11144
11145 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11146 stack_pointer_rtx, eax));
11147
11148 if (sp_is_cfa_reg || TARGET_SEH)
11149 {
11150 if (sp_is_cfa_reg)
11151 m->fs.cfa_offset += allocate;
11152 RTX_FRAME_RELATED_P (insn) = 1;
11153 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11154 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11155 plus_constant (Pmode, stack_pointer_rtx,
11156 -allocate)));
11157 }
11158 m->fs.sp_offset += allocate;
11159
11160 /* Use stack_pointer_rtx for relative addressing so that code
11161 works for realigned stack, too. */
11162 if (r10_live && eax_live)
11163 {
11164 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11165 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11166 gen_frame_mem (word_mode, t));
11167 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11168 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11169 gen_frame_mem (word_mode, t));
11170 }
11171 else if (eax_live || r10_live)
11172 {
11173 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11174 emit_move_insn (gen_rtx_REG (word_mode,
11175 (eax_live ? AX_REG : R10_REG)),
11176 gen_frame_mem (word_mode, t));
11177 }
11178 }
11179 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11180
11181 /* If we haven't already set up the frame pointer, do so now. */
11182 if (frame_pointer_needed && !m->fs.fp_valid)
11183 {
11184 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11185 GEN_INT (frame.stack_pointer_offset
11186 - frame.hard_frame_pointer_offset));
11187 insn = emit_insn (insn);
11188 RTX_FRAME_RELATED_P (insn) = 1;
11189 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11190
11191 if (m->fs.cfa_reg == stack_pointer_rtx)
11192 m->fs.cfa_reg = hard_frame_pointer_rtx;
11193 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11194 m->fs.fp_valid = true;
11195 }
11196
11197 if (!int_registers_saved)
11198 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11199 if (!sse_registers_saved)
11200 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11201
11202 pic_reg_used = false;
11203 /* We don't use the PIC register for the pe-coff target. */
11204 if (pic_offset_table_rtx
11205 && !TARGET_PECOFF
11206 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11207 || crtl->profile))
11208 {
11209 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11210
11211 if (alt_pic_reg_used != INVALID_REGNUM)
11212 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11213
11214 pic_reg_used = true;
11215 }
11216
11217 if (pic_reg_used)
11218 {
11219 if (TARGET_64BIT)
11220 {
11221 if (ix86_cmodel == CM_LARGE_PIC)
11222 {
11223 rtx_code_label *label;
11224 rtx tmp_reg;
11225
11226 gcc_assert (Pmode == DImode);
11227 label = gen_label_rtx ();
11228 emit_label (label);
11229 LABEL_PRESERVE_P (label) = 1;
11230 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11231 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11232 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11233 label));
11234 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11235 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11236 pic_offset_table_rtx, tmp_reg));
11237 }
11238 else
11239 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11240 }
11241 else
11242 {
11243 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11244 RTX_FRAME_RELATED_P (insn) = 1;
11245 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11246 }
11247 }
11248
11249 /* In the pic_reg_used case, make sure that the got load isn't deleted
11250 when mcount needs it. Blockage to avoid call movement across mcount
11251 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11252 note. */
11253 if (crtl->profile && !flag_fentry && pic_reg_used)
11254 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11255
11256 if (crtl->drap_reg && !crtl->stack_realign_needed)
11257 {
11258 /* vDRAP is set up, but after reload it turns out stack realignment
11259 isn't necessary; here we emit prologue code to set up DRAP
11260 without the stack realignment adjustment. */
11261 t = choose_baseaddr (0);
11262 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11263 }
11264
11265 /* Prevent instructions from being scheduled into register save push
11266 sequence when access to the redzone area is done through frame pointer.
11267 The offset between the frame pointer and the stack pointer is calculated
11268 relative to the value of the stack pointer at the end of the function
11269 prologue, and moving instructions that access redzone area via frame
11270 pointer inside push sequence violates this assumption. */
11271 if (frame_pointer_needed && frame.red_zone_size)
11272 emit_insn (gen_memory_blockage ());
11273
11274 /* Emit cld instruction if stringops are used in the function. */
11275 if (TARGET_CLD && ix86_current_function_needs_cld)
11276 emit_insn (gen_cld ());
11277
11278 /* SEH requires that the prologue end within 256 bytes of the start of
11279 the function. Prevent instruction schedules that would extend that.
11280 Further, prevent alloca modifications to the stack pointer from being
11281 combined with prologue modifications. */
11282 if (TARGET_SEH)
11283 emit_insn (gen_prologue_use (stack_pointer_rtx));
11284 }
11285
11286 /* Emit code to restore REG using a POP insn. */
11287
11288 static void
11289 ix86_emit_restore_reg_using_pop (rtx reg)
11290 {
11291 struct machine_function *m = cfun->machine;
11292 rtx insn = emit_insn (gen_pop (reg));
11293
11294 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11295 m->fs.sp_offset -= UNITS_PER_WORD;
11296
11297 if (m->fs.cfa_reg == crtl->drap_reg
11298 && REGNO (reg) == REGNO (crtl->drap_reg))
11299 {
11300 /* Previously we'd represented the CFA as an expression
11301 like *(%ebp - 8). We've just popped that value from
11302 the stack, which means we need to reset the CFA to
11303 the drap register. This will remain until we restore
11304 the stack pointer. */
11305 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11306 RTX_FRAME_RELATED_P (insn) = 1;
11307
11308 /* This means that the DRAP register is valid for addressing too. */
11309 m->fs.drap_valid = true;
11310 return;
11311 }
11312
11313 if (m->fs.cfa_reg == stack_pointer_rtx)
11314 {
11315 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11316 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11317 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11318 RTX_FRAME_RELATED_P (insn) = 1;
11319
11320 m->fs.cfa_offset -= UNITS_PER_WORD;
11321 }
11322
11323 /* When the frame pointer is the CFA, and we pop it, we are
11324 swapping back to the stack pointer as the CFA. This happens
11325 for stack frames that don't allocate other data, so we assume
11326 the stack pointer is now pointing at the return address, i.e.
11327 the function entry state, which makes the offset be 1 word. */
11328 if (reg == hard_frame_pointer_rtx)
11329 {
11330 m->fs.fp_valid = false;
11331 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11332 {
11333 m->fs.cfa_reg = stack_pointer_rtx;
11334 m->fs.cfa_offset -= UNITS_PER_WORD;
11335
11336 add_reg_note (insn, REG_CFA_DEF_CFA,
11337 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11338 GEN_INT (m->fs.cfa_offset)));
11339 RTX_FRAME_RELATED_P (insn) = 1;
11340 }
11341 }
11342 }
11343
11344 /* Emit code to restore saved registers using POP insns. */
11345
11346 static void
11347 ix86_emit_restore_regs_using_pop (void)
11348 {
11349 unsigned int regno;
11350
11351 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11352 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11353 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11354 }
11355
11356 /* Emit code and notes for the LEAVE instruction. */
11357
11358 static void
11359 ix86_emit_leave (void)
11360 {
11361 struct machine_function *m = cfun->machine;
11362 rtx insn = emit_insn (ix86_gen_leave ());
11363
11364 ix86_add_queued_cfa_restore_notes (insn);
11365
11366 gcc_assert (m->fs.fp_valid);
11367 m->fs.sp_valid = true;
11368 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11369 m->fs.fp_valid = false;
11370
11371 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11372 {
11373 m->fs.cfa_reg = stack_pointer_rtx;
11374 m->fs.cfa_offset = m->fs.sp_offset;
11375
11376 add_reg_note (insn, REG_CFA_DEF_CFA,
11377 plus_constant (Pmode, stack_pointer_rtx,
11378 m->fs.sp_offset));
11379 RTX_FRAME_RELATED_P (insn) = 1;
11380 }
11381 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11382 m->fs.fp_offset);
11383 }
11384
11385 /* Emit code to restore saved registers using MOV insns.
11386 First register is restored from CFA - CFA_OFFSET. */
11387 static void
11388 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11389 bool maybe_eh_return)
11390 {
11391 struct machine_function *m = cfun->machine;
11392 unsigned int regno;
11393
11394 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11395 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11396 {
11397 rtx reg = gen_rtx_REG (word_mode, regno);
11398 rtx insn, mem;
11399
11400 mem = choose_baseaddr (cfa_offset);
11401 mem = gen_frame_mem (word_mode, mem);
11402 insn = emit_move_insn (reg, mem);
11403
11404 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11405 {
11406 /* Previously we'd represented the CFA as an expression
11407 like *(%ebp - 8). We've just popped that value from
11408 the stack, which means we need to reset the CFA to
11409 the drap register. This will remain until we restore
11410 the stack pointer. */
11411 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11412 RTX_FRAME_RELATED_P (insn) = 1;
11413
11414 /* This means that the DRAP register is valid for addressing. */
11415 m->fs.drap_valid = true;
11416 }
11417 else
11418 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11419
11420 cfa_offset -= UNITS_PER_WORD;
11421 }
11422 }
11423
11424 /* Emit code to restore saved SSE registers using MOV insns.
11425 First register is restored from CFA - CFA_OFFSET. */
11426 static void
11427 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11428 bool maybe_eh_return)
11429 {
11430 unsigned int regno;
11431
11432 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11433 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11434 {
11435 rtx reg = gen_rtx_REG (V4SFmode, regno);
11436 rtx mem;
11437
11438 mem = choose_baseaddr (cfa_offset);
11439 mem = gen_rtx_MEM (V4SFmode, mem);
11440 set_mem_align (mem, 128);
11441 emit_move_insn (reg, mem);
11442
11443 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11444
11445 cfa_offset -= 16;
11446 }
11447 }
11448
11449 /* Restore function stack, frame, and registers. */
11450
11451 void
11452 ix86_expand_epilogue (int style)
11453 {
11454 struct machine_function *m = cfun->machine;
11455 struct machine_frame_state frame_state_save = m->fs;
11456 struct ix86_frame frame;
11457 bool restore_regs_via_mov;
11458 bool using_drap;
11459
11460 ix86_finalize_stack_realign_flags ();
11461 ix86_compute_frame_layout (&frame);
11462
11463 m->fs.sp_valid = (!frame_pointer_needed
11464 || (crtl->sp_is_unchanging
11465 && !stack_realign_fp));
11466 gcc_assert (!m->fs.sp_valid
11467 || m->fs.sp_offset == frame.stack_pointer_offset);
11468
11469 /* The FP must be valid if the frame pointer is present. */
11470 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11471 gcc_assert (!m->fs.fp_valid
11472 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11473
11474 /* We must have *some* valid pointer to the stack frame. */
11475 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11476
11477 /* The DRAP is never valid at this point. */
11478 gcc_assert (!m->fs.drap_valid);
11479
11480 /* See the comment about red zone and frame
11481 pointer usage in ix86_expand_prologue. */
11482 if (frame_pointer_needed && frame.red_zone_size)
11483 emit_insn (gen_memory_blockage ());
11484
11485 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11486 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11487
11488 /* Determine the CFA offset of the end of the red-zone. */
11489 m->fs.red_zone_offset = 0;
11490 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11491 {
11492 /* The red-zone begins below the return address. */
11493 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11494
11495 /* When the register save area is in the aligned portion of
11496 the stack, determine the maximum runtime displacement that
11497 matches up with the aligned frame. */
11498 if (stack_realign_drap)
11499 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11500 + UNITS_PER_WORD);
11501 }
11502
11503 /* Special care must be taken for the normal return case of a function
11504 using eh_return: the eax and edx registers are marked as saved, but
11505 not restored along this path. Adjust the save location to match. */
11506 if (crtl->calls_eh_return && style != 2)
11507 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11508
11509 /* EH_RETURN requires the use of moves to function properly. */
11510 if (crtl->calls_eh_return)
11511 restore_regs_via_mov = true;
11512 /* SEH requires the use of pops to identify the epilogue. */
11513 else if (TARGET_SEH)
11514 restore_regs_via_mov = false;
11515 /* If we're only restoring one register and sp is not valid, then
11516 use a move instruction to restore the register, since it's
11517 less work than reloading sp and popping the register. */
11518 else if (!m->fs.sp_valid && frame.nregs <= 1)
11519 restore_regs_via_mov = true;
11520 else if (TARGET_EPILOGUE_USING_MOVE
11521 && cfun->machine->use_fast_prologue_epilogue
11522 && (frame.nregs > 1
11523 || m->fs.sp_offset != frame.reg_save_offset))
11524 restore_regs_via_mov = true;
11525 else if (frame_pointer_needed
11526 && !frame.nregs
11527 && m->fs.sp_offset != frame.reg_save_offset)
11528 restore_regs_via_mov = true;
11529 else if (frame_pointer_needed
11530 && TARGET_USE_LEAVE
11531 && cfun->machine->use_fast_prologue_epilogue
11532 && frame.nregs == 1)
11533 restore_regs_via_mov = true;
11534 else
11535 restore_regs_via_mov = false;
11536
11537 if (restore_regs_via_mov || frame.nsseregs)
11538 {
11539 /* Ensure that the entire register save area is addressable via
11540 the stack pointer, if we will restore via sp. */
11541 if (TARGET_64BIT
11542 && m->fs.sp_offset > 0x7fffffff
11543 && !(m->fs.fp_valid || m->fs.drap_valid)
11544 && (frame.nsseregs + frame.nregs) != 0)
11545 {
11546 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11547 GEN_INT (m->fs.sp_offset
11548 - frame.sse_reg_save_offset),
11549 style,
11550 m->fs.cfa_reg == stack_pointer_rtx);
11551 }
11552 }
11553
11554 /* If there are any SSE registers to restore, then we have to do it
11555 via moves, since there's obviously no pop for SSE regs. */
11556 if (frame.nsseregs)
11557 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11558 style == 2);
11559
11560 if (restore_regs_via_mov)
11561 {
11562 rtx t;
11563
11564 if (frame.nregs)
11565 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11566
11567 /* eh_return epilogues need %ecx added to the stack pointer. */
11568 if (style == 2)
11569 {
11570 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11571
11572 /* Stack alignment doesn't work with eh_return. */
11573 gcc_assert (!stack_realign_drap);
11574 /* Neither do regparm nested functions. */
11575 gcc_assert (!ix86_static_chain_on_stack);
11576
11577 if (frame_pointer_needed)
11578 {
11579 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11580 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11581 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11582
11583 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11584 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11585
11586 /* Note that we use SA as a temporary CFA, as the return
11587 address is at the proper place relative to it. We
11588 pretend this happens at the FP restore insn because
11589 prior to this insn the FP would be stored at the wrong
11590 offset relative to SA, and after this insn we have no
11591 other reasonable register to use for the CFA. We don't
11592 bother resetting the CFA to the SP for the duration of
11593 the return insn. */
11594 add_reg_note (insn, REG_CFA_DEF_CFA,
11595 plus_constant (Pmode, sa, UNITS_PER_WORD));
11596 ix86_add_queued_cfa_restore_notes (insn);
11597 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11598 RTX_FRAME_RELATED_P (insn) = 1;
11599
11600 m->fs.cfa_reg = sa;
11601 m->fs.cfa_offset = UNITS_PER_WORD;
11602 m->fs.fp_valid = false;
11603
11604 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11605 const0_rtx, style, false);
11606 }
11607 else
11608 {
11609 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11610 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11611 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11612 ix86_add_queued_cfa_restore_notes (insn);
11613
11614 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11615 if (m->fs.cfa_offset != UNITS_PER_WORD)
11616 {
11617 m->fs.cfa_offset = UNITS_PER_WORD;
11618 add_reg_note (insn, REG_CFA_DEF_CFA,
11619 plus_constant (Pmode, stack_pointer_rtx,
11620 UNITS_PER_WORD));
11621 RTX_FRAME_RELATED_P (insn) = 1;
11622 }
11623 }
11624 m->fs.sp_offset = UNITS_PER_WORD;
11625 m->fs.sp_valid = true;
11626 }
11627 }
11628 else
11629 {
11630 /* SEH requires that the function end with (1) a stack adjustment
11631 if necessary, (2) a sequence of pops, and (3) a return or
11632 jump instruction. Prevent insns from the function body from
11633 being scheduled into this sequence. */
11634 if (TARGET_SEH)
11635 {
11636 /* Prevent a catch region from being adjacent to the standard
11637 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11638 several other flags that would be interesting to test are
11639 set up yet. */
11640 if (flag_non_call_exceptions)
11641 emit_insn (gen_nops (const1_rtx));
11642 else
11643 emit_insn (gen_blockage ());
11644 }
11645
11646 /* The first step is to deallocate the stack frame so that we can
11647 pop the registers. Also do it on the SEH target for a very large
11648 frame, as the emitted instructions aren't allowed by the ABI in
11649 epilogues. */
11650 if (!m->fs.sp_valid
11651 || (TARGET_SEH
11652 && (m->fs.sp_offset - frame.reg_save_offset
11653 >= SEH_MAX_FRAME_SIZE)))
11654 {
11655 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11656 GEN_INT (m->fs.fp_offset
11657 - frame.reg_save_offset),
11658 style, false);
11659 }
11660 else if (m->fs.sp_offset != frame.reg_save_offset)
11661 {
11662 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11663 GEN_INT (m->fs.sp_offset
11664 - frame.reg_save_offset),
11665 style,
11666 m->fs.cfa_reg == stack_pointer_rtx);
11667 }
11668
11669 ix86_emit_restore_regs_using_pop ();
11670 }
11671
11672 /* If we used a frame pointer and haven't already got rid of it,
11673 then do so now. */
11674 if (m->fs.fp_valid)
11675 {
11676 /* If the stack pointer is valid and pointing at the frame
11677 pointer store address, then we only need a pop. */
11678 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11679 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11680 /* The 'leave' instruction results in shorter dependency chains on
11681 CPUs that are able to grok it fast. */
11682 else if (TARGET_USE_LEAVE
11683 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11684 || !cfun->machine->use_fast_prologue_epilogue)
11685 ix86_emit_leave ();
11686 else
11687 {
11688 pro_epilogue_adjust_stack (stack_pointer_rtx,
11689 hard_frame_pointer_rtx,
11690 const0_rtx, style, !using_drap);
11691 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11692 }
11693 }
11694
11695 if (using_drap)
11696 {
11697 int param_ptr_offset = UNITS_PER_WORD;
11698 rtx insn;
11699
11700 gcc_assert (stack_realign_drap);
11701
11702 if (ix86_static_chain_on_stack)
11703 param_ptr_offset += UNITS_PER_WORD;
11704 if (!call_used_regs[REGNO (crtl->drap_reg)])
11705 param_ptr_offset += UNITS_PER_WORD;
11706
11707 insn = emit_insn (gen_rtx_SET
11708 (VOIDmode, stack_pointer_rtx,
11709 gen_rtx_PLUS (Pmode,
11710 crtl->drap_reg,
11711 GEN_INT (-param_ptr_offset))));
11712 m->fs.cfa_reg = stack_pointer_rtx;
11713 m->fs.cfa_offset = param_ptr_offset;
11714 m->fs.sp_offset = param_ptr_offset;
11715 m->fs.realigned = false;
11716
11717 add_reg_note (insn, REG_CFA_DEF_CFA,
11718 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11719 GEN_INT (param_ptr_offset)));
11720 RTX_FRAME_RELATED_P (insn) = 1;
11721
11722 if (!call_used_regs[REGNO (crtl->drap_reg)])
11723 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11724 }
11725
11726 /* At this point the stack pointer must be valid, and we must have
11727 restored all of the registers. We may not have deallocated the
11728 entire stack frame. We've delayed this until now because it may
11729 be possible to merge the local stack deallocation with the
11730 deallocation forced by ix86_static_chain_on_stack. */
11731 gcc_assert (m->fs.sp_valid);
11732 gcc_assert (!m->fs.fp_valid);
11733 gcc_assert (!m->fs.realigned);
11734 if (m->fs.sp_offset != UNITS_PER_WORD)
11735 {
11736 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11737 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11738 style, true);
11739 }
11740 else
11741 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11742
11743 /* Sibcall epilogues don't want a return instruction. */
11744 if (style == 0)
11745 {
11746 m->fs = frame_state_save;
11747 return;
11748 }
11749
11750 if (crtl->args.pops_args && crtl->args.size)
11751 {
11752 rtx popc = GEN_INT (crtl->args.pops_args);
11753
11754 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11755 address, do an explicit add, and jump indirectly to the caller. */
11756
11757 if (crtl->args.pops_args >= 65536)
11758 {
11759 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11760 rtx insn;
11761
11762 /* There is no "pascal" calling convention in any 64bit ABI. */
11763 gcc_assert (!TARGET_64BIT);
11764
11765 insn = emit_insn (gen_pop (ecx));
11766 m->fs.cfa_offset -= UNITS_PER_WORD;
11767 m->fs.sp_offset -= UNITS_PER_WORD;
11768
11769 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11770 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11771 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11772 add_reg_note (insn, REG_CFA_REGISTER,
11773 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11774 RTX_FRAME_RELATED_P (insn) = 1;
11775
11776 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11777 popc, -1, true);
11778 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11779 }
11780 else
11781 emit_jump_insn (gen_simple_return_pop_internal (popc));
11782 }
11783 else
11784 emit_jump_insn (gen_simple_return_internal ());
11785
11786 /* Restore the state back to the state from the prologue,
11787 so that it's correct for the next epilogue. */
11788 m->fs = frame_state_save;
11789 }
11790
11791 /* Reset state (such as the PIC register number) from the function's potential modifications. */
11792
11793 static void
11794 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11795 {
11796 if (pic_offset_table_rtx)
11797 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11798 #if TARGET_MACHO
11799 /* Mach-O doesn't support labels at the end of objects, so if
11800 it looks like we might want one, insert a NOP. */
11801 {
11802 rtx_insn *insn = get_last_insn ();
11803 rtx_insn *deleted_debug_label = NULL;
11804 while (insn
11805 && NOTE_P (insn)
11806 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11807 {
11808 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11809 notes; instead set their CODE_LABEL_NUMBER to -1,
11810 as otherwise there would be code generation differences
11811 between -g and -g0. */
11812 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11813 deleted_debug_label = insn;
11814 insn = PREV_INSN (insn);
11815 }
11816 if (insn
11817 && (LABEL_P (insn)
11818 || (NOTE_P (insn)
11819 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11820 fputs ("\tnop\n", file);
11821 else if (deleted_debug_label)
11822 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11823 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11824 CODE_LABEL_NUMBER (insn) = -1;
11825 }
11826 #endif
11827
11828 }
11829
11830 /* Return a scratch register to use in the split stack prologue. The
11831 split stack prologue is used for -fsplit-stack. Its instructions are
11832 the first in the function, emitted even before the regular prologue.
11833 The scratch register can be any caller-saved register which is not
11834 used for parameters or for the static chain. */
11835
11836 static unsigned int
11837 split_stack_prologue_scratch_regno (void)
11838 {
11839 if (TARGET_64BIT)
11840 return R11_REG;
11841 else
11842 {
11843 bool is_fastcall, is_thiscall;
11844 int regparm;
11845
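/* Note: the choices below mirror the ia32 argument-passing conventions:
   fastcall passes arguments in %ecx and %edx, thiscall passes 'this' in
   %ecx, and regparm uses %eax, %edx and %ecx in that order, so we pick
   a caller-saved register that neither the parameters nor the static
   chain occupy.  */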
11846 is_fastcall = (lookup_attribute ("fastcall",
11847 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11848 != NULL);
11849 is_thiscall = (lookup_attribute ("thiscall",
11850 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11851 != NULL);
11852 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11853
11854 if (is_fastcall)
11855 {
11856 if (DECL_STATIC_CHAIN (cfun->decl))
11857 {
11858 sorry ("-fsplit-stack does not support fastcall with "
11859 "nested function");
11860 return INVALID_REGNUM;
11861 }
11862 return AX_REG;
11863 }
11864 else if (is_thiscall)
11865 {
11866 if (!DECL_STATIC_CHAIN (cfun->decl))
11867 return DX_REG;
11868 return AX_REG;
11869 }
11870 else if (regparm < 3)
11871 {
11872 if (!DECL_STATIC_CHAIN (cfun->decl))
11873 return CX_REG;
11874 else
11875 {
11876 if (regparm >= 2)
11877 {
11878 sorry ("-fsplit-stack does not support 2 register "
11879 "parameters for a nested function");
11880 return INVALID_REGNUM;
11881 }
11882 return DX_REG;
11883 }
11884 }
11885 else
11886 {
11887 /* FIXME: We could make this work by pushing a register
11888 around the addition and comparison. */
11889 sorry ("-fsplit-stack does not support 3 register parameters");
11890 return INVALID_REGNUM;
11891 }
11892 }
11893 }
11894
11895 /* A SYMBOL_REF for the function which allocates new stack space for
11896 -fsplit-stack. */
11897
11898 static GTY(()) rtx split_stack_fn;
11899
11900 /* A SYMBOL_REF for the __morestack variant used with the large
11901 code model. */
11902
11903 static GTY(()) rtx split_stack_fn_large;
11904
11905 /* Handle -fsplit-stack. These are the first instructions in the
11906 function, even before the regular prologue. */
11907
11908 void
11909 ix86_expand_split_stack_prologue (void)
11910 {
11911 struct ix86_frame frame;
11912 HOST_WIDE_INT allocate;
11913 unsigned HOST_WIDE_INT args_size;
11914 rtx_code_label *label;
11915 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11916 rtx scratch_reg = NULL_RTX;
11917 rtx_code_label *varargs_label = NULL;
11918 rtx fn;
11919
11920 gcc_assert (flag_split_stack && reload_completed);
11921
11922 ix86_finalize_stack_realign_flags ();
11923 ix86_compute_frame_layout (&frame);
11924 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11925
11926 /* This is the label we will branch to if we have enough stack
11927 space. We expect the basic block reordering pass to reverse this
11928 branch if optimizing, so that we branch in the unlikely case. */
11929 label = gen_label_rtx ();
11930
11931 /* We need to compare the stack pointer minus the frame size with
11932 the stack boundary in the TCB. The stack boundary always gives
11933 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11934 can compare directly. Otherwise we need to do an addition. */
11935
11936 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11937 UNSPEC_STACK_CHECK);
11938 limit = gen_rtx_CONST (Pmode, limit);
11939 limit = gen_rtx_MEM (Pmode, limit);
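/* This UNSPEC_STACK_CHECK memory reference stands for the stack
   boundary field in the TCB described above; how it is printed is
   target-specific.  */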
11940 if (allocate < SPLIT_STACK_AVAILABLE)
11941 current = stack_pointer_rtx;
11942 else
11943 {
11944 unsigned int scratch_regno;
11945 rtx offset;
11946
11947 /* We need a scratch register to hold the stack pointer minus
11948 the required frame size. Since this is the very start of the
11949 function, the scratch register can be any caller-saved
11950 register which is not used for parameters. */
11951 offset = GEN_INT (- allocate);
11952 scratch_regno = split_stack_prologue_scratch_regno ();
11953 if (scratch_regno == INVALID_REGNUM)
11954 return;
11955 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11956 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11957 {
11958 /* We don't use ix86_gen_add3 in this case because it will
11959 want to split to lea, but when not optimizing the insn
11960 will not be split after this point. */
11961 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11962 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11963 offset)));
11964 }
11965 else
11966 {
11967 emit_move_insn (scratch_reg, offset);
11968 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11969 stack_pointer_rtx));
11970 }
11971 current = scratch_reg;
11972 }
11973
11974 ix86_expand_branch (GEU, current, limit, label);
11975 jump_insn = get_last_insn ();
11976 JUMP_LABEL (jump_insn) = label;
11977
11978 /* Mark the jump as very likely to be taken. */
11979 add_int_reg_note (jump_insn, REG_BR_PROB,
11980 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
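/* This corresponds to a predicted probability of 99%: in the common
   case there is enough stack and the call to __morestack below is
   skipped.  */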
11981
11982 if (split_stack_fn == NULL_RTX)
11983 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11984 fn = split_stack_fn;
11985
11986 /* Get more stack space. We pass in the desired stack space and the
11987 size of the arguments to copy to the new stack. In 32-bit mode
11988 we push the parameters; __morestack will return on a new stack
11989 anyhow. In 64-bit mode we pass the parameters in r10 and
11990 r11. */
11991 allocate_rtx = GEN_INT (allocate);
11992 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11993 call_fusage = NULL_RTX;
11994 if (TARGET_64BIT)
11995 {
11996 rtx reg10, reg11;
11997
11998 reg10 = gen_rtx_REG (Pmode, R10_REG);
11999 reg11 = gen_rtx_REG (Pmode, R11_REG);
12000
12001 /* If this function uses a static chain, it will be in %r10.
12002 Preserve it across the call to __morestack. */
12003 if (DECL_STATIC_CHAIN (cfun->decl))
12004 {
12005 rtx rax;
12006
12007 rax = gen_rtx_REG (word_mode, AX_REG);
12008 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12009 use_reg (&call_fusage, rax);
12010 }
12011
12012 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12013 && !TARGET_PECOFF)
12014 {
12015 HOST_WIDE_INT argval;
12016
12017 gcc_assert (Pmode == DImode);
12018 /* When using the large model we need to load the address
12019 into a register, and we've run out of registers. So we
12020 switch to a different calling convention, and we call a
12021 different function: __morestack_large_model. We pass the
12022 argument size in the upper 32 bits of r10 and pass the
12023 frame size in the lower 32 bits. */
12024 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12025 gcc_assert ((args_size & 0xffffffff) == args_size);
12026
12027 if (split_stack_fn_large == NULL_RTX)
12028 split_stack_fn_large =
12029 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12030
12031 if (ix86_cmodel == CM_LARGE_PIC)
12032 {
12033 rtx_code_label *label;
12034 rtx x;
12035
12036 label = gen_label_rtx ();
12037 emit_label (label);
12038 LABEL_PRESERVE_P (label) = 1;
12039 emit_insn (gen_set_rip_rex64 (reg10, label));
12040 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12041 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12042 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12043 UNSPEC_GOT);
12044 x = gen_rtx_CONST (Pmode, x);
12045 emit_move_insn (reg11, x);
12046 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12047 x = gen_const_mem (Pmode, x);
12048 emit_move_insn (reg11, x);
12049 }
12050 else
12051 emit_move_insn (reg11, split_stack_fn_large);
12052
12053 fn = reg11;
12054
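/* For example, args_size == 0x18 and allocate == 0x1000 yield
   argval == 0x0000001800001000: the argument size ends up in the
   upper 32 bits of %r10 and the frame size in the lower 32 bits.  */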
12055 argval = ((args_size << 16) << 16) + allocate;
12056 emit_move_insn (reg10, GEN_INT (argval));
12057 }
12058 else
12059 {
12060 emit_move_insn (reg10, allocate_rtx);
12061 emit_move_insn (reg11, GEN_INT (args_size));
12062 use_reg (&call_fusage, reg11);
12063 }
12064
12065 use_reg (&call_fusage, reg10);
12066 }
12067 else
12068 {
12069 emit_insn (gen_push (GEN_INT (args_size)));
12070 emit_insn (gen_push (allocate_rtx));
12071 }
12072 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12073 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12074 NULL_RTX, false);
12075 add_function_usage_to (call_insn, call_fusage);
12076
12077 /* In order to make call/return prediction work right, we now need
12078 to execute a return instruction. See
12079 libgcc/config/i386/morestack.S for the details on how this works.
12080
12081 For flow purposes gcc must not see this as a return
12082 instruction--we need control flow to continue at the subsequent
12083 label. Therefore, we use an unspec. */
12084 gcc_assert (crtl->args.pops_args < 65536);
12085 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12086
12087 /* If we are in 64-bit mode and this function uses a static chain,
12088 we saved %r10 in %rax before calling __morestack. */
12089 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12090 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12091 gen_rtx_REG (word_mode, AX_REG));
12092
12093 /* If this function calls va_start, we need to store a pointer to
12094 the arguments on the old stack, because they may not have been
12095 all copied to the new stack. At this point the old stack can be
12096 found at the frame pointer value used by __morestack, because
12097 __morestack has set that up before calling back to us. Here we
12098 store that pointer in a scratch register, and in
12099 ix86_expand_prologue we store the scratch register in a stack
12100 slot. */
12101 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12102 {
12103 unsigned int scratch_regno;
12104 rtx frame_reg;
12105 int words;
12106
12107 scratch_regno = split_stack_prologue_scratch_regno ();
12108 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12109 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12110
12111 /* 64-bit:
12112 fp -> old fp value
12113 return address within this function
12114 return address of caller of this function
12115 stack arguments
12116 So we add three words to get to the stack arguments.
12117
12118 32-bit:
12119 fp -> old fp value
12120 return address within this function
12121 first argument to __morestack
12122 second argument to __morestack
12123 return address of caller of this function
12124 stack arguments
12125 So we add five words to get to the stack arguments.
12126 */
12127 words = TARGET_64BIT ? 3 : 5;
12128 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12129 gen_rtx_PLUS (Pmode, frame_reg,
12130 GEN_INT (words * UNITS_PER_WORD))));
12131
12132 varargs_label = gen_label_rtx ();
12133 emit_jump_insn (gen_jump (varargs_label));
12134 JUMP_LABEL (get_last_insn ()) = varargs_label;
12135
12136 emit_barrier ();
12137 }
12138
12139 emit_label (label);
12140 LABEL_NUSES (label) = 1;
12141
12142 /* If this function calls va_start, we now have to set the scratch
12143 register for the case where we do not call __morestack. In this
12144 case we need to set it based on the stack pointer. */
12145 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12146 {
12147 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12148 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12149 GEN_INT (UNITS_PER_WORD))));
12150
12151 emit_label (varargs_label);
12152 LABEL_NUSES (varargs_label) = 1;
12153 }
12154 }
12155
12156 /* We may have to tell the dataflow pass that the split stack prologue
12157 is initializing a scratch register. */
12158
12159 static void
12160 ix86_live_on_entry (bitmap regs)
12161 {
12162 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12163 {
12164 gcc_assert (flag_split_stack);
12165 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12166 }
12167 }
12168 \f
12169 /* Extract the parts of an RTL expression that is a valid memory address
12170 for an instruction. Return 0 if the structure of the address is
12171 grossly off. Return -1 if the address contains ASHIFT, so it is not
12172 strictly valid but is still used to compute the length of the lea insn. */
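/* For example, (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))
   decomposes into index = (reg A), scale = 4, base = (reg B) and
   disp = (const_int 8).  */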
12173
12174 int
12175 ix86_decompose_address (rtx addr, struct ix86_address *out)
12176 {
12177 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12178 rtx base_reg, index_reg;
12179 HOST_WIDE_INT scale = 1;
12180 rtx scale_rtx = NULL_RTX;
12181 rtx tmp;
12182 int retval = 1;
12183 enum ix86_address_seg seg = SEG_DEFAULT;
12184
12185 /* Allow zero-extended SImode addresses,
12186 they will be emitted with addr32 prefix. */
12187 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12188 {
12189 if (GET_CODE (addr) == ZERO_EXTEND
12190 && GET_MODE (XEXP (addr, 0)) == SImode)
12191 {
12192 addr = XEXP (addr, 0);
12193 if (CONST_INT_P (addr))
12194 return 0;
12195 }
12196 else if (GET_CODE (addr) == AND
12197 && const_32bit_mask (XEXP (addr, 1), DImode))
12198 {
12199 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12200 if (addr == NULL_RTX)
12201 return 0;
12202
12203 if (CONST_INT_P (addr))
12204 return 0;
12205 }
12206 }
12207
12208 /* Allow SImode subregs of DImode addresses,
12209 they will be emitted with addr32 prefix. */
12210 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12211 {
12212 if (GET_CODE (addr) == SUBREG
12213 && GET_MODE (SUBREG_REG (addr)) == DImode)
12214 {
12215 addr = SUBREG_REG (addr);
12216 if (CONST_INT_P (addr))
12217 return 0;
12218 }
12219 }
12220
12221 if (REG_P (addr))
12222 base = addr;
12223 else if (GET_CODE (addr) == SUBREG)
12224 {
12225 if (REG_P (SUBREG_REG (addr)))
12226 base = addr;
12227 else
12228 return 0;
12229 }
12230 else if (GET_CODE (addr) == PLUS)
12231 {
12232 rtx addends[4], op;
12233 int n = 0, i;
12234
12235 op = addr;
12236 do
12237 {
12238 if (n >= 4)
12239 return 0;
12240 addends[n++] = XEXP (op, 1);
12241 op = XEXP (op, 0);
12242 }
12243 while (GET_CODE (op) == PLUS);
12244 if (n >= 4)
12245 return 0;
12246 addends[n] = op;
12247
12248 for (i = n; i >= 0; --i)
12249 {
12250 op = addends[i];
12251 switch (GET_CODE (op))
12252 {
12253 case MULT:
12254 if (index)
12255 return 0;
12256 index = XEXP (op, 0);
12257 scale_rtx = XEXP (op, 1);
12258 break;
12259
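/* An ASHIFT by 0..3 is equivalent to a MULT by 1, 2, 4 or 8,
   the only scale factors the hardware addressing modes can encode;
   e.g. (ashift (reg) (const_int 3)) denotes reg*8.  */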
12260 case ASHIFT:
12261 if (index)
12262 return 0;
12263 index = XEXP (op, 0);
12264 tmp = XEXP (op, 1);
12265 if (!CONST_INT_P (tmp))
12266 return 0;
12267 scale = INTVAL (tmp);
12268 if ((unsigned HOST_WIDE_INT) scale > 3)
12269 return 0;
12270 scale = 1 << scale;
12271 break;
12272
12273 case ZERO_EXTEND:
12274 op = XEXP (op, 0);
12275 if (GET_CODE (op) != UNSPEC)
12276 return 0;
12277 /* FALLTHRU */
12278
12279 case UNSPEC:
12280 if (XINT (op, 1) == UNSPEC_TP
12281 && TARGET_TLS_DIRECT_SEG_REFS
12282 && seg == SEG_DEFAULT)
12283 seg = DEFAULT_TLS_SEG_REG;
12284 else
12285 return 0;
12286 break;
12287
12288 case SUBREG:
12289 if (!REG_P (SUBREG_REG (op)))
12290 return 0;
12291 /* FALLTHRU */
12292
12293 case REG:
12294 if (!base)
12295 base = op;
12296 else if (!index)
12297 index = op;
12298 else
12299 return 0;
12300 break;
12301
12302 case CONST:
12303 case CONST_INT:
12304 case SYMBOL_REF:
12305 case LABEL_REF:
12306 if (disp)
12307 return 0;
12308 disp = op;
12309 break;
12310
12311 default:
12312 return 0;
12313 }
12314 }
12315 }
12316 else if (GET_CODE (addr) == MULT)
12317 {
12318 index = XEXP (addr, 0); /* index*scale */
12319 scale_rtx = XEXP (addr, 1);
12320 }
12321 else if (GET_CODE (addr) == ASHIFT)
12322 {
12323 /* We're called for lea too, which implements ashift on occasion. */
12324 index = XEXP (addr, 0);
12325 tmp = XEXP (addr, 1);
12326 if (!CONST_INT_P (tmp))
12327 return 0;
12328 scale = INTVAL (tmp);
12329 if ((unsigned HOST_WIDE_INT) scale > 3)
12330 return 0;
12331 scale = 1 << scale;
12332 retval = -1;
12333 }
12334 else
12335 disp = addr; /* displacement */
12336
12337 if (index)
12338 {
12339 if (REG_P (index))
12340 ;
12341 else if (GET_CODE (index) == SUBREG
12342 && REG_P (SUBREG_REG (index)))
12343 ;
12344 else
12345 return 0;
12346 }
12347
12348 /* Extract the integral value of scale. */
12349 if (scale_rtx)
12350 {
12351 if (!CONST_INT_P (scale_rtx))
12352 return 0;
12353 scale = INTVAL (scale_rtx);
12354 }
12355
12356 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12357 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12358
12359 /* Avoid useless 0 displacement. */
12360 if (disp == const0_rtx && (base || index))
12361 disp = NULL_RTX;
12362
12363 /* Allow arg pointer and stack pointer as index if there is no scaling. */
12364 if (base_reg && index_reg && scale == 1
12365 && (index_reg == arg_pointer_rtx
12366 || index_reg == frame_pointer_rtx
12367 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12368 {
12369 rtx tmp;
12370 tmp = base, base = index, index = tmp;
12371 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12372 }
12373
12374 /* Special case: %ebp cannot be encoded as a base without a displacement.
12375 Similarly %r13. */
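/* (In the ModRM/SIB encoding, mod == 00 with base value 101 does not
   mean "%ebp with no displacement" but selects the disp32-only or
   RIP-relative form, so an explicit zero displacement is required.)  */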
12376 if (!disp
12377 && base_reg
12378 && (base_reg == hard_frame_pointer_rtx
12379 || base_reg == frame_pointer_rtx
12380 || base_reg == arg_pointer_rtx
12381 || (REG_P (base_reg)
12382 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12383 || REGNO (base_reg) == R13_REG))))
12384 disp = const0_rtx;
12385
12386 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12387 Avoid this by transforming to [%esi+0].
12388 Reload calls address legitimization without cfun defined, so we need
12389 to test cfun for being non-NULL. */
12390 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12391 && base_reg && !index_reg && !disp
12392 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12393 disp = const0_rtx;
12394
12395 /* Special case: encode reg+reg instead of reg*2. */
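/* A scaled index with no base must be encoded with a 32-bit
   displacement, whereas reg+reg with no displacement is shorter.  */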
12396 if (!base && index && scale == 2)
12397 base = index, base_reg = index_reg, scale = 1;
12398
12399 /* Special case: scaling cannot be encoded without base or displacement. */
12400 if (!base && !disp && index && scale != 1)
12401 disp = const0_rtx;
12402
12403 out->base = base;
12404 out->index = index;
12405 out->disp = disp;
12406 out->scale = scale;
12407 out->seg = seg;
12408
12409 return retval;
12410 }
12411 \f
12412 /* Return cost of the memory address x.
12413 For i386, it is better to use a complex address than let gcc copy
12414 the address into a reg and make a new pseudo. But not if the address
12415 requires two regs - that would mean more pseudos with longer
12416 lifetimes. */
12417 static int
12418 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12419 {
12420 struct ix86_address parts;
12421 int cost = 1;
12422 int ok = ix86_decompose_address (x, &parts);
12423
12424 gcc_assert (ok);
12425
12426 if (parts.base && GET_CODE (parts.base) == SUBREG)
12427 parts.base = SUBREG_REG (parts.base);
12428 if (parts.index && GET_CODE (parts.index) == SUBREG)
12429 parts.index = SUBREG_REG (parts.index);
12430
12431 /* Attempt to minimize number of registers in the address. */
12432 if ((parts.base
12433 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12434 || (parts.index
12435 && (!REG_P (parts.index)
12436 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12437 cost++;
12438
12439 if (parts.base
12440 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12441 && parts.index
12442 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12443 && parts.base != parts.index)
12444 cost++;
12445
12446 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12447 since its predecode logic can't detect the length of such instructions
12448 and they degenerate to vector decoding. Increase the cost of such
12449 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12450 to split such addresses or even to refuse them entirely.
12451
12452 Following addressing modes are affected:
12453 [base+scale*index]
12454 [scale*index+disp]
12455 [base+index]
12456
12457 The first and last case may be avoidable by explicitly coding the zero
12458 displacement in the memory address, but I don't have an AMD-K6 machine
12459 handy to check this theory. */
12460
12461 if (TARGET_K6
12462 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12463 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12464 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12465 cost += 10;
12466
12467 return cost;
12468 }
12469 \f
12470 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
12471 this is used to form addresses to local data when -fPIC is in
12472 use. */
12473
12474 static bool
12475 darwin_local_data_pic (rtx disp)
12476 {
12477 return (GET_CODE (disp) == UNSPEC
12478 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12479 }
12480
12481 /* Determine if a given RTX is a valid constant. We already know this
12482 satisfies CONSTANT_P. */
12483
12484 static bool
12485 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12486 {
12487 switch (GET_CODE (x))
12488 {
12489 case CONST:
12490 x = XEXP (x, 0);
12491
12492 if (GET_CODE (x) == PLUS)
12493 {
12494 if (!CONST_INT_P (XEXP (x, 1)))
12495 return false;
12496 x = XEXP (x, 0);
12497 }
12498
12499 if (TARGET_MACHO && darwin_local_data_pic (x))
12500 return true;
12501
12502 /* Only some unspecs are valid as "constants". */
12503 if (GET_CODE (x) == UNSPEC)
12504 switch (XINT (x, 1))
12505 {
12506 case UNSPEC_GOT:
12507 case UNSPEC_GOTOFF:
12508 case UNSPEC_PLTOFF:
12509 return TARGET_64BIT;
12510 case UNSPEC_TPOFF:
12511 case UNSPEC_NTPOFF:
12512 x = XVECEXP (x, 0, 0);
12513 return (GET_CODE (x) == SYMBOL_REF
12514 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12515 case UNSPEC_DTPOFF:
12516 x = XVECEXP (x, 0, 0);
12517 return (GET_CODE (x) == SYMBOL_REF
12518 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12519 default:
12520 return false;
12521 }
12522
12523 /* We must have drilled down to a symbol. */
12524 if (GET_CODE (x) == LABEL_REF)
12525 return true;
12526 if (GET_CODE (x) != SYMBOL_REF)
12527 return false;
12528 /* FALLTHRU */
12529
12530 case SYMBOL_REF:
12531 /* TLS symbols are never valid. */
12532 if (SYMBOL_REF_TLS_MODEL (x))
12533 return false;
12534
12535 /* DLLIMPORT symbols are never valid. */
12536 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12537 && SYMBOL_REF_DLLIMPORT_P (x))
12538 return false;
12539
12540 #if TARGET_MACHO
12541 /* mdynamic-no-pic */
12542 if (MACHO_DYNAMIC_NO_PIC_P)
12543 return machopic_symbol_defined_p (x);
12544 #endif
12545 break;
12546
12547 case CONST_DOUBLE:
12548 if (GET_MODE (x) == TImode
12549 && x != CONST0_RTX (TImode)
12550 && !TARGET_64BIT)
12551 return false;
12552 break;
12553
12554 case CONST_VECTOR:
12555 if (!standard_sse_constant_p (x))
12556 return false;
12557
12558 default:
12559 break;
12560 }
12561
12562 /* Otherwise we handle everything else in the move patterns. */
12563 return true;
12564 }
12565
12566 /* Determine if it's legal to put X into the constant pool. This
12567 is not possible for the address of thread-local symbols, which
12568 is checked above. */
12569
12570 static bool
12571 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12572 {
12573 /* We can always put integral constants and vectors in memory. */
12574 switch (GET_CODE (x))
12575 {
12576 case CONST_INT:
12577 case CONST_DOUBLE:
12578 case CONST_VECTOR:
12579 return false;
12580
12581 default:
12582 break;
12583 }
12584 return !ix86_legitimate_constant_p (mode, x);
12585 }
12586
12587 /* Return true if the symbol is marked as dllimport or as a stub variable,
12588 otherwise false. */
12589
12590 static bool
12591 is_imported_p (rtx x)
12592 {
12593 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12594 || GET_CODE (x) != SYMBOL_REF)
12595 return false;
12596
12597 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12598 }
12599
12600
12601 /* Nonzero if the constant value X is a legitimate general operand
12602 when generating PIC code. It is given that flag_pic is on and
12603 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12604
12605 bool
12606 legitimate_pic_operand_p (rtx x)
12607 {
12608 rtx inner;
12609
12610 switch (GET_CODE (x))
12611 {
12612 case CONST:
12613 inner = XEXP (x, 0);
12614 if (GET_CODE (inner) == PLUS
12615 && CONST_INT_P (XEXP (inner, 1)))
12616 inner = XEXP (inner, 0);
12617
12618 /* Only some unspecs are valid as "constants". */
12619 if (GET_CODE (inner) == UNSPEC)
12620 switch (XINT (inner, 1))
12621 {
12622 case UNSPEC_GOT:
12623 case UNSPEC_GOTOFF:
12624 case UNSPEC_PLTOFF:
12625 return TARGET_64BIT;
12626 case UNSPEC_TPOFF:
12627 x = XVECEXP (inner, 0, 0);
12628 return (GET_CODE (x) == SYMBOL_REF
12629 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12630 case UNSPEC_MACHOPIC_OFFSET:
12631 return legitimate_pic_address_disp_p (x);
12632 default:
12633 return false;
12634 }
12635 /* FALLTHRU */
12636
12637 case SYMBOL_REF:
12638 case LABEL_REF:
12639 return legitimate_pic_address_disp_p (x);
12640
12641 default:
12642 return true;
12643 }
12644 }
12645
12646 /* Determine if a given CONST RTX is a valid memory displacement
12647 in PIC mode. */
12648
12649 bool
12650 legitimate_pic_address_disp_p (rtx disp)
12651 {
12652 bool saw_plus;
12653
12654 /* In 64bit mode we can allow direct addresses of symbols and labels
12655 when they are not dynamic symbols. */
12656 if (TARGET_64BIT)
12657 {
12658 rtx op0 = disp, op1;
12659
12660 switch (GET_CODE (disp))
12661 {
12662 case LABEL_REF:
12663 return true;
12664
12665 case CONST:
12666 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12667 break;
12668 op0 = XEXP (XEXP (disp, 0), 0);
12669 op1 = XEXP (XEXP (disp, 0), 1);
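/* Offsets are restricted to +/-16MB, presumably so that symbol+offset
   stays comfortably inside the 31-bit range that the small code models
   guarantee for symbol addresses.  */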
12670 if (!CONST_INT_P (op1)
12671 || INTVAL (op1) >= 16*1024*1024
12672 || INTVAL (op1) < -16*1024*1024)
12673 break;
12674 if (GET_CODE (op0) == LABEL_REF)
12675 return true;
12676 if (GET_CODE (op0) == CONST
12677 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12678 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12679 return true;
12680 if (GET_CODE (op0) == UNSPEC
12681 && XINT (op0, 1) == UNSPEC_PCREL)
12682 return true;
12683 if (GET_CODE (op0) != SYMBOL_REF)
12684 break;
12685 /* FALLTHRU */
12686
12687 case SYMBOL_REF:
12688 /* TLS references should always be enclosed in UNSPEC.
12689 A dllimported symbol always needs to be resolved. */
12690 if (SYMBOL_REF_TLS_MODEL (op0)
12691 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12692 return false;
12693
12694 if (TARGET_PECOFF)
12695 {
12696 if (is_imported_p (op0))
12697 return true;
12698
12699 if (SYMBOL_REF_FAR_ADDR_P (op0)
12700 || !SYMBOL_REF_LOCAL_P (op0))
12701 break;
12702
12703 /* Function symbols need to be resolved only for the
12704 large model.
12705 For the small model we don't need to resolve anything
12706 here. */
12707 if ((ix86_cmodel != CM_LARGE_PIC
12708 && SYMBOL_REF_FUNCTION_P (op0))
12709 || ix86_cmodel == CM_SMALL_PIC)
12710 return true;
12711 /* Non-external symbols don't need to be resolved for
12712 the large and medium models. */
12713 if ((ix86_cmodel == CM_LARGE_PIC
12714 || ix86_cmodel == CM_MEDIUM_PIC)
12715 && !SYMBOL_REF_EXTERNAL_P (op0))
12716 return true;
12717 }
12718 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12719 && SYMBOL_REF_LOCAL_P (op0)
12720 && ix86_cmodel != CM_LARGE_PIC)
12721 return true;
12722 break;
12723
12724 default:
12725 break;
12726 }
12727 }
12728 if (GET_CODE (disp) != CONST)
12729 return false;
12730 disp = XEXP (disp, 0);
12731
12732 if (TARGET_64BIT)
12733 {
12734 /* It is unsafe to allow PLUS expressions; this limits the allowed
12735 distance of GOT references. We should not need these anyway. */
12736 if (GET_CODE (disp) != UNSPEC
12737 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12738 && XINT (disp, 1) != UNSPEC_GOTOFF
12739 && XINT (disp, 1) != UNSPEC_PCREL
12740 && XINT (disp, 1) != UNSPEC_PLTOFF))
12741 return false;
12742
12743 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12744 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12745 return false;
12746 return true;
12747 }
12748
12749 saw_plus = false;
12750 if (GET_CODE (disp) == PLUS)
12751 {
12752 if (!CONST_INT_P (XEXP (disp, 1)))
12753 return false;
12754 disp = XEXP (disp, 0);
12755 saw_plus = true;
12756 }
12757
12758 if (TARGET_MACHO && darwin_local_data_pic (disp))
12759 return true;
12760
12761 if (GET_CODE (disp) != UNSPEC)
12762 return false;
12763
12764 switch (XINT (disp, 1))
12765 {
12766 case UNSPEC_GOT:
12767 if (saw_plus)
12768 return false;
12769 /* We need to check for both symbols and labels because VxWorks loads
12770 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12771 details. */
12772 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12773 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12774 case UNSPEC_GOTOFF:
12775 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12776 While the ABI also specifies a 32bit relocation, we don't produce it
12777 in the small PIC model at all. */
12778 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12779 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12780 && !TARGET_64BIT)
12781 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12782 return false;
12783 case UNSPEC_GOTTPOFF:
12784 case UNSPEC_GOTNTPOFF:
12785 case UNSPEC_INDNTPOFF:
12786 if (saw_plus)
12787 return false;
12788 disp = XVECEXP (disp, 0, 0);
12789 return (GET_CODE (disp) == SYMBOL_REF
12790 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12791 case UNSPEC_NTPOFF:
12792 disp = XVECEXP (disp, 0, 0);
12793 return (GET_CODE (disp) == SYMBOL_REF
12794 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12795 case UNSPEC_DTPOFF:
12796 disp = XVECEXP (disp, 0, 0);
12797 return (GET_CODE (disp) == SYMBOL_REF
12798 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12799 }
12800
12801 return false;
12802 }
12803
12804 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Reload the invalid
12805 parts of the memory address X in place, if any. Return true if the
12806 calling macro should goto WIN (i.e. something was reloaded),
12807 false if it should not. */
12808
12809 bool
12810 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12811 int)
12812 {
12813 /* Reload can generate:
12814
12815 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12816 (reg:DI 97))
12817 (reg:DI 2 cx))
12818
12819 This RTX is rejected from ix86_legitimate_address_p due to
12820 non-strictness of base register 97. Following this rejection,
12821 reload pushes all three components into separate registers,
12822 creating invalid memory address RTX.
12823
12824 Following code reloads only the invalid part of the
12825 memory address RTX. */
12826
12827 if (GET_CODE (x) == PLUS
12828 && REG_P (XEXP (x, 1))
12829 && GET_CODE (XEXP (x, 0)) == PLUS
12830 && REG_P (XEXP (XEXP (x, 0), 1)))
12831 {
12832 rtx base, index;
12833 bool something_reloaded = false;
12834
12835 base = XEXP (XEXP (x, 0), 1);
12836 if (!REG_OK_FOR_BASE_STRICT_P (base))
12837 {
12838 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12839 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12840 opnum, (enum reload_type) type);
12841 something_reloaded = true;
12842 }
12843
12844 index = XEXP (x, 1);
12845 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12846 {
12847 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12848 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12849 opnum, (enum reload_type) type);
12850 something_reloaded = true;
12851 }
12852
12853 gcc_assert (something_reloaded);
12854 return true;
12855 }
12856
12857 return false;
12858 }
12859
12860 /* Determine if OP is a suitable RTX for an address register.
12861 Return the naked register if a register or a register subreg is
12862 found, otherwise return NULL_RTX. */
12863
12864 static rtx
12865 ix86_validate_address_register (rtx op)
12866 {
12867 enum machine_mode mode = GET_MODE (op);
12868
12869 /* Only SImode or DImode registers can form the address. */
12870 if (mode != SImode && mode != DImode)
12871 return NULL_RTX;
12872
12873 if (REG_P (op))
12874 return op;
12875 else if (GET_CODE (op) == SUBREG)
12876 {
12877 rtx reg = SUBREG_REG (op);
12878
12879 if (!REG_P (reg))
12880 return NULL_RTX;
12881
12882 mode = GET_MODE (reg);
12883
12884 /* Don't allow SUBREGs that span more than a word. It can
12885 lead to spill failures when the register is one word out
12886 of a two word structure. */
12887 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12888 return NULL_RTX;
12889
12890 /* Allow only SUBREGs of non-eliminable hard registers. */
12891 if (register_no_elim_operand (reg, mode))
12892 return reg;
12893 }
12894
12895 /* Op is not a register. */
12896 return NULL_RTX;
12897 }
12898
12899 /* Recognizes RTL expressions that are valid memory addresses for an
12900 instruction. The MODE argument is the machine mode for the MEM
12901 expression that wants to use this address.
12902
12903 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12904 convert common non-canonical forms to canonical form so that they will
12905 be recognized. */
12906
12907 static bool
12908 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12909 {
12910 struct ix86_address parts;
12911 rtx base, index, disp;
12912 HOST_WIDE_INT scale;
12913 enum ix86_address_seg seg;
12914
12915 if (ix86_decompose_address (addr, &parts) <= 0)
12916 /* Decomposition failed. */
12917 return false;
12918
12919 base = parts.base;
12920 index = parts.index;
12921 disp = parts.disp;
12922 scale = parts.scale;
12923 seg = parts.seg;
12924
12925 /* Validate base register. */
12926 if (base)
12927 {
12928 rtx reg = ix86_validate_address_register (base);
12929
12930 if (reg == NULL_RTX)
12931 return false;
12932
12933 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12934 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12935 /* Base is not valid. */
12936 return false;
12937 }
12938
12939 /* Validate index register. */
12940 if (index)
12941 {
12942 rtx reg = ix86_validate_address_register (index);
12943
12944 if (reg == NULL_RTX)
12945 return false;
12946
12947 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12948 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12949 /* Index is not valid. */
12950 return false;
12951 }
12952
12953 /* Index and base should have the same mode. */
12954 if (base && index
12955 && GET_MODE (base) != GET_MODE (index))
12956 return false;
12957
12958 /* Address override works only on the (%reg) part of %fs:(%reg). */
12959 if (seg != SEG_DEFAULT
12960 && ((base && GET_MODE (base) != word_mode)
12961 || (index && GET_MODE (index) != word_mode)))
12962 return false;
12963
12964 /* Validate scale factor. */
12965 if (scale != 1)
12966 {
12967 if (!index)
12968 /* Scale without index. */
12969 return false;
12970
12971 if (scale != 2 && scale != 4 && scale != 8)
12972 /* Scale is not a valid multiplier. */
12973 return false;
12974 }
12975
12976 /* Validate displacement. */
12977 if (disp)
12978 {
12979 if (GET_CODE (disp) == CONST
12980 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12981 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12982 switch (XINT (XEXP (disp, 0), 1))
12983 {
12984 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
12985 used. While the ABI also specifies 32bit relocations, we don't produce
12986 them at all and use IP-relative addressing instead. */
12987 case UNSPEC_GOT:
12988 case UNSPEC_GOTOFF:
12989 gcc_assert (flag_pic);
12990 if (!TARGET_64BIT)
12991 goto is_legitimate_pic;
12992
12993 /* 64bit address unspec. */
12994 return false;
12995
12996 case UNSPEC_GOTPCREL:
12997 case UNSPEC_PCREL:
12998 gcc_assert (flag_pic);
12999 goto is_legitimate_pic;
13000
13001 case UNSPEC_GOTTPOFF:
13002 case UNSPEC_GOTNTPOFF:
13003 case UNSPEC_INDNTPOFF:
13004 case UNSPEC_NTPOFF:
13005 case UNSPEC_DTPOFF:
13006 break;
13007
13008 case UNSPEC_STACK_CHECK:
13009 gcc_assert (flag_split_stack);
13010 break;
13011
13012 default:
13013 /* Invalid address unspec. */
13014 return false;
13015 }
13016
13017 else if (SYMBOLIC_CONST (disp)
13018 && (flag_pic
13019 || (TARGET_MACHO
13020 #if TARGET_MACHO
13021 && MACHOPIC_INDIRECT
13022 && !machopic_operand_p (disp)
13023 #endif
13024 )))
13025 {
13026
13027 is_legitimate_pic:
13028 if (TARGET_64BIT && (index || base))
13029 {
13030 /* foo@dtpoff(%rX) is ok. */
13031 if (GET_CODE (disp) != CONST
13032 || GET_CODE (XEXP (disp, 0)) != PLUS
13033 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13034 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13035 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13036 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13037 /* Non-constant pic memory reference. */
13038 return false;
13039 }
13040 else if ((!TARGET_MACHO || flag_pic)
13041 && ! legitimate_pic_address_disp_p (disp))
13042 /* Displacement is an invalid pic construct. */
13043 return false;
13044 #if TARGET_MACHO
13045 else if (MACHO_DYNAMIC_NO_PIC_P
13046 && !ix86_legitimate_constant_p (Pmode, disp))
13047 /* displacement must be referenced via non_lazy_pointer */
13048 return false;
13049 #endif
13050
13051 /* This code used to verify that a symbolic pic displacement
13052 includes the pic_offset_table_rtx register.
13053
13054 While this is a good idea, unfortunately these constructs may
13055 be created by the "adds using lea" optimization for incorrect
13056 code like:
13057
13058 int a;
13059 int foo(int i)
13060 {
13061 return *(&a+i);
13062 }
13063
13064 This code is nonsensical, but results in addressing the
13065 GOT table with pic_offset_table_rtx as the base. We can't
13066 just refuse it easily, since it gets matched by the
13067 "addsi3" pattern, which later gets split to lea when the
13068 output register differs from the input. While this
13069 could be handled by a separate addsi pattern for this case
13070 that never results in lea, disabling this test seems to be
13071 the easier and correct fix for the crash. */
13072 }
13073 else if (GET_CODE (disp) != LABEL_REF
13074 && !CONST_INT_P (disp)
13075 && (GET_CODE (disp) != CONST
13076 || !ix86_legitimate_constant_p (Pmode, disp))
13077 && (GET_CODE (disp) != SYMBOL_REF
13078 || !ix86_legitimate_constant_p (Pmode, disp)))
13079 /* Displacement is not constant. */
13080 return false;
13081 else if (TARGET_64BIT
13082 && !x86_64_immediate_operand (disp, VOIDmode))
13083 /* Displacement is out of range. */
13084 return false;
13085 /* In x32 mode, constant addresses are sign extended to 64bit, so
13086 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13087 else if (TARGET_X32 && !(index || base)
13088 && CONST_INT_P (disp)
13089 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13090 return false;
13091 }
13092
13093 /* Everything looks valid. */
13094 return true;
13095 }
13096
13097 /* Determine if a given RTX is a valid constant address. */
13098
13099 bool
13100 constant_address_p (rtx x)
13101 {
13102 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13103 }
13104 \f
13105 /* Return a unique alias set for the GOT. */
13106
13107 static alias_set_type
13108 ix86_GOT_alias_set (void)
13109 {
13110 static alias_set_type set = -1;
13111 if (set == -1)
13112 set = new_alias_set ();
13113 return set;
13114 }
13115
13116 /* Return a legitimate reference for ORIG (an address) using the
13117 register REG. If REG is 0, a new pseudo is generated.
13118
13119 There are two types of references that must be handled:
13120
13121 1. Global data references must load the address from the GOT, via
13122 the PIC reg. An insn is emitted to do this load, and the reg is
13123 returned.
13124
13125 2. Static data references, constant pool addresses, and code labels
13126 compute the address as an offset from the GOT, whose base is in
13127 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13128 differentiate them from global data objects. The returned
13129 address is the PIC reg + an unspec constant.
13130
13131 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13132 reg also appears in the address. */
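/* For example, on 32-bit ELF a global symbol FOO is rewritten below as
   (mem (plus pic_reg (const (unspec [FOO] UNSPEC_GOT)))), i.e. a load
   from the GOT, whereas a local symbol BAR becomes
   (plus pic_reg (const (unspec [BAR] UNSPEC_GOTOFF))) with no load.  */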
13133
13134 static rtx
13135 legitimize_pic_address (rtx orig, rtx reg)
13136 {
13137 rtx addr = orig;
13138 rtx new_rtx = orig;
13139
13140 #if TARGET_MACHO
13141 if (TARGET_MACHO && !TARGET_64BIT)
13142 {
13143 if (reg == 0)
13144 reg = gen_reg_rtx (Pmode);
13145 /* Use the generic Mach-O PIC machinery. */
13146 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13147 }
13148 #endif
13149
13150 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13151 {
13152 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13153 if (tmp)
13154 return tmp;
13155 }
13156
13157 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13158 new_rtx = addr;
13159 else if (TARGET_64BIT && !TARGET_PECOFF
13160 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13161 {
13162 rtx tmpreg;
13163 /* This symbol may be referenced via a displacement from the PIC
13164 base address (@GOTOFF). */
13165
13166 if (reload_in_progress)
13167 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13168 if (GET_CODE (addr) == CONST)
13169 addr = XEXP (addr, 0);
13170 if (GET_CODE (addr) == PLUS)
13171 {
13172 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13173 UNSPEC_GOTOFF);
13174 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13175 }
13176 else
13177 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13178 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13179 if (!reg)
13180 tmpreg = gen_reg_rtx (Pmode);
13181 else
13182 tmpreg = reg;
13183 emit_move_insn (tmpreg, new_rtx);
13184
13185 if (reg != 0)
13186 {
13187 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13188 tmpreg, 1, OPTAB_DIRECT);
13189 new_rtx = reg;
13190 }
13191 else
13192 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13193 }
13194 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13195 {
13196 /* This symbol may be referenced via a displacement from the PIC
13197 base address (@GOTOFF). */
13198
13199 if (reload_in_progress)
13200 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13201 if (GET_CODE (addr) == CONST)
13202 addr = XEXP (addr, 0);
13203 if (GET_CODE (addr) == PLUS)
13204 {
13205 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13206 UNSPEC_GOTOFF);
13207 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13208 }
13209 else
13210 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13211 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13212 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13213
13214 if (reg != 0)
13215 {
13216 emit_move_insn (reg, new_rtx);
13217 new_rtx = reg;
13218 }
13219 }
13220 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13221 /* We can't use @GOTOFF for text labels on VxWorks;
13222 see gotoff_operand. */
13223 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13224 {
13225 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13226 if (tmp)
13227 return tmp;
13228
13229 /* For x64 PE-COFF there is no GOT table, so we use the address
13230 directly. */
13231 if (TARGET_64BIT && TARGET_PECOFF)
13232 {
13233 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13234 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13235
13236 if (reg == 0)
13237 reg = gen_reg_rtx (Pmode);
13238 emit_move_insn (reg, new_rtx);
13239 new_rtx = reg;
13240 }
13241 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13242 {
13243 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13244 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13245 new_rtx = gen_const_mem (Pmode, new_rtx);
13246 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13247
13248 if (reg == 0)
13249 reg = gen_reg_rtx (Pmode);
13250 /* Use gen_movsi directly, otherwise the address is loaded
13251 into a register for CSE. We don't want to CSE these addresses;
13252 instead we CSE addresses from the GOT table, so skip this. */
13253 emit_insn (gen_movsi (reg, new_rtx));
13254 new_rtx = reg;
13255 }
13256 else
13257 {
13258 /* This symbol must be referenced via a load from the
13259 Global Offset Table (@GOT). */
13260
13261 if (reload_in_progress)
13262 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13263 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13264 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13265 if (TARGET_64BIT)
13266 new_rtx = force_reg (Pmode, new_rtx);
13267 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13268 new_rtx = gen_const_mem (Pmode, new_rtx);
13269 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13270
13271 if (reg == 0)
13272 reg = gen_reg_rtx (Pmode);
13273 emit_move_insn (reg, new_rtx);
13274 new_rtx = reg;
13275 }
13276 }
13277 else
13278 {
13279 if (CONST_INT_P (addr)
13280 && !x86_64_immediate_operand (addr, VOIDmode))
13281 {
13282 if (reg)
13283 {
13284 emit_move_insn (reg, addr);
13285 new_rtx = reg;
13286 }
13287 else
13288 new_rtx = force_reg (Pmode, addr);
13289 }
13290 else if (GET_CODE (addr) == CONST)
13291 {
13292 addr = XEXP (addr, 0);
13293
13294 /* We must match stuff we generate before. Assume the only
13295 unspecs that can get here are ours. Not that we could do
13296 anything with them anyway.... */
13297 if (GET_CODE (addr) == UNSPEC
13298 || (GET_CODE (addr) == PLUS
13299 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13300 return orig;
13301 gcc_assert (GET_CODE (addr) == PLUS);
13302 }
13303 if (GET_CODE (addr) == PLUS)
13304 {
13305 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13306
13307 /* Check first to see if this is a constant offset from a @GOTOFF
13308 symbol reference. */
13309 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13310 && CONST_INT_P (op1))
13311 {
13312 if (!TARGET_64BIT)
13313 {
13314 if (reload_in_progress)
13315 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13316 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13317 UNSPEC_GOTOFF);
13318 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13319 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13320 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13321
13322 if (reg != 0)
13323 {
13324 emit_move_insn (reg, new_rtx);
13325 new_rtx = reg;
13326 }
13327 }
13328 else
13329 {
13330 if (INTVAL (op1) < -16*1024*1024
13331 || INTVAL (op1) >= 16*1024*1024)
13332 {
13333 if (!x86_64_immediate_operand (op1, Pmode))
13334 op1 = force_reg (Pmode, op1);
13335 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13336 }
13337 }
13338 }
13339 else
13340 {
13341 rtx base = legitimize_pic_address (op0, reg);
13342 enum machine_mode mode = GET_MODE (base);
13343 new_rtx
13344 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13345
13346 if (CONST_INT_P (new_rtx))
13347 {
13348 if (INTVAL (new_rtx) < -16*1024*1024
13349 || INTVAL (new_rtx) >= 16*1024*1024)
13350 {
13351 if (!x86_64_immediate_operand (new_rtx, mode))
13352 new_rtx = force_reg (mode, new_rtx);
13353 new_rtx
13354 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13355 }
13356 else
13357 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13358 }
13359 else
13360 {
13361 if (GET_CODE (new_rtx) == PLUS
13362 && CONSTANT_P (XEXP (new_rtx, 1)))
13363 {
13364 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13365 new_rtx = XEXP (new_rtx, 1);
13366 }
13367 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13368 }
13369 }
13370 }
13371 }
13372 return new_rtx;
13373 }
13374 \f
13375 /* Load the thread pointer. If TO_REG is true, force it into a register. */
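/* The resulting UNSPEC_TP reference either stays part of an address,
   where ix86_decompose_address turns it into a %fs:/%gs: segment
   override (TARGET_TLS_DIRECT_SEG_REFS), or is copied into a register
   for explicit address arithmetic.  */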
13376
13377 static rtx
13378 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13379 {
13380 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13381
13382 if (GET_MODE (tp) != tp_mode)
13383 {
13384 gcc_assert (GET_MODE (tp) == SImode);
13385 gcc_assert (tp_mode == DImode);
13386
13387 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13388 }
13389
13390 if (to_reg)
13391 tp = copy_to_mode_reg (tp_mode, tp);
13392
13393 return tp;
13394 }
13395
13396 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13397
13398 static GTY(()) rtx ix86_tls_symbol;
13399
13400 static rtx
13401 ix86_tls_get_addr (void)
13402 {
13403 if (!ix86_tls_symbol)
13404 {
13405 const char *sym
13406 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13407 ? "___tls_get_addr" : "__tls_get_addr");
13408
13409 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13410 }
13411
13412 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13413 {
13414 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13415 UNSPEC_PLTOFF);
13416 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13417 gen_rtx_CONST (Pmode, unspec));
13418 }
13419
13420 return ix86_tls_symbol;
13421 }
13422
13423 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13424
13425 static GTY(()) rtx ix86_tls_module_base_symbol;
13426
13427 rtx
13428 ix86_tls_module_base (void)
13429 {
13430 if (!ix86_tls_module_base_symbol)
13431 {
13432 ix86_tls_module_base_symbol
13433 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13434
13435 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13436 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13437 }
13438
13439 return ix86_tls_module_base_symbol;
13440 }
13441
13442 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13443 false if we expect this to be used for a memory address and true if
13444 we expect to load the address into a register. */
13445
13446 static rtx
13447 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13448 {
13449 rtx dest, base, off;
13450 rtx pic = NULL_RTX, tp = NULL_RTX;
13451 enum machine_mode tp_mode = Pmode;
13452 int type;
13453
13454 /* Fall back to the global dynamic model if the toolchain cannot support
13455 local dynamic. */
13456 if (TARGET_SUN_TLS && !TARGET_64BIT
13457 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13458 && model == TLS_MODEL_LOCAL_DYNAMIC)
13459 model = TLS_MODEL_GLOBAL_DYNAMIC;
13460
13461 switch (model)
13462 {
13463 case TLS_MODEL_GLOBAL_DYNAMIC:
13464 dest = gen_reg_rtx (Pmode);
13465
13466 if (!TARGET_64BIT)
13467 {
13468 if (flag_pic && !TARGET_PECOFF)
13469 pic = pic_offset_table_rtx;
13470 else
13471 {
13472 pic = gen_reg_rtx (Pmode);
13473 emit_insn (gen_set_got (pic));
13474 }
13475 }
13476
13477 if (TARGET_GNU2_TLS)
13478 {
13479 if (TARGET_64BIT)
13480 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13481 else
13482 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13483
13484 tp = get_thread_pointer (Pmode, true);
13485 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13486
13487 if (GET_MODE (x) != Pmode)
13488 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13489
13490 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13491 }
13492 else
13493 {
13494 rtx caddr = ix86_tls_get_addr ();
13495
13496 if (TARGET_64BIT)
13497 {
13498 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13499 rtx_insn *insns;
13500
13501 start_sequence ();
13502 emit_call_insn
13503 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13504 insns = get_insns ();
13505 end_sequence ();
13506
13507 if (GET_MODE (x) != Pmode)
13508 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13509
13510 RTL_CONST_CALL_P (insns) = 1;
13511 emit_libcall_block (insns, dest, rax, x);
13512 }
13513 else
13514 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13515 }
13516 break;
13517
13518 case TLS_MODEL_LOCAL_DYNAMIC:
13519 base = gen_reg_rtx (Pmode);
13520
13521 if (!TARGET_64BIT)
13522 {
13523 if (flag_pic)
13524 pic = pic_offset_table_rtx;
13525 else
13526 {
13527 pic = gen_reg_rtx (Pmode);
13528 emit_insn (gen_set_got (pic));
13529 }
13530 }
13531
13532 if (TARGET_GNU2_TLS)
13533 {
13534 rtx tmp = ix86_tls_module_base ();
13535
13536 if (TARGET_64BIT)
13537 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13538 else
13539 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13540
13541 tp = get_thread_pointer (Pmode, true);
13542 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13543 gen_rtx_MINUS (Pmode, tmp, tp));
13544 }
13545 else
13546 {
13547 rtx caddr = ix86_tls_get_addr ();
13548
13549 if (TARGET_64BIT)
13550 {
13551 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13552 rtx_insn *insns;
13553 rtx eqv;
13554
13555 start_sequence ();
13556 emit_call_insn
13557 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13558 insns = get_insns ();
13559 end_sequence ();
13560
13561 /* Attach a unique REG_EQUAL to allow the RTL optimizers to
13562 share the LD_BASE result with other LD model accesses. */
13563 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13564 UNSPEC_TLS_LD_BASE);
13565
13566 RTL_CONST_CALL_P (insns) = 1;
13567 emit_libcall_block (insns, base, rax, eqv);
13568 }
13569 else
13570 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13571 }
13572
13573 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13574 off = gen_rtx_CONST (Pmode, off);
13575
13576 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13577
13578 if (TARGET_GNU2_TLS)
13579 {
13580 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13581
13582 if (GET_MODE (x) != Pmode)
13583 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13584
13585 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13586 }
13587 break;
13588
13589 case TLS_MODEL_INITIAL_EXEC:
13590 if (TARGET_64BIT)
13591 {
13592 if (TARGET_SUN_TLS && !TARGET_X32)
13593 {
13594 /* The Sun linker took the AMD64 TLS spec literally
13595 and can only handle %rax as the destination of the
13596 initial-exec code sequence. */
13597
13598 dest = gen_reg_rtx (DImode);
13599 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13600 return dest;
13601 }
13602
13603 /* Generate DImode references to avoid %fs:(%reg32)
13604 problems and the linker IE->LE relaxation bug. */
13605 tp_mode = DImode;
13606 pic = NULL;
13607 type = UNSPEC_GOTNTPOFF;
13608 }
13609 else if (flag_pic)
13610 {
13611 if (reload_in_progress)
13612 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13613 pic = pic_offset_table_rtx;
13614 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13615 }
13616 else if (!TARGET_ANY_GNU_TLS)
13617 {
13618 pic = gen_reg_rtx (Pmode);
13619 emit_insn (gen_set_got (pic));
13620 type = UNSPEC_GOTTPOFF;
13621 }
13622 else
13623 {
13624 pic = NULL;
13625 type = UNSPEC_INDNTPOFF;
13626 }
13627
13628 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13629 off = gen_rtx_CONST (tp_mode, off);
13630 if (pic)
13631 off = gen_rtx_PLUS (tp_mode, pic, off);
13632 off = gen_const_mem (tp_mode, off);
13633 set_mem_alias_set (off, ix86_GOT_alias_set ());
13634
13635 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13636 {
13637 base = get_thread_pointer (tp_mode,
13638 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13639 off = force_reg (tp_mode, off);
13640 return gen_rtx_PLUS (tp_mode, base, off);
13641 }
13642 else
13643 {
13644 base = get_thread_pointer (Pmode, true);
13645 dest = gen_reg_rtx (Pmode);
13646 emit_insn (ix86_gen_sub3 (dest, base, off));
13647 }
13648 break;
13649
13650 case TLS_MODEL_LOCAL_EXEC:
13651 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13652 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13653 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13654 off = gen_rtx_CONST (Pmode, off);
13655
13656 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13657 {
13658 base = get_thread_pointer (Pmode,
13659 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13660 return gen_rtx_PLUS (Pmode, base, off);
13661 }
13662 else
13663 {
13664 base = get_thread_pointer (Pmode, true);
13665 dest = gen_reg_rtx (Pmode);
13666 emit_insn (ix86_gen_sub3 (dest, base, off));
13667 }
13668 break;
13669
13670 default:
13671 gcc_unreachable ();
13672 }
13673
13674 return dest;
13675 }
13676
13677 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13678 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13679 unique refptr-DECL symbol corresponding to symbol DECL. */
13680
13681 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13682 htab_t dllimport_map;
13683
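/* Roughly: for a dllimport reference to "foo" we build an artificial
   pointer variable named "__imp_foo" (or "__imp__foo", depending on the
   user label prefix) whose value the PE loader fills in; for the refptr
   case we build a local "refptr.foo" indirection cell instead. Callers
   then load the real address through that pointer. */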
13684 static tree
13685 get_dllimport_decl (tree decl, bool beimport)
13686 {
13687 struct tree_map *h, in;
13688 void **loc;
13689 const char *name;
13690 const char *prefix;
13691 size_t namelen, prefixlen;
13692 char *imp_name;
13693 tree to;
13694 rtx rtl;
13695
13696 if (!dllimport_map)
13697 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13698
13699 in.hash = htab_hash_pointer (decl);
13700 in.base.from = decl;
13701 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13702 h = (struct tree_map *) *loc;
13703 if (h)
13704 return h->to;
13705
13706 *loc = h = ggc_alloc<tree_map> ();
13707 h->hash = in.hash;
13708 h->base.from = decl;
13709 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13710 VAR_DECL, NULL, ptr_type_node);
13711 DECL_ARTIFICIAL (to) = 1;
13712 DECL_IGNORED_P (to) = 1;
13713 DECL_EXTERNAL (to) = 1;
13714 TREE_READONLY (to) = 1;
13715
13716 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13717 name = targetm.strip_name_encoding (name);
13718 if (beimport)
13719 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13720 ? "*__imp_" : "*__imp__";
13721 else
13722 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13723 namelen = strlen (name);
13724 prefixlen = strlen (prefix);
13725 imp_name = (char *) alloca (namelen + prefixlen + 1);
13726 memcpy (imp_name, prefix, prefixlen);
13727 memcpy (imp_name + prefixlen, name, namelen + 1);
13728
13729 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13730 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13731 SET_SYMBOL_REF_DECL (rtl, to);
13732 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13733 if (!beimport)
13734 {
13735 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13736 #ifdef SUB_TARGET_RECORD_STUB
13737 SUB_TARGET_RECORD_STUB (name);
13738 #endif
13739 }
13740
13741 rtl = gen_const_mem (Pmode, rtl);
13742 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13743
13744 SET_DECL_RTL (to, rtl);
13745 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13746
13747 return to;
13748 }
13749
13750 /* Expand SYMBOL into its corresponding far-address symbol.
13751 WANT_REG is true if we require the result be a register. */
13752
13753 static rtx
13754 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13755 {
13756 tree imp_decl;
13757 rtx x;
13758
13759 gcc_assert (SYMBOL_REF_DECL (symbol));
13760 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13761
13762 x = DECL_RTL (imp_decl);
13763 if (want_reg)
13764 x = force_reg (Pmode, x);
13765 return x;
13766 }
13767
13768 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13769 true if we require the result be a register. */
13770
13771 static rtx
13772 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13773 {
13774 tree imp_decl;
13775 rtx x;
13776
13777 gcc_assert (SYMBOL_REF_DECL (symbol));
13778 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13779
13780 x = DECL_RTL (imp_decl);
13781 if (want_reg)
13782 x = force_reg (Pmode, x);
13783 return x;
13784 }
13785
13786 /* Expand SYMBOL into its corresponding dllimport or refptr symbol. WANT_REG
13787 is true if we require the result be a register. */
13788
13789 static rtx
13790 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13791 {
13792 if (!TARGET_PECOFF)
13793 return NULL_RTX;
13794
13795 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13796 {
13797 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13798 return legitimize_dllimport_symbol (addr, inreg);
13799 if (GET_CODE (addr) == CONST
13800 && GET_CODE (XEXP (addr, 0)) == PLUS
13801 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13802 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13803 {
13804 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13805 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13806 }
13807 }
13808
13809 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13810 return NULL_RTX;
13811 if (GET_CODE (addr) == SYMBOL_REF
13812 && !is_imported_p (addr)
13813 && SYMBOL_REF_EXTERNAL_P (addr)
13814 && SYMBOL_REF_DECL (addr))
13815 return legitimize_pe_coff_extern_decl (addr, inreg);
13816
13817 if (GET_CODE (addr) == CONST
13818 && GET_CODE (XEXP (addr, 0)) == PLUS
13819 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13820 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13821 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13822 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13823 {
13824 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13825 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13826 }
13827 return NULL_RTX;
13828 }
13829
13830 /* Try machine-dependent ways of modifying an illegitimate address
13831 to be legitimate. If we find one, return the new, valid address.
13832 This macro is used in only one place: `memory_address' in explow.c.
13833
13834 OLDX is the address as it was before break_out_memory_refs was called.
13835 In some cases it is useful to look at this to decide what needs to be done.
13836
13837 It is always safe for this macro to do nothing. It exists to recognize
13838 opportunities to optimize the output.
13839
13840 For the 80386, we handle X+REG by loading X into a register R and
13841 using R+REG. R will go in a general reg and indexing will be used.
13842 However, if REG is a broken-out memory address or multiplication,
13843 nothing needs to be done because REG can certainly go in a general reg.
13844
13845 When -fpic is used, special handling is needed for symbolic references.
13846 See comments by legitimize_pic_address in i386.c for details. */
13847
13848 static rtx
13849 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13850 {
13851 int changed = 0;
13852 unsigned log;
13853
13854 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13855 if (log)
13856 return legitimize_tls_address (x, (enum tls_model) log, false);
13857 if (GET_CODE (x) == CONST
13858 && GET_CODE (XEXP (x, 0)) == PLUS
13859 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13860 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13861 {
13862 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13863 (enum tls_model) log, false);
13864 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13865 }
13866
13867 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13868 {
13869 rtx tmp = legitimize_pe_coff_symbol (x, true);
13870 if (tmp)
13871 return tmp;
13872 }
13873
13874 if (flag_pic && SYMBOLIC_CONST (x))
13875 return legitimize_pic_address (x, 0);
13876
13877 #if TARGET_MACHO
13878 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13879 return machopic_indirect_data_reference (x, 0);
13880 #endif
13881
13882 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
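/* E.g. (ashift r 3) becomes (mult r 8), matching the scale factors
   accepted by the SIB addressing forms. */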
13883 if (GET_CODE (x) == ASHIFT
13884 && CONST_INT_P (XEXP (x, 1))
13885 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13886 {
13887 changed = 1;
13888 log = INTVAL (XEXP (x, 1));
13889 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13890 GEN_INT (1 << log));
13891 }
13892
13893 if (GET_CODE (x) == PLUS)
13894 {
13895 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13896
13897 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13898 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13899 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13900 {
13901 changed = 1;
13902 log = INTVAL (XEXP (XEXP (x, 0), 1));
13903 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13904 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13905 GEN_INT (1 << log));
13906 }
13907
13908 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13909 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13910 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13911 {
13912 changed = 1;
13913 log = INTVAL (XEXP (XEXP (x, 1), 1));
13914 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13915 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13916 GEN_INT (1 << log));
13917 }
13918
13919 /* Put multiply first if it isn't already. */
13920 if (GET_CODE (XEXP (x, 1)) == MULT)
13921 {
13922 rtx tmp = XEXP (x, 0);
13923 XEXP (x, 0) = XEXP (x, 1);
13924 XEXP (x, 1) = tmp;
13925 changed = 1;
13926 }
13927
13928 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13929 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13930 created by virtual register instantiation, register elimination, and
13931 similar optimizations. */
13932 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13933 {
13934 changed = 1;
13935 x = gen_rtx_PLUS (Pmode,
13936 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13937 XEXP (XEXP (x, 1), 0)),
13938 XEXP (XEXP (x, 1), 1));
13939 }
13940
13941 /* Canonicalize
13942 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13943 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13944 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13945 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13946 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13947 && CONSTANT_P (XEXP (x, 1)))
13948 {
13949 rtx constant;
13950 rtx other = NULL_RTX;
13951
13952 if (CONST_INT_P (XEXP (x, 1)))
13953 {
13954 constant = XEXP (x, 1);
13955 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13956 }
13957 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13958 {
13959 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13960 other = XEXP (x, 1);
13961 }
13962 else
13963 constant = 0;
13964
13965 if (constant)
13966 {
13967 changed = 1;
13968 x = gen_rtx_PLUS (Pmode,
13969 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13970 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13971 plus_constant (Pmode, other,
13972 INTVAL (constant)));
13973 }
13974 }
13975
13976 if (changed && ix86_legitimate_address_p (mode, x, false))
13977 return x;
13978
13979 if (GET_CODE (XEXP (x, 0)) == MULT)
13980 {
13981 changed = 1;
13982 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13983 }
13984
13985 if (GET_CODE (XEXP (x, 1)) == MULT)
13986 {
13987 changed = 1;
13988 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13989 }
13990
13991 if (changed
13992 && REG_P (XEXP (x, 1))
13993 && REG_P (XEXP (x, 0)))
13994 return x;
13995
13996 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13997 {
13998 changed = 1;
13999 x = legitimize_pic_address (x, 0);
14000 }
14001
14002 if (changed && ix86_legitimate_address_p (mode, x, false))
14003 return x;
14004
14005 if (REG_P (XEXP (x, 0)))
14006 {
14007 rtx temp = gen_reg_rtx (Pmode);
14008 rtx val = force_operand (XEXP (x, 1), temp);
14009 if (val != temp)
14010 {
14011 val = convert_to_mode (Pmode, val, 1);
14012 emit_move_insn (temp, val);
14013 }
14014
14015 XEXP (x, 1) = temp;
14016 return x;
14017 }
14018
14019 else if (REG_P (XEXP (x, 1)))
14020 {
14021 rtx temp = gen_reg_rtx (Pmode);
14022 rtx val = force_operand (XEXP (x, 0), temp);
14023 if (val != temp)
14024 {
14025 val = convert_to_mode (Pmode, val, 1);
14026 emit_move_insn (temp, val);
14027 }
14028
14029 XEXP (x, 0) = temp;
14030 return x;
14031 }
14032 }
14033
14034 return x;
14035 }
14036 \f
14037 /* Print an integer constant expression in assembler syntax. Addition
14038 and subtraction are the only arithmetic that may appear in these
14039 expressions. FILE is the stdio stream to write to, X is the rtx, and
14040 CODE is the operand print code from the output string. */
14041
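/* As a sketch, (const (unspec [foo] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF" and (const (plus (symbol_ref bar) (const_int 4))) as
   "bar+4"; the full set of relocation suffixes handled is listed in
   the UNSPEC switch below. */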
14042 static void
14043 output_pic_addr_const (FILE *file, rtx x, int code)
14044 {
14045 char buf[256];
14046
14047 switch (GET_CODE (x))
14048 {
14049 case PC:
14050 gcc_assert (flag_pic);
14051 putc ('.', file);
14052 break;
14053
14054 case SYMBOL_REF:
14055 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14056 output_addr_const (file, x);
14057 else
14058 {
14059 const char *name = XSTR (x, 0);
14060
14061 /* Mark the decl as referenced so that cgraph will
14062 output the function. */
14063 if (SYMBOL_REF_DECL (x))
14064 mark_decl_referenced (SYMBOL_REF_DECL (x));
14065
14066 #if TARGET_MACHO
14067 if (MACHOPIC_INDIRECT
14068 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14069 name = machopic_indirection_name (x, /*stub_p=*/true);
14070 #endif
14071 assemble_name (file, name);
14072 }
14073 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14074 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14075 fputs ("@PLT", file);
14076 break;
14077
14078 case LABEL_REF:
14079 x = XEXP (x, 0);
14080 /* FALLTHRU */
14081 case CODE_LABEL:
14082 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14083 assemble_name (asm_out_file, buf);
14084 break;
14085
14086 case CONST_INT:
14087 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14088 break;
14089
14090 case CONST:
14091 /* This used to output parentheses around the expression,
14092 but that does not work on the 386 (either ATT or BSD assembler). */
14093 output_pic_addr_const (file, XEXP (x, 0), code);
14094 break;
14095
14096 case CONST_DOUBLE:
14097 if (GET_MODE (x) == VOIDmode)
14098 {
14099 /* We can use %d if the number is <32 bits and positive. */
14100 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14101 fprintf (file, "0x%lx%08lx",
14102 (unsigned long) CONST_DOUBLE_HIGH (x),
14103 (unsigned long) CONST_DOUBLE_LOW (x));
14104 else
14105 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14106 }
14107 else
14108 /* We can't handle floating point constants;
14109 TARGET_PRINT_OPERAND must handle them. */
14110 output_operand_lossage ("floating constant misused");
14111 break;
14112
14113 case PLUS:
14114 /* Some assemblers need integer constants to appear first. */
14115 if (CONST_INT_P (XEXP (x, 0)))
14116 {
14117 output_pic_addr_const (file, XEXP (x, 0), code);
14118 putc ('+', file);
14119 output_pic_addr_const (file, XEXP (x, 1), code);
14120 }
14121 else
14122 {
14123 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14124 output_pic_addr_const (file, XEXP (x, 1), code);
14125 putc ('+', file);
14126 output_pic_addr_const (file, XEXP (x, 0), code);
14127 }
14128 break;
14129
14130 case MINUS:
14131 if (!TARGET_MACHO)
14132 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14133 output_pic_addr_const (file, XEXP (x, 0), code);
14134 putc ('-', file);
14135 output_pic_addr_const (file, XEXP (x, 1), code);
14136 if (!TARGET_MACHO)
14137 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14138 break;
14139
14140 case UNSPEC:
14141 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14142 {
14143 bool f = i386_asm_output_addr_const_extra (file, x);
14144 gcc_assert (f);
14145 break;
14146 }
14147
14148 gcc_assert (XVECLEN (x, 0) == 1);
14149 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14150 switch (XINT (x, 1))
14151 {
14152 case UNSPEC_GOT:
14153 fputs ("@GOT", file);
14154 break;
14155 case UNSPEC_GOTOFF:
14156 fputs ("@GOTOFF", file);
14157 break;
14158 case UNSPEC_PLTOFF:
14159 fputs ("@PLTOFF", file);
14160 break;
14161 case UNSPEC_PCREL:
14162 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14163 "(%rip)" : "[rip]", file);
14164 break;
14165 case UNSPEC_GOTPCREL:
14166 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14167 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14168 break;
14169 case UNSPEC_GOTTPOFF:
14170 /* FIXME: This might be @TPOFF in Sun ld too. */
14171 fputs ("@gottpoff", file);
14172 break;
14173 case UNSPEC_TPOFF:
14174 fputs ("@tpoff", file);
14175 break;
14176 case UNSPEC_NTPOFF:
14177 if (TARGET_64BIT)
14178 fputs ("@tpoff", file);
14179 else
14180 fputs ("@ntpoff", file);
14181 break;
14182 case UNSPEC_DTPOFF:
14183 fputs ("@dtpoff", file);
14184 break;
14185 case UNSPEC_GOTNTPOFF:
14186 if (TARGET_64BIT)
14187 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14188 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14189 else
14190 fputs ("@gotntpoff", file);
14191 break;
14192 case UNSPEC_INDNTPOFF:
14193 fputs ("@indntpoff", file);
14194 break;
14195 #if TARGET_MACHO
14196 case UNSPEC_MACHOPIC_OFFSET:
14197 putc ('-', file);
14198 machopic_output_function_base_name (file);
14199 break;
14200 #endif
14201 default:
14202 output_operand_lossage ("invalid UNSPEC as operand");
14203 break;
14204 }
14205 break;
14206
14207 default:
14208 output_operand_lossage ("invalid expression as operand");
14209 }
14210 }
14211
14212 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14213 We need to emit DTP-relative relocations. */
14214
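/* E.g. with the usual ASM_LONG definition this emits something like
   ".long foo@dtpoff" for SIZE 4 and ".long foo@dtpoff, 0" for SIZE 8
   (a sketch; the exact directive comes from ASM_LONG). */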
14215 static void ATTRIBUTE_UNUSED
14216 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14217 {
14218 fputs (ASM_LONG, file);
14219 output_addr_const (file, x);
14220 fputs ("@dtpoff", file);
14221 switch (size)
14222 {
14223 case 4:
14224 break;
14225 case 8:
14226 fputs (", 0", file);
14227 break;
14228 default:
14229 gcc_unreachable ();
14230 }
14231 }
14232
14233 /* Return true if X is a representation of the PIC register. This copes
14234 with calls from ix86_find_base_term, where the register might have
14235 been replaced by a cselib value. */
14236
14237 static bool
14238 ix86_pic_register_p (rtx x)
14239 {
14240 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14241 return (pic_offset_table_rtx
14242 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14243 else
14244 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14245 }
14246
14247 /* Helper function for ix86_delegitimize_address.
14248 Attempt to delegitimize TLS local-exec accesses. */
14249
14250 static rtx
14251 ix86_delegitimize_tls_address (rtx orig_x)
14252 {
14253 rtx x = orig_x, unspec;
14254 struct ix86_address addr;
14255
14256 if (!TARGET_TLS_DIRECT_SEG_REFS)
14257 return orig_x;
14258 if (MEM_P (x))
14259 x = XEXP (x, 0);
14260 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14261 return orig_x;
14262 if (ix86_decompose_address (x, &addr) == 0
14263 || addr.seg != DEFAULT_TLS_SEG_REG
14264 || addr.disp == NULL_RTX
14265 || GET_CODE (addr.disp) != CONST)
14266 return orig_x;
14267 unspec = XEXP (addr.disp, 0);
14268 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14269 unspec = XEXP (unspec, 0);
14270 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14271 return orig_x;
14272 x = XVECEXP (unspec, 0, 0);
14273 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14274 if (unspec != XEXP (addr.disp, 0))
14275 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14276 if (addr.index)
14277 {
14278 rtx idx = addr.index;
14279 if (addr.scale != 1)
14280 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14281 x = gen_rtx_PLUS (Pmode, idx, x);
14282 }
14283 if (addr.base)
14284 x = gen_rtx_PLUS (Pmode, addr.base, x);
14285 if (MEM_P (orig_x))
14286 x = replace_equiv_address_nv (orig_x, x);
14287 return x;
14288 }
14289
14290 /* In the name of slightly smaller debug output, and to cater to
14291 general assembler lossage, recognize PIC+GOTOFF and turn it back
14292 into a direct symbol reference.
14293
14294 On Darwin, this is necessary to avoid a crash, because Darwin
14295 has a different PIC label for each routine but the DWARF debugging
14296 information is not associated with any particular routine, so it's
14297 necessary to remove references to the PIC label from RTL stored by
14298 the DWARF output code. */
14299
14300 static rtx
14301 ix86_delegitimize_address (rtx x)
14302 {
14303 rtx orig_x = delegitimize_mem_from_attrs (x);
14304 /* addend is NULL or some rtx if x is something+GOTOFF where
14305 something doesn't include the PIC register. */
14306 rtx addend = NULL_RTX;
14307 /* reg_addend is NULL or a multiple of some register. */
14308 rtx reg_addend = NULL_RTX;
14309 /* const_addend is NULL or a const_int. */
14310 rtx const_addend = NULL_RTX;
14311 /* This is the result, or NULL. */
14312 rtx result = NULL_RTX;
14313
14314 x = orig_x;
14315
14316 if (MEM_P (x))
14317 x = XEXP (x, 0);
14318
14319 if (TARGET_64BIT)
14320 {
14321 if (GET_CODE (x) == CONST
14322 && GET_CODE (XEXP (x, 0)) == PLUS
14323 && GET_MODE (XEXP (x, 0)) == Pmode
14324 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14325 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14326 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14327 {
14328 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14329 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14330 if (MEM_P (orig_x))
14331 x = replace_equiv_address_nv (orig_x, x);
14332 return x;
14333 }
14334
14335 if (GET_CODE (x) == CONST
14336 && GET_CODE (XEXP (x, 0)) == UNSPEC
14337 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14338 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14339 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14340 {
14341 x = XVECEXP (XEXP (x, 0), 0, 0);
14342 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14343 {
14344 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14345 GET_MODE (x), 0);
14346 if (x == NULL_RTX)
14347 return orig_x;
14348 }
14349 return x;
14350 }
14351
14352 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14353 return ix86_delegitimize_tls_address (orig_x);
14354
14355 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14356 and -mcmodel=medium -fpic. */
14357 }
14358
14359 if (GET_CODE (x) != PLUS
14360 || GET_CODE (XEXP (x, 1)) != CONST)
14361 return ix86_delegitimize_tls_address (orig_x);
14362
14363 if (ix86_pic_register_p (XEXP (x, 0)))
14364 /* %ebx + GOT/GOTOFF */
14365 ;
14366 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14367 {
14368 /* %ebx + %reg * scale + GOT/GOTOFF */
14369 reg_addend = XEXP (x, 0);
14370 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14371 reg_addend = XEXP (reg_addend, 1);
14372 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14373 reg_addend = XEXP (reg_addend, 0);
14374 else
14375 {
14376 reg_addend = NULL_RTX;
14377 addend = XEXP (x, 0);
14378 }
14379 }
14380 else
14381 addend = XEXP (x, 0);
14382
14383 x = XEXP (XEXP (x, 1), 0);
14384 if (GET_CODE (x) == PLUS
14385 && CONST_INT_P (XEXP (x, 1)))
14386 {
14387 const_addend = XEXP (x, 1);
14388 x = XEXP (x, 0);
14389 }
14390
14391 if (GET_CODE (x) == UNSPEC
14392 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14393 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14394 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14395 && !MEM_P (orig_x) && !addend)))
14396 result = XVECEXP (x, 0, 0);
14397
14398 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14399 && !MEM_P (orig_x))
14400 result = XVECEXP (x, 0, 0);
14401
14402 if (! result)
14403 return ix86_delegitimize_tls_address (orig_x);
14404
14405 if (const_addend)
14406 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14407 if (reg_addend)
14408 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14409 if (addend)
14410 {
14411 /* If the rest of original X doesn't involve the PIC register, add
14412 addend and subtract pic_offset_table_rtx. This can happen e.g.
14413 for code like:
14414 leal (%ebx, %ecx, 4), %ecx
14415 ...
14416 movl foo@GOTOFF(%ecx), %edx
14417 in which case we return (%ecx - %ebx) + foo. */
14418 if (pic_offset_table_rtx)
14419 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14420 pic_offset_table_rtx),
14421 result);
14422 else
14423 return orig_x;
14424 }
14425 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14426 {
14427 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14428 if (result == NULL_RTX)
14429 return orig_x;
14430 }
14431 return result;
14432 }
14433
14434 /* If X is a machine specific address (i.e. a symbol or label being
14435 referenced as a displacement from the GOT implemented using an
14436 UNSPEC), then return the base term. Otherwise return X. */
14437
14438 rtx
14439 ix86_find_base_term (rtx x)
14440 {
14441 rtx term;
14442
14443 if (TARGET_64BIT)
14444 {
14445 if (GET_CODE (x) != CONST)
14446 return x;
14447 term = XEXP (x, 0);
14448 if (GET_CODE (term) == PLUS
14449 && (CONST_INT_P (XEXP (term, 1))
14450 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14451 term = XEXP (term, 0);
14452 if (GET_CODE (term) != UNSPEC
14453 || (XINT (term, 1) != UNSPEC_GOTPCREL
14454 && XINT (term, 1) != UNSPEC_PCREL))
14455 return x;
14456
14457 return XVECEXP (term, 0, 0);
14458 }
14459
14460 return ix86_delegitimize_address (x);
14461 }
14462 \f
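/* Print to FILE the condition-code suffix (e.g. "e", "ne", "g", "b",
   "ae") for comparison CODE evaluated in MODE. REVERSE asks for the
   inverted condition; FP selects the fcmov-friendly spellings such as
   "nbe" instead of "a". This is a summary of the mapping implemented
   by the switch below. */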
14463 static void
14464 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14465 bool fp, FILE *file)
14466 {
14467 const char *suffix;
14468
14469 if (mode == CCFPmode || mode == CCFPUmode)
14470 {
14471 code = ix86_fp_compare_code_to_integer (code);
14472 mode = CCmode;
14473 }
14474 if (reverse)
14475 code = reverse_condition (code);
14476
14477 switch (code)
14478 {
14479 case EQ:
14480 switch (mode)
14481 {
14482 case CCAmode:
14483 suffix = "a";
14484 break;
14485
14486 case CCCmode:
14487 suffix = "c";
14488 break;
14489
14490 case CCOmode:
14491 suffix = "o";
14492 break;
14493
14494 case CCSmode:
14495 suffix = "s";
14496 break;
14497
14498 default:
14499 suffix = "e";
14500 }
14501 break;
14502 case NE:
14503 switch (mode)
14504 {
14505 case CCAmode:
14506 suffix = "na";
14507 break;
14508
14509 case CCCmode:
14510 suffix = "nc";
14511 break;
14512
14513 case CCOmode:
14514 suffix = "no";
14515 break;
14516
14517 case CCSmode:
14518 suffix = "ns";
14519 break;
14520
14521 default:
14522 suffix = "ne";
14523 }
14524 break;
14525 case GT:
14526 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14527 suffix = "g";
14528 break;
14529 case GTU:
14530 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14531 Those same assemblers have the same but opposite lossage on cmov. */
14532 if (mode == CCmode)
14533 suffix = fp ? "nbe" : "a";
14534 else
14535 gcc_unreachable ();
14536 break;
14537 case LT:
14538 switch (mode)
14539 {
14540 case CCNOmode:
14541 case CCGOCmode:
14542 suffix = "s";
14543 break;
14544
14545 case CCmode:
14546 case CCGCmode:
14547 suffix = "l";
14548 break;
14549
14550 default:
14551 gcc_unreachable ();
14552 }
14553 break;
14554 case LTU:
14555 if (mode == CCmode)
14556 suffix = "b";
14557 else if (mode == CCCmode)
14558 suffix = "c";
14559 else
14560 gcc_unreachable ();
14561 break;
14562 case GE:
14563 switch (mode)
14564 {
14565 case CCNOmode:
14566 case CCGOCmode:
14567 suffix = "ns";
14568 break;
14569
14570 case CCmode:
14571 case CCGCmode:
14572 suffix = "ge";
14573 break;
14574
14575 default:
14576 gcc_unreachable ();
14577 }
14578 break;
14579 case GEU:
14580 if (mode == CCmode)
14581 suffix = fp ? "nb" : "ae";
14582 else if (mode == CCCmode)
14583 suffix = "nc";
14584 else
14585 gcc_unreachable ();
14586 break;
14587 case LE:
14588 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14589 suffix = "le";
14590 break;
14591 case LEU:
14592 if (mode == CCmode)
14593 suffix = "be";
14594 else
14595 gcc_unreachable ();
14596 break;
14597 case UNORDERED:
14598 suffix = fp ? "u" : "p";
14599 break;
14600 case ORDERED:
14601 suffix = fp ? "nu" : "np";
14602 break;
14603 default:
14604 gcc_unreachable ();
14605 }
14606 fputs (suffix, file);
14607 }
14608
14609 /* Print the name of register X to FILE based on its machine mode and number.
14610 If CODE is 'w', pretend the mode is HImode.
14611 If CODE is 'b', pretend the mode is QImode.
14612 If CODE is 'k', pretend the mode is SImode.
14613 If CODE is 'q', pretend the mode is DImode.
14614 If CODE is 'x', pretend the mode is V4SFmode.
14615 If CODE is 't', pretend the mode is V8SFmode.
14616 If CODE is 'g', pretend the mode is V16SFmode.
14617 If CODE is 'h', pretend the reg is the 'high' byte register.
14618 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op.
14619 If CODE is 'd', duplicate the operand for AVX instruction.
14620 */
14621
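/* For example, for the hard register AX_REG, code 'q' prints "rax" in
   64-bit mode, code 'k' prints "eax" and code 'b' prints "al" (each
   with a leading '%' in ATT syntax); a sketch assuming the standard
   hi_reg_name/qi_reg_name tables. */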
14622 void
14623 print_reg (rtx x, int code, FILE *file)
14624 {
14625 const char *reg;
14626 unsigned int regno;
14627 bool duplicated = code == 'd' && TARGET_AVX;
14628
14629 if (ASSEMBLER_DIALECT == ASM_ATT)
14630 putc ('%', file);
14631
14632 if (x == pc_rtx)
14633 {
14634 gcc_assert (TARGET_64BIT);
14635 fputs ("rip", file);
14636 return;
14637 }
14638
14639 regno = true_regnum (x);
14640 gcc_assert (regno != ARG_POINTER_REGNUM
14641 && regno != FRAME_POINTER_REGNUM
14642 && regno != FLAGS_REG
14643 && regno != FPSR_REG
14644 && regno != FPCR_REG);
14645
14646 if (code == 'w' || MMX_REG_P (x))
14647 code = 2;
14648 else if (code == 'b')
14649 code = 1;
14650 else if (code == 'k')
14651 code = 4;
14652 else if (code == 'q')
14653 code = 8;
14654 else if (code == 'y')
14655 code = 3;
14656 else if (code == 'h')
14657 code = 0;
14658 else if (code == 'x')
14659 code = 16;
14660 else if (code == 't')
14661 code = 32;
14662 else if (code == 'g')
14663 code = 64;
14664 else
14665 code = GET_MODE_SIZE (GET_MODE (x));
14666
14667 /* Irritatingly, AMD extended registers use a different naming convention
14668 from the normal registers: "r%d[bwd]". */
14669 if (REX_INT_REGNO_P (regno))
14670 {
14671 gcc_assert (TARGET_64BIT);
14672 putc ('r', file);
14673 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14674 switch (code)
14675 {
14676 case 0:
14677 error ("extended registers have no high halves");
14678 break;
14679 case 1:
14680 putc ('b', file);
14681 break;
14682 case 2:
14683 putc ('w', file);
14684 break;
14685 case 4:
14686 putc ('d', file);
14687 break;
14688 case 8:
14689 /* no suffix */
14690 break;
14691 default:
14692 error ("unsupported operand size for extended register");
14693 break;
14694 }
14695 return;
14696 }
14697
14698 reg = NULL;
14699 switch (code)
14700 {
14701 case 3:
14702 if (STACK_TOP_P (x))
14703 {
14704 reg = "st(0)";
14705 break;
14706 }
14707 /* FALLTHRU */
14708 case 8:
14709 case 4:
14710 case 12:
14711 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14712 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14713 /* FALLTHRU */
14714 case 16:
14715 case 2:
14716 normal:
14717 reg = hi_reg_name[regno];
14718 break;
14719 case 1:
14720 if (regno >= ARRAY_SIZE (qi_reg_name))
14721 goto normal;
14722 reg = qi_reg_name[regno];
14723 break;
14724 case 0:
14725 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14726 goto normal;
14727 reg = qi_high_reg_name[regno];
14728 break;
14729 case 32:
14730 if (SSE_REG_P (x))
14731 {
14732 gcc_assert (!duplicated);
14733 putc ('y', file);
14734 fputs (hi_reg_name[regno] + 1, file);
14735 return;
14736 }
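/* FALLTHRU */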
14737 case 64:
14738 if (SSE_REG_P (x))
14739 {
14740 gcc_assert (!duplicated);
14741 putc ('z', file);
14742 fputs (hi_reg_name[REGNO (x)] + 1, file);
14743 return;
14744 }
14745 break;
14746 default:
14747 gcc_unreachable ();
14748 }
14749
14750 fputs (reg, file);
14751 if (duplicated)
14752 {
14753 if (ASSEMBLER_DIALECT == ASM_ATT)
14754 fprintf (file, ", %%%s", reg);
14755 else
14756 fprintf (file, ", %s", reg);
14757 }
14758 }
14759
14760 /* Meaning of CODE:
14761 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14762 C -- print opcode suffix for set/cmov insn.
14763 c -- like C, but print reversed condition
14764 F,f -- likewise, but for floating-point.
14765 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14766 otherwise nothing
14767 R -- print embedded rounding and sae.
14768 r -- print only sae.
14769 z -- print the opcode suffix for the size of the current operand.
14770 Z -- likewise, with special suffixes for x87 instructions.
14771 * -- print a star (in certain assembler syntax)
14772 A -- print an absolute memory reference.
14773 E -- print address with DImode register names if TARGET_64BIT.
14774 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14775 s -- print a shift double count, followed by the assembler's argument
14776 delimiter.
14777 b -- print the QImode name of the register for the indicated operand.
14778 %b0 would print %al if operands[0] is reg 0.
14779 w -- likewise, print the HImode name of the register.
14780 k -- likewise, print the SImode name of the register.
14781 q -- likewise, print the DImode name of the register.
14782 x -- likewise, print the V4SFmode name of the register.
14783 t -- likewise, print the V8SFmode name of the register.
14784 g -- likewise, print the V16SFmode name of the register.
14785 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14786 y -- print "st(0)" instead of "st" as a register.
14787 d -- print duplicated register operand for AVX instruction.
14788 D -- print condition for SSE cmp instruction.
14789 P -- if PIC, print an @PLT suffix.
14790 p -- print raw symbol name.
14791 X -- don't print any sort of PIC '@' suffix for a symbol.
14792 & -- print some in-use local-dynamic symbol name.
14793 H -- print a memory address offset by 8; used for sse high-parts
14794 Y -- print condition for XOP pcom* instruction.
14795 + -- print a branch hint as 'cs' or 'ds' prefix
14796 ; -- print a semicolon (after prefixes due to bug in older gas).
14797 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14798 @ -- print a segment register of thread base pointer load
14799 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14800 */
14801
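/* A couple of illustrative uses (a sketch): "%z0" appends the integer
   size suffix ('b', 'w', 'l' or 'q') for operand 0, "%k1" prints the
   SImode name of a register operand, and "%+" may emit a "ds"/"cs"
   branch-hint prefix when branch prediction hints are enabled. */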
14802 void
14803 ix86_print_operand (FILE *file, rtx x, int code)
14804 {
14805 if (code)
14806 {
14807 switch (code)
14808 {
14809 case 'A':
14810 switch (ASSEMBLER_DIALECT)
14811 {
14812 case ASM_ATT:
14813 putc ('*', file);
14814 break;
14815
14816 case ASM_INTEL:
14817 /* Intel syntax. For absolute addresses, registers should not
14818 be surrounded by braces. */
14819 if (!REG_P (x))
14820 {
14821 putc ('[', file);
14822 ix86_print_operand (file, x, 0);
14823 putc (']', file);
14824 return;
14825 }
14826 break;
14827
14828 default:
14829 gcc_unreachable ();
14830 }
14831
14832 ix86_print_operand (file, x, 0);
14833 return;
14834
14835 case 'E':
14836 /* Wrap address in an UNSPEC to declare special handling. */
14837 if (TARGET_64BIT)
14838 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14839
14840 output_address (x);
14841 return;
14842
14843 case 'L':
14844 if (ASSEMBLER_DIALECT == ASM_ATT)
14845 putc ('l', file);
14846 return;
14847
14848 case 'W':
14849 if (ASSEMBLER_DIALECT == ASM_ATT)
14850 putc ('w', file);
14851 return;
14852
14853 case 'B':
14854 if (ASSEMBLER_DIALECT == ASM_ATT)
14855 putc ('b', file);
14856 return;
14857
14858 case 'Q':
14859 if (ASSEMBLER_DIALECT == ASM_ATT)
14860 putc ('l', file);
14861 return;
14862
14863 case 'S':
14864 if (ASSEMBLER_DIALECT == ASM_ATT)
14865 putc ('s', file);
14866 return;
14867
14868 case 'T':
14869 if (ASSEMBLER_DIALECT == ASM_ATT)
14870 putc ('t', file);
14871 return;
14872
14873 case 'O':
14874 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14875 if (ASSEMBLER_DIALECT != ASM_ATT)
14876 return;
14877
14878 switch (GET_MODE_SIZE (GET_MODE (x)))
14879 {
14880 case 2:
14881 putc ('w', file);
14882 break;
14883
14884 case 4:
14885 putc ('l', file);
14886 break;
14887
14888 case 8:
14889 putc ('q', file);
14890 break;
14891
14892 default:
14893 output_operand_lossage
14894 ("invalid operand size for operand code 'O'");
14895 return;
14896 }
14897
14898 putc ('.', file);
14899 #endif
14900 return;
14901
14902 case 'z':
14903 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14904 {
14905 /* Opcodes don't get size suffixes if using Intel syntax. */
14906 if (ASSEMBLER_DIALECT == ASM_INTEL)
14907 return;
14908
14909 switch (GET_MODE_SIZE (GET_MODE (x)))
14910 {
14911 case 1:
14912 putc ('b', file);
14913 return;
14914
14915 case 2:
14916 putc ('w', file);
14917 return;
14918
14919 case 4:
14920 putc ('l', file);
14921 return;
14922
14923 case 8:
14924 putc ('q', file);
14925 return;
14926
14927 default:
14928 output_operand_lossage
14929 ("invalid operand size for operand code 'z'");
14930 return;
14931 }
14932 }
14933
14934 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14935 warning
14936 (0, "non-integer operand used with operand code 'z'");
14937 /* FALLTHRU */
14938
14939 case 'Z':
14940 /* 387 opcodes don't get size suffixes if using Intel syntax. */
14941 if (ASSEMBLER_DIALECT == ASM_INTEL)
14942 return;
14943
14944 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14945 {
14946 switch (GET_MODE_SIZE (GET_MODE (x)))
14947 {
14948 case 2:
14949 #ifdef HAVE_AS_IX86_FILDS
14950 putc ('s', file);
14951 #endif
14952 return;
14953
14954 case 4:
14955 putc ('l', file);
14956 return;
14957
14958 case 8:
14959 #ifdef HAVE_AS_IX86_FILDQ
14960 putc ('q', file);
14961 #else
14962 fputs ("ll", file);
14963 #endif
14964 return;
14965
14966 default:
14967 break;
14968 }
14969 }
14970 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14971 {
14972 /* 387 opcodes don't get size suffixes
14973 if the operands are registers. */
14974 if (STACK_REG_P (x))
14975 return;
14976
14977 switch (GET_MODE_SIZE (GET_MODE (x)))
14978 {
14979 case 4:
14980 putc ('s', file);
14981 return;
14982
14983 case 8:
14984 putc ('l', file);
14985 return;
14986
14987 case 12:
14988 case 16:
14989 putc ('t', file);
14990 return;
14991
14992 default:
14993 break;
14994 }
14995 }
14996 else
14997 {
14998 output_operand_lossage
14999 ("invalid operand type used with operand code 'Z'");
15000 return;
15001 }
15002
15003 output_operand_lossage
15004 ("invalid operand size for operand code 'Z'");
15005 return;
15006
15007 case 'd':
15008 case 'b':
15009 case 'w':
15010 case 'k':
15011 case 'q':
15012 case 'h':
15013 case 't':
15014 case 'g':
15015 case 'y':
15016 case 'x':
15017 case 'X':
15018 case 'P':
15019 case 'p':
15020 break;
15021
15022 case 's':
15023 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15024 {
15025 ix86_print_operand (file, x, 0);
15026 fputs (", ", file);
15027 }
15028 return;
15029
15030 case 'Y':
15031 switch (GET_CODE (x))
15032 {
15033 case NE:
15034 fputs ("neq", file);
15035 break;
15036 case EQ:
15037 fputs ("eq", file);
15038 break;
15039 case GE:
15040 case GEU:
15041 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15042 break;
15043 case GT:
15044 case GTU:
15045 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15046 break;
15047 case LE:
15048 case LEU:
15049 fputs ("le", file);
15050 break;
15051 case LT:
15052 case LTU:
15053 fputs ("lt", file);
15054 break;
15055 case UNORDERED:
15056 fputs ("unord", file);
15057 break;
15058 case ORDERED:
15059 fputs ("ord", file);
15060 break;
15061 case UNEQ:
15062 fputs ("ueq", file);
15063 break;
15064 case UNGE:
15065 fputs ("nlt", file);
15066 break;
15067 case UNGT:
15068 fputs ("nle", file);
15069 break;
15070 case UNLE:
15071 fputs ("ule", file);
15072 break;
15073 case UNLT:
15074 fputs ("ult", file);
15075 break;
15076 case LTGT:
15077 fputs ("une", file);
15078 break;
15079 default:
15080 output_operand_lossage ("operand is not a condition code, "
15081 "invalid operand code 'Y'");
15082 return;
15083 }
15084 return;
15085
15086 case 'D':
15087 /* A little bit of braindamage here. The SSE compare instructions
15088 use completely different names for the comparisons than the
15089 fp conditional moves do. */
15090 switch (GET_CODE (x))
15091 {
15092 case UNEQ:
15093 if (TARGET_AVX)
15094 {
15095 fputs ("eq_us", file);
15096 break;
15097 }
15098 case EQ:
15099 fputs ("eq", file);
15100 break;
15101 case UNLT:
15102 if (TARGET_AVX)
15103 {
15104 fputs ("nge", file);
15105 break;
15106 }
15107 case LT:
15108 fputs ("lt", file);
15109 break;
15110 case UNLE:
15111 if (TARGET_AVX)
15112 {
15113 fputs ("ngt", file);
15114 break;
15115 }
15116 case LE:
15117 fputs ("le", file);
15118 break;
15119 case UNORDERED:
15120 fputs ("unord", file);
15121 break;
15122 case LTGT:
15123 if (TARGET_AVX)
15124 {
15125 fputs ("neq_oq", file);
15126 break;
15127 }
15128 case NE:
15129 fputs ("neq", file);
15130 break;
15131 case GE:
15132 if (TARGET_AVX)
15133 {
15134 fputs ("ge", file);
15135 break;
15136 }
15137 case UNGE:
15138 fputs ("nlt", file);
15139 break;
15140 case GT:
15141 if (TARGET_AVX)
15142 {
15143 fputs ("gt", file);
15144 break;
15145 }
15146 case UNGT:
15147 fputs ("nle", file);
15148 break;
15149 case ORDERED:
15150 fputs ("ord", file);
15151 break;
15152 default:
15153 output_operand_lossage ("operand is not a condition code, "
15154 "invalid operand code 'D'");
15155 return;
15156 }
15157 return;
15158
15159 case 'F':
15160 case 'f':
15161 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15162 if (ASSEMBLER_DIALECT == ASM_ATT)
15163 putc ('.', file);
15164 #endif
15165
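/* FALLTHRU */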
15166 case 'C':
15167 case 'c':
15168 if (!COMPARISON_P (x))
15169 {
15170 output_operand_lossage ("operand is not a condition code, "
15171 "invalid operand code '%c'", code);
15172 return;
15173 }
15174 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15175 code == 'c' || code == 'f',
15176 code == 'F' || code == 'f',
15177 file);
15178 return;
15179
15180 case 'H':
15181 if (!offsettable_memref_p (x))
15182 {
15183 output_operand_lossage ("operand is not an offsettable memory "
15184 "reference, invalid operand code 'H'");
15185 return;
15186 }
15187 /* It doesn't actually matter what mode we use here, as we're
15188 only going to use this for printing. */
15189 x = adjust_address_nv (x, DImode, 8);
15190 /* Output 'qword ptr' for intel assembler dialect. */
15191 if (ASSEMBLER_DIALECT == ASM_INTEL)
15192 code = 'q';
15193 break;
15194
15195 case 'K':
15196 gcc_assert (CONST_INT_P (x));
15197
15198 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15199 #ifdef HAVE_AS_IX86_HLE
15200 fputs ("xacquire ", file);
15201 #else
15202 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15203 #endif
15204 else if (INTVAL (x) & IX86_HLE_RELEASE)
15205 #ifdef HAVE_AS_IX86_HLE
15206 fputs ("xrelease ", file);
15207 #else
15208 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15209 #endif
15210 /* We do not want to print the value of the operand. */
15211 return;
15212
15213 case 'N':
15214 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15215 fputs ("{z}", file);
15216 return;
15217
15218 case 'r':
15219 gcc_assert (CONST_INT_P (x));
15220 gcc_assert (INTVAL (x) == ROUND_SAE);
15221
15222 if (ASSEMBLER_DIALECT == ASM_INTEL)
15223 fputs (", ", file);
15224
15225 fputs ("{sae}", file);
15226
15227 if (ASSEMBLER_DIALECT == ASM_ATT)
15228 fputs (", ", file);
15229
15230 return;
15231
15232 case 'R':
15233 gcc_assert (CONST_INT_P (x));
15234
15235 if (ASSEMBLER_DIALECT == ASM_INTEL)
15236 fputs (", ", file);
15237
15238 switch (INTVAL (x))
15239 {
15240 case ROUND_NEAREST_INT | ROUND_SAE:
15241 fputs ("{rn-sae}", file);
15242 break;
15243 case ROUND_NEG_INF | ROUND_SAE:
15244 fputs ("{rd-sae}", file);
15245 break;
15246 case ROUND_POS_INF | ROUND_SAE:
15247 fputs ("{ru-sae}", file);
15248 break;
15249 case ROUND_ZERO | ROUND_SAE:
15250 fputs ("{rz-sae}", file);
15251 break;
15252 default:
15253 gcc_unreachable ();
15254 }
15255
15256 if (ASSEMBLER_DIALECT == ASM_ATT)
15257 fputs (", ", file);
15258
15259 return;
15260
15261 case '*':
15262 if (ASSEMBLER_DIALECT == ASM_ATT)
15263 putc ('*', file);
15264 return;
15265
15266 case '&':
15267 {
15268 const char *name = get_some_local_dynamic_name ();
15269 if (name == NULL)
15270 output_operand_lossage ("'%%&' used without any "
15271 "local dynamic TLS references");
15272 else
15273 assemble_name (file, name);
15274 return;
15275 }
15276
15277 case '+':
15278 {
15279 rtx x;
15280
15281 if (!optimize
15282 || optimize_function_for_size_p (cfun)
15283 || !TARGET_BRANCH_PREDICTION_HINTS)
15284 return;
15285
15286 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15287 if (x)
15288 {
15289 int pred_val = XINT (x, 0);
15290
15291 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15292 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15293 {
15294 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15295 bool cputaken
15296 = final_forward_branch_p (current_output_insn) == 0;
15297
15298 /* Emit hints only in the case where the default branch
15299 prediction heuristics would fail. */
15300 if (taken != cputaken)
15301 {
15302 /* We use 3e (DS) prefix for taken branches and
15303 2e (CS) prefix for not taken branches. */
15304 if (taken)
15305 fputs ("ds ; ", file);
15306 else
15307 fputs ("cs ; ", file);
15308 }
15309 }
15310 }
15311 return;
15312 }
15313
15314 case ';':
15315 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15316 putc (';', file);
15317 #endif
15318 return;
15319
15320 case '@':
15321 if (ASSEMBLER_DIALECT == ASM_ATT)
15322 putc ('%', file);
15323
15324 /* The kernel uses a different segment register for performance
15325 reasons; a system call would not have to trash the userspace
15326 segment register, which would be expensive. */
15327 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15328 fputs ("fs", file);
15329 else
15330 fputs ("gs", file);
15331 return;
15332
15333 case '~':
15334 putc (TARGET_AVX2 ? 'i' : 'f', file);
15335 return;
15336
15337 case '^':
15338 if (TARGET_64BIT && Pmode != word_mode)
15339 fputs ("addr32 ", file);
15340 return;
15341
15342 default:
15343 output_operand_lossage ("invalid operand code '%c'", code);
15344 }
15345 }
15346
15347 if (REG_P (x))
15348 print_reg (x, code, file);
15349
15350 else if (MEM_P (x))
15351 {
15352 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15353 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15354 && GET_MODE (x) != BLKmode)
15355 {
15356 const char * size;
15357 switch (GET_MODE_SIZE (GET_MODE (x)))
15358 {
15359 case 1: size = "BYTE"; break;
15360 case 2: size = "WORD"; break;
15361 case 4: size = "DWORD"; break;
15362 case 8: size = "QWORD"; break;
15363 case 12: size = "TBYTE"; break;
15364 case 16:
15365 if (GET_MODE (x) == XFmode)
15366 size = "TBYTE";
15367 else
15368 size = "XMMWORD";
15369 break;
15370 case 32: size = "YMMWORD"; break;
15371 case 64: size = "ZMMWORD"; break;
15372 default:
15373 gcc_unreachable ();
15374 }
15375
15376 /* Check for explicit size override (codes 'b', 'w', 'k',
15377 'q' and 'x') */
15378 if (code == 'b')
15379 size = "BYTE";
15380 else if (code == 'w')
15381 size = "WORD";
15382 else if (code == 'k')
15383 size = "DWORD";
15384 else if (code == 'q')
15385 size = "QWORD";
15386 else if (code == 'x')
15387 size = "XMMWORD";
15388
15389 fputs (size, file);
15390 fputs (" PTR ", file);
15391 }
15392
15393 x = XEXP (x, 0);
15394 /* Avoid (%rip) for call operands. */
15395 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15396 && !CONST_INT_P (x))
15397 output_addr_const (file, x);
15398 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15399 output_operand_lossage ("invalid constraints for operand");
15400 else
15401 output_address (x);
15402 }
15403
15404 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15405 {
15406 REAL_VALUE_TYPE r;
15407 long l;
15408
15409 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15410 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15411
15412 if (ASSEMBLER_DIALECT == ASM_ATT)
15413 putc ('$', file);
15414 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15415 if (code == 'q')
15416 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15417 (unsigned long long) (int) l);
15418 else
15419 fprintf (file, "0x%08x", (unsigned int) l);
15420 }
15421
15422 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15423 {
15424 REAL_VALUE_TYPE r;
15425 long l[2];
15426
15427 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15428 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15429
15430 if (ASSEMBLER_DIALECT == ASM_ATT)
15431 putc ('$', file);
15432 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15433 }
15434
15435 /* These float cases don't actually occur as immediate operands. */
15436 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15437 {
15438 char dstr[30];
15439
15440 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15441 fputs (dstr, file);
15442 }
15443
15444 else
15445 {
15446 /* We have patterns that allow zero sets of memory, for instance.
15447 In 64-bit mode, we should probably support all 8-byte vectors,
15448 since we can in fact encode that into an immediate. */
15449 if (GET_CODE (x) == CONST_VECTOR)
15450 {
15451 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15452 x = const0_rtx;
15453 }
15454
15455 if (code != 'P' && code != 'p')
15456 {
15457 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15458 {
15459 if (ASSEMBLER_DIALECT == ASM_ATT)
15460 putc ('$', file);
15461 }
15462 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15463 || GET_CODE (x) == LABEL_REF)
15464 {
15465 if (ASSEMBLER_DIALECT == ASM_ATT)
15466 putc ('$', file);
15467 else
15468 fputs ("OFFSET FLAT:", file);
15469 }
15470 }
15471 if (CONST_INT_P (x))
15472 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15473 else if (flag_pic || MACHOPIC_INDIRECT)
15474 output_pic_addr_const (file, x, code);
15475 else
15476 output_addr_const (file, x);
15477 }
15478 }
15479
15480 static bool
15481 ix86_print_operand_punct_valid_p (unsigned char code)
15482 {
15483 return (code == '@' || code == '*' || code == '+' || code == '&'
15484 || code == ';' || code == '~' || code == '^');
15485 }
15486 \f
15487 /* Print a memory operand whose address is ADDR. */
15488
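/* For instance, an address with base %ebx, index %ecx, scale 4 and
   displacement 8 comes out as "8(%ebx,%ecx,4)" in ATT syntax and as
   "[ebx+8+ecx*4]" in Intel syntax (a sketch of the common case; VSIB
   and rip-relative addresses get special handling below). */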
15489 static void
15490 ix86_print_operand_address (FILE *file, rtx addr)
15491 {
15492 struct ix86_address parts;
15493 rtx base, index, disp;
15494 int scale;
15495 int ok;
15496 bool vsib = false;
15497 int code = 0;
15498
15499 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15500 {
15501 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15502 gcc_assert (parts.index == NULL_RTX);
15503 parts.index = XVECEXP (addr, 0, 1);
15504 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15505 addr = XVECEXP (addr, 0, 0);
15506 vsib = true;
15507 }
15508 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15509 {
15510 gcc_assert (TARGET_64BIT);
15511 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15512 code = 'q';
15513 }
15514 else
15515 ok = ix86_decompose_address (addr, &parts);
15516
15517 gcc_assert (ok);
15518
15519 base = parts.base;
15520 index = parts.index;
15521 disp = parts.disp;
15522 scale = parts.scale;
15523
15524 switch (parts.seg)
15525 {
15526 case SEG_DEFAULT:
15527 break;
15528 case SEG_FS:
15529 case SEG_GS:
15530 if (ASSEMBLER_DIALECT == ASM_ATT)
15531 putc ('%', file);
15532 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15533 break;
15534 default:
15535 gcc_unreachable ();
15536 }
15537
15538 /* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
15539 if (TARGET_64BIT && !base && !index)
15540 {
15541 rtx symbol = disp;
15542
15543 if (GET_CODE (disp) == CONST
15544 && GET_CODE (XEXP (disp, 0)) == PLUS
15545 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15546 symbol = XEXP (XEXP (disp, 0), 0);
15547
15548 if (GET_CODE (symbol) == LABEL_REF
15549 || (GET_CODE (symbol) == SYMBOL_REF
15550 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15551 base = pc_rtx;
15552 }
15553 if (!base && !index)
15554 {
15555 /* A displacement-only address requires special attention. */
15556
15557 if (CONST_INT_P (disp))
15558 {
15559 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15560 fputs ("ds:", file);
15561 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15562 }
15563 else if (flag_pic)
15564 output_pic_addr_const (file, disp, 0);
15565 else
15566 output_addr_const (file, disp);
15567 }
15568 else
15569 {
15570 /* Print SImode register names to force addr32 prefix. */
15571 if (SImode_address_operand (addr, VOIDmode))
15572 {
15573 #ifdef ENABLE_CHECKING
15574 gcc_assert (TARGET_64BIT);
15575 switch (GET_CODE (addr))
15576 {
15577 case SUBREG:
15578 gcc_assert (GET_MODE (addr) == SImode);
15579 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15580 break;
15581 case ZERO_EXTEND:
15582 case AND:
15583 gcc_assert (GET_MODE (addr) == DImode);
15584 break;
15585 default:
15586 gcc_unreachable ();
15587 }
15588 #endif
15589 gcc_assert (!code);
15590 code = 'k';
15591 }
15592 else if (code == 0
15593 && TARGET_X32
15594 && disp
15595 && CONST_INT_P (disp)
15596 && INTVAL (disp) < -16*1024*1024)
15597 {
15598 /* X32 runs in 64-bit mode, where displacement, DISP, in
15599 address DISP(%r64), is encoded as 32-bit immediate sign-
15600 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15601 address is %r64 + 0xffffffffbffffd00. When %r64 <
15602 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15603 which is invalid for x32. The correct address is %r64
15604 - 0x40000300 == 0xf7ffdd64. To properly encode
15605 -0x40000300(%r64) for x32, we zero-extend negative
15606 displacement by forcing addr32 prefix which truncates
15607 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15608 zero-extend all negative displacements, including -1(%rsp).
15609 However, for small negative displacements, sign-extension
15610 won't cause overflow. We only zero-extend negative
15611 	     displacements if they are < -16*1024*1024, which is also used
15612 to check legitimate address displacements for PIC. */
15613 code = 'k';
15614 }
15615
15616 if (ASSEMBLER_DIALECT == ASM_ATT)
15617 {
15618 if (disp)
15619 {
15620 if (flag_pic)
15621 output_pic_addr_const (file, disp, 0);
15622 else if (GET_CODE (disp) == LABEL_REF)
15623 output_asm_label (disp);
15624 else
15625 output_addr_const (file, disp);
15626 }
15627
15628 putc ('(', file);
15629 if (base)
15630 print_reg (base, code, file);
15631 if (index)
15632 {
15633 putc (',', file);
15634 print_reg (index, vsib ? 0 : code, file);
15635 if (scale != 1 || vsib)
15636 fprintf (file, ",%d", scale);
15637 }
15638 putc (')', file);
15639 }
15640 else
15641 {
15642 rtx offset = NULL_RTX;
15643
15644 if (disp)
15645 {
15646 /* Pull out the offset of a symbol; print any symbol itself. */
15647 if (GET_CODE (disp) == CONST
15648 && GET_CODE (XEXP (disp, 0)) == PLUS
15649 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15650 {
15651 offset = XEXP (XEXP (disp, 0), 1);
15652 disp = gen_rtx_CONST (VOIDmode,
15653 XEXP (XEXP (disp, 0), 0));
15654 }
15655
15656 if (flag_pic)
15657 output_pic_addr_const (file, disp, 0);
15658 else if (GET_CODE (disp) == LABEL_REF)
15659 output_asm_label (disp);
15660 else if (CONST_INT_P (disp))
15661 offset = disp;
15662 else
15663 output_addr_const (file, disp);
15664 }
15665
15666 putc ('[', file);
15667 if (base)
15668 {
15669 print_reg (base, code, file);
15670 if (offset)
15671 {
15672 if (INTVAL (offset) >= 0)
15673 putc ('+', file);
15674 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15675 }
15676 }
15677 else if (offset)
15678 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15679 else
15680 putc ('0', file);
15681
15682 if (index)
15683 {
15684 putc ('+', file);
15685 print_reg (index, vsib ? 0 : code, file);
15686 if (scale != 1 || vsib)
15687 fprintf (file, "*%d", scale);
15688 }
15689 putc (']', file);
15690 }
15691 }
15692 }
15693
15694 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15695
15696 static bool
15697 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15698 {
15699 rtx op;
15700
15701 if (GET_CODE (x) != UNSPEC)
15702 return false;
15703
15704 op = XVECEXP (x, 0, 0);
15705 switch (XINT (x, 1))
15706 {
15707 case UNSPEC_GOTTPOFF:
15708 output_addr_const (file, op);
15709 /* FIXME: This might be @TPOFF in Sun ld. */
15710 fputs ("@gottpoff", file);
15711 break;
15712 case UNSPEC_TPOFF:
15713 output_addr_const (file, op);
15714 fputs ("@tpoff", file);
15715 break;
15716 case UNSPEC_NTPOFF:
15717 output_addr_const (file, op);
15718 if (TARGET_64BIT)
15719 fputs ("@tpoff", file);
15720 else
15721 fputs ("@ntpoff", file);
15722 break;
15723 case UNSPEC_DTPOFF:
15724 output_addr_const (file, op);
15725 fputs ("@dtpoff", file);
15726 break;
15727 case UNSPEC_GOTNTPOFF:
15728 output_addr_const (file, op);
15729 if (TARGET_64BIT)
15730 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15731 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15732 else
15733 fputs ("@gotntpoff", file);
15734 break;
15735 case UNSPEC_INDNTPOFF:
15736 output_addr_const (file, op);
15737 fputs ("@indntpoff", file);
15738 break;
15739 #if TARGET_MACHO
15740 case UNSPEC_MACHOPIC_OFFSET:
15741 output_addr_const (file, op);
15742 putc ('-', file);
15743 machopic_output_function_base_name (file);
15744 break;
15745 #endif
15746
15747 case UNSPEC_STACK_CHECK:
15748 {
15749 int offset;
15750
15751 gcc_assert (flag_split_stack);
15752
15753 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15754 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15755 #else
15756 gcc_unreachable ();
15757 #endif
15758
15759 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15760 }
15761 break;
15762
15763 default:
15764 return false;
15765 }
15766
15767 return true;
15768 }
15769 \f
15770 /* Split one or more double-mode RTL references into pairs of half-mode
15771 references. The RTL can be REG, offsettable MEM, integer constant, or
15772 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15773 split and "num" is its length. lo_half and hi_half are output arrays
15774 that parallel "operands". */
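/* For instance (an illustrative note; x86 is little-endian): splitting a
   DImode operand yields SImode halves at byte offsets 0 and 4, so a pseudo
   register becomes (subreg:SI (reg:DI x) 0) and (subreg:SI (reg:DI x) 4),
   while an offsettable MEM becomes the same address and address+4.  */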
15775
15776 void
15777 split_double_mode (enum machine_mode mode, rtx operands[],
15778 int num, rtx lo_half[], rtx hi_half[])
15779 {
15780 enum machine_mode half_mode;
15781 unsigned int byte;
15782
15783 switch (mode)
15784 {
15785 case TImode:
15786 half_mode = DImode;
15787 break;
15788 case DImode:
15789 half_mode = SImode;
15790 break;
15791 default:
15792 gcc_unreachable ();
15793 }
15794
15795 byte = GET_MODE_SIZE (half_mode);
15796
15797 while (num--)
15798 {
15799 rtx op = operands[num];
15800
15801       /* simplify_subreg refuses to split volatile memory addresses,
15802 	 but we still have to handle them.  */
15803 if (MEM_P (op))
15804 {
15805 lo_half[num] = adjust_address (op, half_mode, 0);
15806 hi_half[num] = adjust_address (op, half_mode, byte);
15807 }
15808 else
15809 {
15810 lo_half[num] = simplify_gen_subreg (half_mode, op,
15811 GET_MODE (op) == VOIDmode
15812 ? mode : GET_MODE (op), 0);
15813 hi_half[num] = simplify_gen_subreg (half_mode, op,
15814 GET_MODE (op) == VOIDmode
15815 ? mode : GET_MODE (op), byte);
15816 }
15817 }
15818 }
15819 \f
15820 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15821 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15822 is the expression of the binary operation. The output may either be
15823 emitted here, or returned to the caller, like all output_* functions.
15824
15825 There is no guarantee that the operands are the same mode, as they
15826 might be within FLOAT or FLOAT_EXTEND expressions. */
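/* For example (derived from the cases handled below): an SSE SFmode add
   returns the template "vaddss\t{%2, %1, %0|%0, %1, %2}" when AVX is
   enabled and "addss\t{%2, %0|%0, %2}" otherwise, while the x87 path
   starts from "fadd"/"fiadd" and appends a suffix chosen by the operand
   and liveness cases further down.  */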
15827
15828 #ifndef SYSV386_COMPAT
15829 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15830 wants to fix the assemblers because that causes incompatibility
15831 with gcc. No-one wants to fix gcc because that causes
15832 incompatibility with assemblers... You can use the option of
15833 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15834 #define SYSV386_COMPAT 1
15835 #endif
15836
15837 const char *
15838 output_387_binary_op (rtx insn, rtx *operands)
15839 {
15840 static char buf[40];
15841 const char *p;
15842 const char *ssep;
15843 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15844
15845 #ifdef ENABLE_CHECKING
15846   /* Even if we do not want to check the inputs, this documents the input
15847      constraints, which helps in understanding the following code.  */
15848 if (STACK_REG_P (operands[0])
15849 && ((REG_P (operands[1])
15850 && REGNO (operands[0]) == REGNO (operands[1])
15851 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15852 || (REG_P (operands[2])
15853 && REGNO (operands[0]) == REGNO (operands[2])
15854 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15855 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15856 ; /* ok */
15857 else
15858 gcc_assert (is_sse);
15859 #endif
15860
15861 switch (GET_CODE (operands[3]))
15862 {
15863 case PLUS:
15864 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15865 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15866 p = "fiadd";
15867 else
15868 p = "fadd";
15869 ssep = "vadd";
15870 break;
15871
15872 case MINUS:
15873 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15874 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15875 p = "fisub";
15876 else
15877 p = "fsub";
15878 ssep = "vsub";
15879 break;
15880
15881 case MULT:
15882 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15883 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15884 p = "fimul";
15885 else
15886 p = "fmul";
15887 ssep = "vmul";
15888 break;
15889
15890 case DIV:
15891 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15892 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15893 p = "fidiv";
15894 else
15895 p = "fdiv";
15896 ssep = "vdiv";
15897 break;
15898
15899 default:
15900 gcc_unreachable ();
15901 }
15902
15903 if (is_sse)
15904 {
15905 if (TARGET_AVX)
15906 {
15907 strcpy (buf, ssep);
15908 if (GET_MODE (operands[0]) == SFmode)
15909 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15910 else
15911 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15912 }
15913 else
15914 {
15915 strcpy (buf, ssep + 1);
15916 if (GET_MODE (operands[0]) == SFmode)
15917 strcat (buf, "ss\t{%2, %0|%0, %2}");
15918 else
15919 strcat (buf, "sd\t{%2, %0|%0, %2}");
15920 }
15921 return buf;
15922 }
15923 strcpy (buf, p);
15924
15925 switch (GET_CODE (operands[3]))
15926 {
15927 case MULT:
15928 case PLUS:
15929 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15930 {
15931 rtx temp = operands[2];
15932 operands[2] = operands[1];
15933 operands[1] = temp;
15934 }
15935
15936       /* We now know that operands[0] == operands[1].  */
15937
15938 if (MEM_P (operands[2]))
15939 {
15940 p = "%Z2\t%2";
15941 break;
15942 }
15943
15944 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15945 {
15946 if (STACK_TOP_P (operands[0]))
15947 /* How is it that we are storing to a dead operand[2]?
15948 Well, presumably operands[1] is dead too. We can't
15949 store the result to st(0) as st(0) gets popped on this
15950 instruction. Instead store to operands[2] (which I
15951 think has to be st(1)). st(1) will be popped later.
15952 gcc <= 2.8.1 didn't have this check and generated
15953 assembly code that the Unixware assembler rejected. */
15954 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15955 else
15956 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15957 break;
15958 }
15959
15960 if (STACK_TOP_P (operands[0]))
15961 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15962 else
15963 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15964 break;
15965
15966 case MINUS:
15967 case DIV:
15968 if (MEM_P (operands[1]))
15969 {
15970 p = "r%Z1\t%1";
15971 break;
15972 }
15973
15974 if (MEM_P (operands[2]))
15975 {
15976 p = "%Z2\t%2";
15977 break;
15978 }
15979
15980 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15981 {
15982 #if SYSV386_COMPAT
15983 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15984 derived assemblers, confusingly reverse the direction of
15985 the operation for fsub{r} and fdiv{r} when the
15986 destination register is not st(0). The Intel assembler
15987 doesn't have this brain damage. Read !SYSV386_COMPAT to
15988 figure out what the hardware really does. */
15989 if (STACK_TOP_P (operands[0]))
15990 p = "{p\t%0, %2|rp\t%2, %0}";
15991 else
15992 p = "{rp\t%2, %0|p\t%0, %2}";
15993 #else
15994 if (STACK_TOP_P (operands[0]))
15995 /* As above for fmul/fadd, we can't store to st(0). */
15996 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15997 else
15998 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15999 #endif
16000 break;
16001 }
16002
16003 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16004 {
16005 #if SYSV386_COMPAT
16006 if (STACK_TOP_P (operands[0]))
16007 p = "{rp\t%0, %1|p\t%1, %0}";
16008 else
16009 p = "{p\t%1, %0|rp\t%0, %1}";
16010 #else
16011 if (STACK_TOP_P (operands[0]))
16012 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16013 else
16014 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16015 #endif
16016 break;
16017 }
16018
16019 if (STACK_TOP_P (operands[0]))
16020 {
16021 if (STACK_TOP_P (operands[1]))
16022 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16023 else
16024 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16025 break;
16026 }
16027 else if (STACK_TOP_P (operands[1]))
16028 {
16029 #if SYSV386_COMPAT
16030 p = "{\t%1, %0|r\t%0, %1}";
16031 #else
16032 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16033 #endif
16034 }
16035 else
16036 {
16037 #if SYSV386_COMPAT
16038 p = "{r\t%2, %0|\t%0, %2}";
16039 #else
16040 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16041 #endif
16042 }
16043 break;
16044
16045 default:
16046 gcc_unreachable ();
16047 }
16048
16049 strcat (buf, p);
16050 return buf;
16051 }
16052
16053 /* Check if a 256bit AVX register is referenced inside EXP.  */
16054
16055 static int
16056 ix86_check_avx256_register (rtx *pexp, void *)
16057 {
16058 rtx exp = *pexp;
16059
16060 if (GET_CODE (exp) == SUBREG)
16061 exp = SUBREG_REG (exp);
16062
16063 if (REG_P (exp)
16064 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16065 return 1;
16066
16067 return 0;
16068 }
16069
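/* Overview of the AVX_U128 entity handled by the hooks below (a summary of
   the code that follows, not new behaviour): the mode is DIRTY when an insn
   references a 256bit register, CLEAN after vzeroupper/vzeroall or after a
   call that neither passes nor returns 256bit values, and ANY otherwise.
   The mode-switching pass uses this to insert vzeroupper (see
   ix86_emit_mode_set) where the upper halves must be clean, avoiding
   AVX/SSE transition penalties.  */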
16070 /* Return needed mode for entity in optimize_mode_switching pass. */
16071
16072 static int
16073 ix86_avx_u128_mode_needed (rtx_insn *insn)
16074 {
16075 if (CALL_P (insn))
16076 {
16077 rtx link;
16078
16079 /* Needed mode is set to AVX_U128_CLEAN if there are
16080 no 256bit modes used in function arguments. */
16081 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16082 link;
16083 link = XEXP (link, 1))
16084 {
16085 if (GET_CODE (XEXP (link, 0)) == USE)
16086 {
16087 rtx arg = XEXP (XEXP (link, 0), 0);
16088
16089 if (ix86_check_avx256_register (&arg, NULL))
16090 return AVX_U128_DIRTY;
16091 }
16092 }
16093
16094 return AVX_U128_CLEAN;
16095 }
16096
16097   /* Require DIRTY mode if a 256bit AVX register is referenced.  The hardware
16098      changes state only when a 256bit register is written to, but we need
16099      to prevent the compiler from moving the optimal insertion point above
16100      an eventual read from a 256bit register.  */
16101 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16102 return AVX_U128_DIRTY;
16103
16104 return AVX_U128_ANY;
16105 }
16106
16107 /* Return mode that i387 must be switched into
16108 prior to the execution of insn. */
16109
16110 static int
16111 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16112 {
16113 enum attr_i387_cw mode;
16114
16115   /* The mode UNINITIALIZED is used to store the control word after a
16116      function call or ASM pattern.  The mode ANY specifies that the function
16117      has no requirements on the control word and makes no changes in the
16118      bits we are interested in.  */
16119
16120 if (CALL_P (insn)
16121 || (NONJUMP_INSN_P (insn)
16122 && (asm_noperands (PATTERN (insn)) >= 0
16123 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16124 return I387_CW_UNINITIALIZED;
16125
16126 if (recog_memoized (insn) < 0)
16127 return I387_CW_ANY;
16128
16129 mode = get_attr_i387_cw (insn);
16130
16131 switch (entity)
16132 {
16133 case I387_TRUNC:
16134 if (mode == I387_CW_TRUNC)
16135 return mode;
16136 break;
16137
16138 case I387_FLOOR:
16139 if (mode == I387_CW_FLOOR)
16140 return mode;
16141 break;
16142
16143 case I387_CEIL:
16144 if (mode == I387_CW_CEIL)
16145 return mode;
16146 break;
16147
16148 case I387_MASK_PM:
16149 if (mode == I387_CW_MASK_PM)
16150 return mode;
16151 break;
16152
16153 default:
16154 gcc_unreachable ();
16155 }
16156
16157 return I387_CW_ANY;
16158 }
16159
16160 /* Return mode that entity must be switched into
16161 prior to the execution of insn. */
16162
16163 static int
16164 ix86_mode_needed (int entity, rtx_insn *insn)
16165 {
16166 switch (entity)
16167 {
16168 case AVX_U128:
16169 return ix86_avx_u128_mode_needed (insn);
16170 case I387_TRUNC:
16171 case I387_FLOOR:
16172 case I387_CEIL:
16173 case I387_MASK_PM:
16174 return ix86_i387_mode_needed (entity, insn);
16175 default:
16176 gcc_unreachable ();
16177 }
16178 return 0;
16179 }
16180
16181 /* Check if a 256bit AVX register is referenced in stores. */
16182
16183 static void
16184 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16185 {
16186 if (ix86_check_avx256_register (&dest, NULL))
16187 {
16188 bool *used = (bool *) data;
16189 *used = true;
16190 }
16191 }
16192
16193 /* Calculate the mode of the AVX registers' upper 128 bits after the insn.  */
16194
16195 static int
16196 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16197 {
16198 rtx pat = PATTERN (insn);
16199
16200 if (vzeroupper_operation (pat, VOIDmode)
16201 || vzeroall_operation (pat, VOIDmode))
16202 return AVX_U128_CLEAN;
16203
16204   /* We know that the state is clean after a CALL insn if no 256bit
16205      register is used for the function return value.  */
16206 if (CALL_P (insn))
16207 {
16208 bool avx_reg256_found = false;
16209 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16210
16211 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16212 }
16213
16214   /* Otherwise, return the current mode.  Remember that if the insn
16215      references AVX 256bit registers, the mode was already changed
16216      to DIRTY by MODE_NEEDED.  */
16217 return mode;
16218 }
16219
16220 /* Return the mode that an insn results in. */
16221
16222 int
16223 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16224 {
16225 switch (entity)
16226 {
16227 case AVX_U128:
16228 return ix86_avx_u128_mode_after (mode, insn);
16229 case I387_TRUNC:
16230 case I387_FLOOR:
16231 case I387_CEIL:
16232 case I387_MASK_PM:
16233 return mode;
16234 default:
16235 gcc_unreachable ();
16236 }
16237 }
16238
16239 static int
16240 ix86_avx_u128_mode_entry (void)
16241 {
16242 tree arg;
16243
16244 /* Entry mode is set to AVX_U128_DIRTY if there are
16245 256bit modes used in function arguments. */
16246 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16247 arg = TREE_CHAIN (arg))
16248 {
16249 rtx incoming = DECL_INCOMING_RTL (arg);
16250
16251 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16252 return AVX_U128_DIRTY;
16253 }
16254
16255 return AVX_U128_CLEAN;
16256 }
16257
16258 /* Return a mode that ENTITY is assumed to be
16259 switched to at function entry. */
16260
16261 static int
16262 ix86_mode_entry (int entity)
16263 {
16264 switch (entity)
16265 {
16266 case AVX_U128:
16267 return ix86_avx_u128_mode_entry ();
16268 case I387_TRUNC:
16269 case I387_FLOOR:
16270 case I387_CEIL:
16271 case I387_MASK_PM:
16272 return I387_CW_ANY;
16273 default:
16274 gcc_unreachable ();
16275 }
16276 }
16277
16278 static int
16279 ix86_avx_u128_mode_exit (void)
16280 {
16281 rtx reg = crtl->return_rtx;
16282
16283 /* Exit mode is set to AVX_U128_DIRTY if there are
16284 256bit modes used in the function return register. */
16285 if (reg && ix86_check_avx256_register (&reg, NULL))
16286 return AVX_U128_DIRTY;
16287
16288 return AVX_U128_CLEAN;
16289 }
16290
16291 /* Return a mode that ENTITY is assumed to be
16292 switched to at function exit. */
16293
16294 static int
16295 ix86_mode_exit (int entity)
16296 {
16297 switch (entity)
16298 {
16299 case AVX_U128:
16300 return ix86_avx_u128_mode_exit ();
16301 case I387_TRUNC:
16302 case I387_FLOOR:
16303 case I387_CEIL:
16304 case I387_MASK_PM:
16305 return I387_CW_ANY;
16306 default:
16307 gcc_unreachable ();
16308 }
16309 }
16310
16311 static int
16312 ix86_mode_priority (int, int n)
16313 {
16314 return n;
16315 }
16316
16317 /* Output code to initialize the control word copies used by the trunc?f?i
16318    and rounding patterns.  MODE selects the required rounding/masking bits
16319    and the stack slot that receives the modified control word.  */
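/* Background for the constants used below (x87 FCW layout, stated here for
   reference): bits 10-11 form the rounding-control field (00 nearest,
   01 down, 10 up, 11 truncate) and bit 5 is the precision exception mask,
   which is where the 0x0400, 0x0800, 0x0c00 and 0x0020 values come from.  */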
16320
16321 static void
16322 emit_i387_cw_initialization (int mode)
16323 {
16324 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16325 rtx new_mode;
16326
16327 enum ix86_stack_slot slot;
16328
16329 rtx reg = gen_reg_rtx (HImode);
16330
16331 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16332 emit_move_insn (reg, copy_rtx (stored_mode));
16333
16334 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16335 || optimize_insn_for_size_p ())
16336 {
16337 switch (mode)
16338 {
16339 case I387_CW_TRUNC:
16340 /* round toward zero (truncate) */
16341 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16342 slot = SLOT_CW_TRUNC;
16343 break;
16344
16345 case I387_CW_FLOOR:
16346 /* round down toward -oo */
16347 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16348 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16349 slot = SLOT_CW_FLOOR;
16350 break;
16351
16352 case I387_CW_CEIL:
16353 /* round up toward +oo */
16354 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16355 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16356 slot = SLOT_CW_CEIL;
16357 break;
16358
16359 case I387_CW_MASK_PM:
16360 /* mask precision exception for nearbyint() */
16361 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16362 slot = SLOT_CW_MASK_PM;
16363 break;
16364
16365 default:
16366 gcc_unreachable ();
16367 }
16368 }
16369 else
16370 {
16371 switch (mode)
16372 {
16373 case I387_CW_TRUNC:
16374 /* round toward zero (truncate) */
16375 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16376 slot = SLOT_CW_TRUNC;
16377 break;
16378
16379 case I387_CW_FLOOR:
16380 /* round down toward -oo */
16381 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16382 slot = SLOT_CW_FLOOR;
16383 break;
16384
16385 case I387_CW_CEIL:
16386 /* round up toward +oo */
16387 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16388 slot = SLOT_CW_CEIL;
16389 break;
16390
16391 case I387_CW_MASK_PM:
16392 /* mask precision exception for nearbyint() */
16393 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16394 slot = SLOT_CW_MASK_PM;
16395 break;
16396
16397 default:
16398 gcc_unreachable ();
16399 }
16400 }
16401
16402 gcc_assert (slot < MAX_386_STACK_LOCALS);
16403
16404 new_mode = assign_386_stack_local (HImode, slot);
16405 emit_move_insn (new_mode, reg);
16406 }
16407
16408 /* Emit vzeroupper. */
16409
16410 void
16411 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16412 {
16413 int i;
16414
16415 /* Cancel automatic vzeroupper insertion if there are
16416 live call-saved SSE registers at the insertion point. */
16417
16418 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16419 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16420 return;
16421
16422 if (TARGET_64BIT)
16423 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16424 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16425 return;
16426
16427 emit_insn (gen_avx_vzeroupper ());
16428 }
16429
16432 /* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
16433 is the set of hard registers live at the point where the insn(s)
16434 are to be inserted. */
16435
16436 static void
16437 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16438 HARD_REG_SET regs_live)
16439 {
16440 switch (entity)
16441 {
16442 case AVX_U128:
16443 if (mode == AVX_U128_CLEAN)
16444 ix86_avx_emit_vzeroupper (regs_live);
16445 break;
16446 case I387_TRUNC:
16447 case I387_FLOOR:
16448 case I387_CEIL:
16449 case I387_MASK_PM:
16450 if (mode != I387_CW_ANY
16451 && mode != I387_CW_UNINITIALIZED)
16452 emit_i387_cw_initialization (mode);
16453 break;
16454 default:
16455 gcc_unreachable ();
16456 }
16457 }
16458
16459 /* Output code for INSN to convert a float to a signed int. OPERANDS
16460 are the insn operands. The output may be [HSD]Imode and the input
16461 operand may be [SDX]Fmode. */
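/* A sketch of a typical non-fisttp DImode sequence built below (operand 2
   is the saved control word, operand 3 the truncating one):
	fld	%st(0)		# only if the stack top does not die
	fldcw	%3		# switch rounding to truncation
	fistp	<mem>		# store the integer result and pop
	fldcw	%2		# restore the previous control word  */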
16462
16463 const char *
16464 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16465 {
16466 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16467 int dimode_p = GET_MODE (operands[0]) == DImode;
16468 int round_mode = get_attr_i387_cw (insn);
16469
16470 /* Jump through a hoop or two for DImode, since the hardware has no
16471 non-popping instruction. We used to do this a different way, but
16472 that was somewhat fragile and broke with post-reload splitters. */
16473 if ((dimode_p || fisttp) && !stack_top_dies)
16474 output_asm_insn ("fld\t%y1", operands);
16475
16476 gcc_assert (STACK_TOP_P (operands[1]));
16477 gcc_assert (MEM_P (operands[0]));
16478 gcc_assert (GET_MODE (operands[1]) != TFmode);
16479
16480 if (fisttp)
16481 output_asm_insn ("fisttp%Z0\t%0", operands);
16482 else
16483 {
16484 if (round_mode != I387_CW_ANY)
16485 output_asm_insn ("fldcw\t%3", operands);
16486 if (stack_top_dies || dimode_p)
16487 output_asm_insn ("fistp%Z0\t%0", operands);
16488 else
16489 output_asm_insn ("fist%Z0\t%0", operands);
16490 if (round_mode != I387_CW_ANY)
16491 output_asm_insn ("fldcw\t%2", operands);
16492 }
16493
16494 return "";
16495 }
16496
16497 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16498 have the values zero or one, indicates the ffreep insn's operand
16499 from the OPERANDS array. */
16500
16501 static const char *
16502 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16503 {
16504 if (TARGET_USE_FFREEP)
16505 #ifdef HAVE_AS_IX86_FFREEP
16506 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16507 #else
16508 {
16509 static char retval[32];
16510 int regno = REGNO (operands[opno]);
16511
16512 gcc_assert (STACK_REGNO_P (regno));
16513
16514 regno -= FIRST_STACK_REG;
16515
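      /* Explanatory note: this emits the raw two-byte encoding of
	 "ffreep %st(regno)" (0xdf, 0xc0+regno) as a little-endian 16-bit
	 word, for assemblers that do not know the ffreep mnemonic.  */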
16516 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16517 return retval;
16518 }
16519 #endif
16520
16521 return opno ? "fstp\t%y1" : "fstp\t%y0";
16522 }
16523
16524
16525 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16526 should be used. UNORDERED_P is true when fucom should be used. */
16527
16528 const char *
16529 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16530 {
16531 int stack_top_dies;
16532 rtx cmp_op0, cmp_op1;
16533 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16534
16535 if (eflags_p)
16536 {
16537 cmp_op0 = operands[0];
16538 cmp_op1 = operands[1];
16539 }
16540 else
16541 {
16542 cmp_op0 = operands[1];
16543 cmp_op1 = operands[2];
16544 }
16545
16546 if (is_sse)
16547 {
16548 if (GET_MODE (operands[0]) == SFmode)
16549 if (unordered_p)
16550 return "%vucomiss\t{%1, %0|%0, %1}";
16551 else
16552 return "%vcomiss\t{%1, %0|%0, %1}";
16553 else
16554 if (unordered_p)
16555 return "%vucomisd\t{%1, %0|%0, %1}";
16556 else
16557 return "%vcomisd\t{%1, %0|%0, %1}";
16558 }
16559
16560 gcc_assert (STACK_TOP_P (cmp_op0));
16561
16562 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16563
16564 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16565 {
16566 if (stack_top_dies)
16567 {
16568 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16569 return output_387_ffreep (operands, 1);
16570 }
16571 else
16572 return "ftst\n\tfnstsw\t%0";
16573 }
16574
16575 if (STACK_REG_P (cmp_op1)
16576 && stack_top_dies
16577 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16578 && REGNO (cmp_op1) != FIRST_STACK_REG)
16579 {
16580 	  /* If the top of the 387 stack dies, and the other operand
16581 	     is also a stack register that dies, then this must be a
16582 	     `fcompp' float compare.  */
16583
16584 if (eflags_p)
16585 {
16586 /* There is no double popping fcomi variant. Fortunately,
16587 eflags is immune from the fstp's cc clobbering. */
16588 if (unordered_p)
16589 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16590 else
16591 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16592 return output_387_ffreep (operands, 0);
16593 }
16594 else
16595 {
16596 if (unordered_p)
16597 return "fucompp\n\tfnstsw\t%0";
16598 else
16599 return "fcompp\n\tfnstsw\t%0";
16600 }
16601 }
16602 else
16603 {
16604 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16605
16606 static const char * const alt[16] =
16607 {
16608 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16609 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16610 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16611 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16612
16613 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16614 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16615 NULL,
16616 NULL,
16617
16618 "fcomi\t{%y1, %0|%0, %y1}",
16619 "fcomip\t{%y1, %0|%0, %y1}",
16620 "fucomi\t{%y1, %0|%0, %y1}",
16621 "fucomip\t{%y1, %0|%0, %y1}",
16622
16623 NULL,
16624 NULL,
16625 NULL,
16626 NULL
16627 };
16628
16629 int mask;
16630 const char *ret;
16631
16632 mask = eflags_p << 3;
16633 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16634 mask |= unordered_p << 1;
16635 mask |= stack_top_dies;
16636
16637 gcc_assert (mask < 16);
16638 ret = alt[mask];
16639 gcc_assert (ret);
16640
16641 return ret;
16642 }
16643 }
16644
16645 void
16646 ix86_output_addr_vec_elt (FILE *file, int value)
16647 {
16648 const char *directive = ASM_LONG;
16649
16650 #ifdef ASM_QUAD
16651 if (TARGET_LP64)
16652 directive = ASM_QUAD;
16653 #else
16654 gcc_assert (!TARGET_64BIT);
16655 #endif
16656
16657 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16658 }
16659
16660 void
16661 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16662 {
16663 const char *directive = ASM_LONG;
16664
16665 #ifdef ASM_QUAD
16666 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16667 directive = ASM_QUAD;
16668 #else
16669 gcc_assert (!TARGET_64BIT);
16670 #endif
16671 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16672 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16673 fprintf (file, "%s%s%d-%s%d\n",
16674 directive, LPREFIX, value, LPREFIX, rel);
16675 else if (HAVE_AS_GOTOFF_IN_DATA)
16676 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16677 #if TARGET_MACHO
16678 else if (TARGET_MACHO)
16679 {
16680 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16681 machopic_output_function_base_name (file);
16682 putc ('\n', file);
16683 }
16684 #endif
16685 else
16686 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16687 GOT_SYMBOL_NAME, LPREFIX, value);
16688 }
16689 \f
16690 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16691 for the target. */
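/* For example, clearing %eax is normally emitted as "xorl %eax, %eax",
   which is shorter than "movl $0, %eax" but clobbers the flags; that is
   why the expansion below attaches a FLAGS_REG clobber whenever the xor
   form will be used.  */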
16692
16693 void
16694 ix86_expand_clear (rtx dest)
16695 {
16696 rtx tmp;
16697
16698 /* We play register width games, which are only valid after reload. */
16699 gcc_assert (reload_completed);
16700
16701 /* Avoid HImode and its attendant prefix byte. */
16702 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16703 dest = gen_rtx_REG (SImode, REGNO (dest));
16704 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16705
16706 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16707 {
16708 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16709 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16710 }
16711
16712 emit_insn (tmp);
16713 }
16714
16715 /* X is an unchanging MEM. If it is a constant pool reference, return
16716 the constant pool rtx, else NULL. */
16717
16718 rtx
16719 maybe_get_pool_constant (rtx x)
16720 {
16721 x = ix86_delegitimize_address (XEXP (x, 0));
16722
16723 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16724 return get_pool_constant (x);
16725
16726 return NULL_RTX;
16727 }
16728
16729 void
16730 ix86_expand_move (enum machine_mode mode, rtx operands[])
16731 {
16732 rtx op0, op1;
16733 enum tls_model model;
16734
16735 op0 = operands[0];
16736 op1 = operands[1];
16737
16738 if (GET_CODE (op1) == SYMBOL_REF)
16739 {
16740 rtx tmp;
16741
16742 model = SYMBOL_REF_TLS_MODEL (op1);
16743 if (model)
16744 {
16745 op1 = legitimize_tls_address (op1, model, true);
16746 op1 = force_operand (op1, op0);
16747 if (op1 == op0)
16748 return;
16749 op1 = convert_to_mode (mode, op1, 1);
16750 }
16751 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16752 op1 = tmp;
16753 }
16754 else if (GET_CODE (op1) == CONST
16755 && GET_CODE (XEXP (op1, 0)) == PLUS
16756 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16757 {
16758 rtx addend = XEXP (XEXP (op1, 0), 1);
16759 rtx symbol = XEXP (XEXP (op1, 0), 0);
16760 rtx tmp;
16761
16762 model = SYMBOL_REF_TLS_MODEL (symbol);
16763 if (model)
16764 tmp = legitimize_tls_address (symbol, model, true);
16765 else
16766 tmp = legitimize_pe_coff_symbol (symbol, true);
16767
16768 if (tmp)
16769 {
16770 tmp = force_operand (tmp, NULL);
16771 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16772 op0, 1, OPTAB_DIRECT);
16773 if (tmp == op0)
16774 return;
16775 op1 = convert_to_mode (mode, tmp, 1);
16776 }
16777 }
16778
16779 if ((flag_pic || MACHOPIC_INDIRECT)
16780 && symbolic_operand (op1, mode))
16781 {
16782 if (TARGET_MACHO && !TARGET_64BIT)
16783 {
16784 #if TARGET_MACHO
16785 /* dynamic-no-pic */
16786 if (MACHOPIC_INDIRECT)
16787 {
16788 rtx temp = ((reload_in_progress
16789 || ((op0 && REG_P (op0))
16790 && mode == Pmode))
16791 ? op0 : gen_reg_rtx (Pmode));
16792 op1 = machopic_indirect_data_reference (op1, temp);
16793 if (MACHOPIC_PURE)
16794 op1 = machopic_legitimize_pic_address (op1, mode,
16795 temp == op1 ? 0 : temp);
16796 }
16797 if (op0 != op1 && GET_CODE (op0) != MEM)
16798 {
16799 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16800 emit_insn (insn);
16801 return;
16802 }
16803 if (GET_CODE (op0) == MEM)
16804 op1 = force_reg (Pmode, op1);
16805 else
16806 {
16807 rtx temp = op0;
16808 if (GET_CODE (temp) != REG)
16809 temp = gen_reg_rtx (Pmode);
16810 temp = legitimize_pic_address (op1, temp);
16811 if (temp == op0)
16812 return;
16813 op1 = temp;
16814 }
16815 /* dynamic-no-pic */
16816 #endif
16817 }
16818 else
16819 {
16820 if (MEM_P (op0))
16821 op1 = force_reg (mode, op1);
16822 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16823 {
16824 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16825 op1 = legitimize_pic_address (op1, reg);
16826 if (op0 == op1)
16827 return;
16828 op1 = convert_to_mode (mode, op1, 1);
16829 }
16830 }
16831 }
16832 else
16833 {
16834 if (MEM_P (op0)
16835 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16836 || !push_operand (op0, mode))
16837 && MEM_P (op1))
16838 op1 = force_reg (mode, op1);
16839
16840 if (push_operand (op0, mode)
16841 && ! general_no_elim_operand (op1, mode))
16842 op1 = copy_to_mode_reg (mode, op1);
16843
16844       /* Force large constants in 64bit compilation into a register
16845 	 to get them CSEed.  */
16846 if (can_create_pseudo_p ()
16847 && (mode == DImode) && TARGET_64BIT
16848 && immediate_operand (op1, mode)
16849 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16850 && !register_operand (op0, mode)
16851 && optimize)
16852 op1 = copy_to_mode_reg (mode, op1);
16853
16854 if (can_create_pseudo_p ()
16855 && FLOAT_MODE_P (mode)
16856 && GET_CODE (op1) == CONST_DOUBLE)
16857 {
16858 /* If we are loading a floating point constant to a register,
16859 force the value to memory now, since we'll get better code
16860 	     out of the back end.  */
16861
16862 op1 = validize_mem (force_const_mem (mode, op1));
16863 if (!register_operand (op0, mode))
16864 {
16865 rtx temp = gen_reg_rtx (mode);
16866 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16867 emit_move_insn (op0, temp);
16868 return;
16869 }
16870 }
16871 }
16872
16873 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16874 }
16875
16876 void
16877 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16878 {
16879 rtx op0 = operands[0], op1 = operands[1];
16880 unsigned int align = GET_MODE_ALIGNMENT (mode);
16881
16882 if (push_operand (op0, VOIDmode))
16883 op0 = emit_move_resolve_push (mode, op0);
16884
16885 /* Force constants other than zero into memory. We do not know how
16886 the instructions used to build constants modify the upper 64 bits
16887      of the register; once we have that information we may be able
16888 to handle some of them more efficiently. */
16889 if (can_create_pseudo_p ()
16890 && register_operand (op0, mode)
16891 && (CONSTANT_P (op1)
16892 || (GET_CODE (op1) == SUBREG
16893 && CONSTANT_P (SUBREG_REG (op1))))
16894 && !standard_sse_constant_p (op1))
16895 op1 = validize_mem (force_const_mem (mode, op1));
16896
16897   /* We need to check memory alignment for SSE mode since an attribute
16898      can make operands unaligned.  */
16899 if (can_create_pseudo_p ()
16900 && SSE_REG_MODE_P (mode)
16901 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16902 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16903 {
16904 rtx tmp[2];
16905
16906 /* ix86_expand_vector_move_misalign() does not like constants ... */
16907 if (CONSTANT_P (op1)
16908 || (GET_CODE (op1) == SUBREG
16909 && CONSTANT_P (SUBREG_REG (op1))))
16910 op1 = validize_mem (force_const_mem (mode, op1));
16911
16912 /* ... nor both arguments in memory. */
16913 if (!register_operand (op0, mode)
16914 && !register_operand (op1, mode))
16915 op1 = force_reg (mode, op1);
16916
16917 tmp[0] = op0; tmp[1] = op1;
16918 ix86_expand_vector_move_misalign (mode, tmp);
16919 return;
16920 }
16921
16922   /* If neither operand is already a register, force operand1 into one.  */
16923 if (can_create_pseudo_p ()
16924 && !register_operand (op0, mode)
16925 && !register_operand (op1, mode))
16926 {
16927 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16928 return;
16929 }
16930
16931 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16932 }
16933
16934 /* Split 32-byte AVX unaligned load and store if needed. */
16935
16936 static void
16937 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16938 {
16939 rtx m;
16940 rtx (*extract) (rtx, rtx, rtx);
16941 rtx (*load_unaligned) (rtx, rtx);
16942 rtx (*store_unaligned) (rtx, rtx);
16943 enum machine_mode mode;
16944
16945 switch (GET_MODE (op0))
16946 {
16947 default:
16948 gcc_unreachable ();
16949 case V32QImode:
16950 extract = gen_avx_vextractf128v32qi;
16951 load_unaligned = gen_avx_loaddquv32qi;
16952 store_unaligned = gen_avx_storedquv32qi;
16953 mode = V16QImode;
16954 break;
16955 case V8SFmode:
16956 extract = gen_avx_vextractf128v8sf;
16957 load_unaligned = gen_avx_loadups256;
16958 store_unaligned = gen_avx_storeups256;
16959 mode = V4SFmode;
16960 break;
16961 case V4DFmode:
16962 extract = gen_avx_vextractf128v4df;
16963 load_unaligned = gen_avx_loadupd256;
16964 store_unaligned = gen_avx_storeupd256;
16965 mode = V2DFmode;
16966 break;
16967 }
16968
16969 if (MEM_P (op1))
16970 {
16971 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16972 {
16973 rtx r = gen_reg_rtx (mode);
16974 m = adjust_address (op1, mode, 0);
16975 emit_move_insn (r, m);
16976 m = adjust_address (op1, mode, 16);
16977 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16978 emit_move_insn (op0, r);
16979 }
16980 /* Normal *mov<mode>_internal pattern will handle
16981 unaligned loads just fine if misaligned_operand
16982 is true, and without the UNSPEC it can be combined
16983 with arithmetic instructions. */
16984 else if (misaligned_operand (op1, GET_MODE (op1)))
16985 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16986 else
16987 emit_insn (load_unaligned (op0, op1));
16988 }
16989 else if (MEM_P (op0))
16990 {
16991 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16992 {
16993 m = adjust_address (op0, mode, 0);
16994 emit_insn (extract (m, op1, const0_rtx));
16995 m = adjust_address (op0, mode, 16);
16996 emit_insn (extract (m, op1, const1_rtx));
16997 }
16998 else
16999 emit_insn (store_unaligned (op0, op1));
17000 }
17001 else
17002 gcc_unreachable ();
17003 }
17004
17005 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17006 straight to ix86_expand_vector_move. */
17007 /* Code generation for scalar reg-reg moves of single and double precision data:
17008 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17009 movaps reg, reg
17010 else
17011 movss reg, reg
17012 if (x86_sse_partial_reg_dependency == true)
17013 movapd reg, reg
17014 else
17015 movsd reg, reg
17016
17017 Code generation for scalar loads of double precision data:
17018 if (x86_sse_split_regs == true)
17019 movlpd mem, reg (gas syntax)
17020 else
17021 movsd mem, reg
17022
17023 Code generation for unaligned packed loads of single precision data
17024 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17025 if (x86_sse_unaligned_move_optimal)
17026 movups mem, reg
17027
17028 if (x86_sse_partial_reg_dependency == true)
17029 {
17030 xorps reg, reg
17031 movlps mem, reg
17032 movhps mem+8, reg
17033 }
17034 else
17035 {
17036 movlps mem, reg
17037 movhps mem+8, reg
17038 }
17039
17040 Code generation for unaligned packed loads of double precision data
17041 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17042 if (x86_sse_unaligned_move_optimal)
17043 movupd mem, reg
17044
17045 if (x86_sse_split_regs == true)
17046 {
17047 movlpd mem, reg
17048 movhpd mem+8, reg
17049 }
17050 else
17051 {
17052 movsd mem, reg
17053 movhpd mem+8, reg
17054 }
17055 */
17056
17057 void
17058 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17059 {
17060 rtx op0, op1, orig_op0 = NULL_RTX, m;
17061 rtx (*load_unaligned) (rtx, rtx);
17062 rtx (*store_unaligned) (rtx, rtx);
17063
17064 op0 = operands[0];
17065 op1 = operands[1];
17066
17067 if (GET_MODE_SIZE (mode) == 64)
17068 {
17069 switch (GET_MODE_CLASS (mode))
17070 {
17071 case MODE_VECTOR_INT:
17072 case MODE_INT:
17073 if (GET_MODE (op0) != V16SImode)
17074 {
17075 if (!MEM_P (op0))
17076 {
17077 orig_op0 = op0;
17078 op0 = gen_reg_rtx (V16SImode);
17079 }
17080 else
17081 op0 = gen_lowpart (V16SImode, op0);
17082 }
17083 op1 = gen_lowpart (V16SImode, op1);
17084 /* FALLTHRU */
17085
17086 case MODE_VECTOR_FLOAT:
17087 switch (GET_MODE (op0))
17088 {
17089 default:
17090 gcc_unreachable ();
17091 case V16SImode:
17092 load_unaligned = gen_avx512f_loaddquv16si;
17093 store_unaligned = gen_avx512f_storedquv16si;
17094 break;
17095 case V16SFmode:
17096 load_unaligned = gen_avx512f_loadups512;
17097 store_unaligned = gen_avx512f_storeups512;
17098 break;
17099 case V8DFmode:
17100 load_unaligned = gen_avx512f_loadupd512;
17101 store_unaligned = gen_avx512f_storeupd512;
17102 break;
17103 }
17104
17105 if (MEM_P (op1))
17106 emit_insn (load_unaligned (op0, op1));
17107 else if (MEM_P (op0))
17108 emit_insn (store_unaligned (op0, op1));
17109 else
17110 gcc_unreachable ();
17111 if (orig_op0)
17112 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17113 break;
17114
17115 default:
17116 gcc_unreachable ();
17117 }
17118
17119 return;
17120 }
17121
17122 if (TARGET_AVX
17123 && GET_MODE_SIZE (mode) == 32)
17124 {
17125 switch (GET_MODE_CLASS (mode))
17126 {
17127 case MODE_VECTOR_INT:
17128 case MODE_INT:
17129 if (GET_MODE (op0) != V32QImode)
17130 {
17131 if (!MEM_P (op0))
17132 {
17133 orig_op0 = op0;
17134 op0 = gen_reg_rtx (V32QImode);
17135 }
17136 else
17137 op0 = gen_lowpart (V32QImode, op0);
17138 }
17139 op1 = gen_lowpart (V32QImode, op1);
17140 /* FALLTHRU */
17141
17142 case MODE_VECTOR_FLOAT:
17143 ix86_avx256_split_vector_move_misalign (op0, op1);
17144 if (orig_op0)
17145 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17146 break;
17147
17148 default:
17149 gcc_unreachable ();
17150 }
17151
17152 return;
17153 }
17154
17155 if (MEM_P (op1))
17156 {
17157 /* Normal *mov<mode>_internal pattern will handle
17158 unaligned loads just fine if misaligned_operand
17159 is true, and without the UNSPEC it can be combined
17160 with arithmetic instructions. */
17161 if (TARGET_AVX
17162 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17163 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17164 && misaligned_operand (op1, GET_MODE (op1)))
17165 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17166 /* ??? If we have typed data, then it would appear that using
17167 movdqu is the only way to get unaligned data loaded with
17168 integer type. */
17169 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17170 {
17171 if (GET_MODE (op0) != V16QImode)
17172 {
17173 orig_op0 = op0;
17174 op0 = gen_reg_rtx (V16QImode);
17175 }
17176 op1 = gen_lowpart (V16QImode, op1);
17177 /* We will eventually emit movups based on insn attributes. */
17178 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17179 if (orig_op0)
17180 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17181 }
17182 else if (TARGET_SSE2 && mode == V2DFmode)
17183 {
17184 rtx zero;
17185
17186 if (TARGET_AVX
17187 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17188 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17189 || optimize_insn_for_size_p ())
17190 {
17191 /* We will eventually emit movups based on insn attributes. */
17192 emit_insn (gen_sse2_loadupd (op0, op1));
17193 return;
17194 }
17195
17196 /* When SSE registers are split into halves, we can avoid
17197 writing to the top half twice. */
17198 if (TARGET_SSE_SPLIT_REGS)
17199 {
17200 emit_clobber (op0);
17201 zero = op0;
17202 }
17203 else
17204 {
17205 /* ??? Not sure about the best option for the Intel chips.
17206 The following would seem to satisfy; the register is
17207 entirely cleared, breaking the dependency chain. We
17208 then store to the upper half, with a dependency depth
17209 of one. A rumor has it that Intel recommends two movsd
17210 followed by an unpacklpd, but this is unconfirmed. And
17211 given that the dependency depth of the unpacklpd would
17212 still be one, I'm not sure why this would be better. */
17213 zero = CONST0_RTX (V2DFmode);
17214 }
17215
17216 m = adjust_address (op1, DFmode, 0);
17217 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17218 m = adjust_address (op1, DFmode, 8);
17219 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17220 }
17221 else
17222 {
17223 rtx t;
17224
17225 if (TARGET_AVX
17226 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17227 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17228 || optimize_insn_for_size_p ())
17229 {
17230 if (GET_MODE (op0) != V4SFmode)
17231 {
17232 orig_op0 = op0;
17233 op0 = gen_reg_rtx (V4SFmode);
17234 }
17235 op1 = gen_lowpart (V4SFmode, op1);
17236 emit_insn (gen_sse_loadups (op0, op1));
17237 if (orig_op0)
17238 emit_move_insn (orig_op0,
17239 gen_lowpart (GET_MODE (orig_op0), op0));
17240 return;
17241 }
17242
17243 if (mode != V4SFmode)
17244 t = gen_reg_rtx (V4SFmode);
17245 else
17246 t = op0;
17247
17248 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17249 emit_move_insn (t, CONST0_RTX (V4SFmode));
17250 else
17251 emit_clobber (t);
17252
17253 m = adjust_address (op1, V2SFmode, 0);
17254 emit_insn (gen_sse_loadlps (t, t, m));
17255 m = adjust_address (op1, V2SFmode, 8);
17256 emit_insn (gen_sse_loadhps (t, t, m));
17257 if (mode != V4SFmode)
17258 emit_move_insn (op0, gen_lowpart (mode, t));
17259 }
17260 }
17261 else if (MEM_P (op0))
17262 {
17263 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17264 {
17265 op0 = gen_lowpart (V16QImode, op0);
17266 op1 = gen_lowpart (V16QImode, op1);
17267 /* We will eventually emit movups based on insn attributes. */
17268 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17269 }
17270 else if (TARGET_SSE2 && mode == V2DFmode)
17271 {
17272 if (TARGET_AVX
17273 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17274 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17275 || optimize_insn_for_size_p ())
17276 /* We will eventually emit movups based on insn attributes. */
17277 emit_insn (gen_sse2_storeupd (op0, op1));
17278 else
17279 {
17280 m = adjust_address (op0, DFmode, 0);
17281 emit_insn (gen_sse2_storelpd (m, op1));
17282 m = adjust_address (op0, DFmode, 8);
17283 emit_insn (gen_sse2_storehpd (m, op1));
17284 }
17285 }
17286 else
17287 {
17288 if (mode != V4SFmode)
17289 op1 = gen_lowpart (V4SFmode, op1);
17290
17291 if (TARGET_AVX
17292 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17293 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17294 || optimize_insn_for_size_p ())
17295 {
17296 op0 = gen_lowpart (V4SFmode, op0);
17297 emit_insn (gen_sse_storeups (op0, op1));
17298 }
17299 else
17300 {
17301 m = adjust_address (op0, V2SFmode, 0);
17302 emit_insn (gen_sse_storelps (m, op1));
17303 m = adjust_address (op0, V2SFmode, 8);
17304 emit_insn (gen_sse_storehps (m, op1));
17305 }
17306 }
17307 }
17308 else
17309 gcc_unreachable ();
17310 }
17311
17312 /* Helper function of ix86_fixup_binary_operands to canonicalize
17313 operand order. Returns true if the operands should be swapped. */
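/* For example (illustration only): for a commutative PLUS with operands[1]
   a CONST_INT and operands[2] a REG this returns true, so the caller swaps
   them and the immediate ends up as the second source, matching the
   machine's reg-op-immediate shape.  */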
17314
17315 static bool
17316 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17317 rtx operands[])
17318 {
17319 rtx dst = operands[0];
17320 rtx src1 = operands[1];
17321 rtx src2 = operands[2];
17322
17323 /* If the operation is not commutative, we can't do anything. */
17324 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17325 return false;
17326
17327 /* Highest priority is that src1 should match dst. */
17328 if (rtx_equal_p (dst, src1))
17329 return false;
17330 if (rtx_equal_p (dst, src2))
17331 return true;
17332
17333 /* Next highest priority is that immediate constants come second. */
17334 if (immediate_operand (src2, mode))
17335 return false;
17336 if (immediate_operand (src1, mode))
17337 return true;
17338
17339 /* Lowest priority is that memory references should come second. */
17340 if (MEM_P (src2))
17341 return false;
17342 if (MEM_P (src1))
17343 return true;
17344
17345 return false;
17346 }
17347
17348
17349 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17350 destination to use for the operation. If different from the true
17351 destination in operands[0], a copy operation will be required. */
17352
17353 rtx
17354 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17355 rtx operands[])
17356 {
17357 rtx dst = operands[0];
17358 rtx src1 = operands[1];
17359 rtx src2 = operands[2];
17360
17361 /* Canonicalize operand order. */
17362 if (ix86_swap_binary_operands_p (code, mode, operands))
17363 {
17364 rtx temp;
17365
17366 /* It is invalid to swap operands of different modes. */
17367 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17368
17369 temp = src1;
17370 src1 = src2;
17371 src2 = temp;
17372 }
17373
17374   /* The two source operands cannot both be in memory.  */
17375 if (MEM_P (src1) && MEM_P (src2))
17376 {
17377 /* Optimization: Only read from memory once. */
17378 if (rtx_equal_p (src1, src2))
17379 {
17380 src2 = force_reg (mode, src2);
17381 src1 = src2;
17382 }
17383 else if (rtx_equal_p (dst, src1))
17384 src2 = force_reg (mode, src2);
17385 else
17386 src1 = force_reg (mode, src1);
17387 }
17388
17389 /* If the destination is memory, and we do not have matching source
17390 operands, do things in registers. */
17391 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17392 dst = gen_reg_rtx (mode);
17393
17394 /* Source 1 cannot be a constant. */
17395 if (CONSTANT_P (src1))
17396 src1 = force_reg (mode, src1);
17397
17398 /* Source 1 cannot be a non-matching memory. */
17399 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17400 src1 = force_reg (mode, src1);
17401
17402   /* For an integer PLUS, load a memory source into a register to help later address combining.  */
17403 if (code == PLUS
17404 && GET_MODE_CLASS (mode) == MODE_INT
17405 && MEM_P (src2))
17406 src2 = force_reg (mode, src2);
17407
17408 operands[1] = src1;
17409 operands[2] = src2;
17410 return dst;
17411 }
17412
17413 /* Similarly, but assume that the destination has already been
17414 set up properly. */
17415
17416 void
17417 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17418 enum machine_mode mode, rtx operands[])
17419 {
17420 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17421 gcc_assert (dst == operands[0]);
17422 }
17423
17424 /* Attempt to expand a binary operator.  Make the expansion closer to the
17425    actual machine than just general_operand, which would allow 3 separate
17426    memory references (one output, two input) in a single insn.  */
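/* In the common case below this emits a PARALLEL such as (a sketch):
     (parallel [(set (reg:SI 0) (plus:SI (reg:SI 1) (reg:SI 2)))
		(clobber (reg:CC FLAGS_REG))])
   so the insn matches the define_insns that expect the flags clobber; the
   PLUS-during-reload and LEA cases below emit the bare SET instead.  */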
17427
17428 void
17429 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17430 rtx operands[])
17431 {
17432 rtx src1, src2, dst, op, clob;
17433
17434 dst = ix86_fixup_binary_operands (code, mode, operands);
17435 src1 = operands[1];
17436 src2 = operands[2];
17437
17438 /* Emit the instruction. */
17439
17440 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17441 if (reload_in_progress)
17442 {
17443 /* Reload doesn't know about the flags register, and doesn't know that
17444 it doesn't want to clobber it. We can only do this with PLUS. */
17445 gcc_assert (code == PLUS);
17446 emit_insn (op);
17447 }
17448 else if (reload_completed
17449 && code == PLUS
17450 && !rtx_equal_p (dst, src1))
17451 {
17452 /* This is going to be an LEA; avoid splitting it later. */
17453 emit_insn (op);
17454 }
17455 else
17456 {
17457 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17458 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17459 }
17460
17461 /* Fix up the destination if needed. */
17462 if (dst != operands[0])
17463 emit_move_insn (operands[0], dst);
17464 }
17465
17466 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17467 the given OPERANDS. */
17468
17469 void
17470 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17471 rtx operands[])
17472 {
17473 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17474 if (GET_CODE (operands[1]) == SUBREG)
17475 {
17476 op1 = operands[1];
17477 op2 = operands[2];
17478 }
17479 else if (GET_CODE (operands[2]) == SUBREG)
17480 {
17481 op1 = operands[2];
17482 op2 = operands[1];
17483 }
17484   /* Optimize (__m128i) d | (__m128i) e and similar code
17485      when d and e are float vectors into a float vector logical
17486      insn.  In C/C++ without using intrinsics there is no other way
17487      to express a vector logical operation on float vectors than
17488      to cast them temporarily to integer vectors.  */
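  /* E.g. (illustration): with "__m128 a, b", the expression
     "(__m128i) a & (__m128i) b" reaches here as an AND of SUBREGs of
     V4SFmode values; performing the AND directly in V4SFmode lets andps
     be emitted instead of pand, keeping the values in the float domain.  */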
17489 if (op1
17490 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17491 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17492 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17493 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17494 && SUBREG_BYTE (op1) == 0
17495 && (GET_CODE (op2) == CONST_VECTOR
17496 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17497 && SUBREG_BYTE (op2) == 0))
17498 && can_create_pseudo_p ())
17499 {
17500 rtx dst;
17501 switch (GET_MODE (SUBREG_REG (op1)))
17502 {
17503 case V4SFmode:
17504 case V8SFmode:
17505 case V2DFmode:
17506 case V4DFmode:
17507 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17508 if (GET_CODE (op2) == CONST_VECTOR)
17509 {
17510 op2 = gen_lowpart (GET_MODE (dst), op2);
17511 op2 = force_reg (GET_MODE (dst), op2);
17512 }
17513 else
17514 {
17515 op1 = operands[1];
17516 op2 = SUBREG_REG (operands[2]);
17517 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17518 op2 = force_reg (GET_MODE (dst), op2);
17519 }
17520 op1 = SUBREG_REG (op1);
17521 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17522 op1 = force_reg (GET_MODE (dst), op1);
17523 emit_insn (gen_rtx_SET (VOIDmode, dst,
17524 gen_rtx_fmt_ee (code, GET_MODE (dst),
17525 op1, op2)));
17526 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17527 return;
17528 default:
17529 break;
17530 }
17531 }
17532 if (!nonimmediate_operand (operands[1], mode))
17533 operands[1] = force_reg (mode, operands[1]);
17534 if (!nonimmediate_operand (operands[2], mode))
17535 operands[2] = force_reg (mode, operands[2]);
17536 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17537 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17538 gen_rtx_fmt_ee (code, mode, operands[1],
17539 operands[2])));
17540 }
17541
17542 /* Return TRUE or FALSE depending on whether the binary operator meets the
17543 appropriate constraints. */
17544
17545 bool
17546 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17547 rtx operands[3])
17548 {
17549 rtx dst = operands[0];
17550 rtx src1 = operands[1];
17551 rtx src2 = operands[2];
17552
17553   /* The two source operands cannot both be in memory.  */
17554 if (MEM_P (src1) && MEM_P (src2))
17555 return false;
17556
17557 /* Canonicalize operand order for commutative operators. */
17558 if (ix86_swap_binary_operands_p (code, mode, operands))
17559 {
17560 rtx temp = src1;
17561 src1 = src2;
17562 src2 = temp;
17563 }
17564
17565 /* If the destination is memory, we must have a matching source operand. */
17566 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17567 return false;
17568
17569 /* Source 1 cannot be a constant. */
17570 if (CONSTANT_P (src1))
17571 return false;
17572
17573 /* Source 1 cannot be a non-matching memory. */
17574 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17575 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17576 return (code == AND
17577 && (mode == HImode
17578 || mode == SImode
17579 || (TARGET_64BIT && mode == DImode))
17580 && satisfies_constraint_L (src2));
17581
17582 return true;
17583 }
17584
17585 /* Attempt to expand a unary operator.  Make the expansion closer to the
17586    actual machine than just general_operand, which would allow 2 separate
17587    memory references (one output, one input) in a single insn.  */
17588
17589 void
17590 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17591 rtx operands[])
17592 {
17593 int matching_memory;
17594 rtx src, dst, op, clob;
17595
17596 dst = operands[0];
17597 src = operands[1];
17598
17599 /* If the destination is memory, and we do not have matching source
17600 operands, do things in registers. */
17601 matching_memory = 0;
17602 if (MEM_P (dst))
17603 {
17604 if (rtx_equal_p (dst, src))
17605 matching_memory = 1;
17606 else
17607 dst = gen_reg_rtx (mode);
17608 }
17609
17610   /* When the source operand is memory, the destination must match.  */
17611 if (MEM_P (src) && !matching_memory)
17612 src = force_reg (mode, src);
17613
17614 /* Emit the instruction. */
17615
17616 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17617 if (reload_in_progress || code == NOT)
17618 {
17619 /* Reload doesn't know about the flags register, and doesn't know that
17620 it doesn't want to clobber it. */
17621 gcc_assert (code == NOT);
17622 emit_insn (op);
17623 }
17624 else
17625 {
17626 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17627 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17628 }
17629
17630 /* Fix up the destination if needed. */
17631 if (dst != operands[0])
17632 emit_move_insn (operands[0], dst);
17633 }
17634
17635 /* Split a 32-bit/64-bit divmod with an 8-bit unsigned divmod if the dividend and
17636 divisor are both within the range [0-255]. */
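/* Illustrative sketch of the control flow this splitter emits (approximate
   assembly for a 32-bit unsigned case; the real expansion uses the RTL
   patterns selected below rather than these exact mnemonics):

       mov     scratch, dividend
       or      scratch, divisor
       test    scratch, -0x100          ; do both values fit in 8 bits?
       je      .Lqimode
       xor     edx, edx                 ; full-width divide (slow path)
       div     divisor
       jmp     .Lend
   .Lqimode:
       div     divisor_8bit             ; AX / r8: AL = quotient, AH = remainder
   .Lend:                                                                   */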
17637
17638 void
17639 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17640 bool signed_p)
17641 {
17642 rtx_code_label *end_label, *qimode_label;
17643 rtx insn, div, mod;
17644 rtx scratch, tmp0, tmp1, tmp2;
17645 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17646 rtx (*gen_zero_extend) (rtx, rtx);
17647 rtx (*gen_test_ccno_1) (rtx, rtx);
17648
17649 switch (mode)
17650 {
17651 case SImode:
17652 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17653 gen_test_ccno_1 = gen_testsi_ccno_1;
17654 gen_zero_extend = gen_zero_extendqisi2;
17655 break;
17656 case DImode:
17657 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17658 gen_test_ccno_1 = gen_testdi_ccno_1;
17659 gen_zero_extend = gen_zero_extendqidi2;
17660 break;
17661 default:
17662 gcc_unreachable ();
17663 }
17664
17665 end_label = gen_label_rtx ();
17666 qimode_label = gen_label_rtx ();
17667
17668 scratch = gen_reg_rtx (mode);
17669
17670 /* Use 8-bit unsigned divmod if the dividend and divisor are within
17671 the range [0-255]. */
17672 emit_move_insn (scratch, operands[2]);
17673 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17674 scratch, 1, OPTAB_DIRECT);
17675 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17676 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17677 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17678 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17679 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17680 pc_rtx);
17681 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17682 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17683 JUMP_LABEL (insn) = qimode_label;
17684
17685 /* Generate the original signed/unsigned divmod. */
17686 div = gen_divmod4_1 (operands[0], operands[1],
17687 operands[2], operands[3]);
17688 emit_insn (div);
17689
17690 /* Branch to the end. */
17691 emit_jump_insn (gen_jump (end_label));
17692 emit_barrier ();
17693
17694 /* Generate the 8-bit unsigned divide. */
17695 emit_label (qimode_label);
17696 /* Don't use operands[0] for the result of the 8-bit divide since not all
17697 registers support QImode ZERO_EXTRACT. */
17698 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17699 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17700 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17701 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17702
17703 if (signed_p)
17704 {
17705 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17706 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17707 }
17708 else
17709 {
17710 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17711 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17712 }
17713
17714 /* Extract remainder from AH. */
17715 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17716 if (REG_P (operands[1]))
17717 insn = emit_move_insn (operands[1], tmp1);
17718 else
17719 {
17720 /* Need a new scratch register since the old one holds the result
17721 of the 8-bit divide. */
17722 scratch = gen_reg_rtx (mode);
17723 emit_move_insn (scratch, tmp1);
17724 insn = emit_move_insn (operands[1], scratch);
17725 }
17726 set_unique_reg_note (insn, REG_EQUAL, mod);
17727
17728 /* Zero extend quotient from AL. */
17729 tmp1 = gen_lowpart (QImode, tmp0);
17730 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17731 set_unique_reg_note (insn, REG_EQUAL, div);
17732
17733 emit_label (end_label);
17734 }
17735
17736 /* Whether it is OK to emit CFI directives when emitting asm code. */
17737
17738 bool
17739 ix86_emit_cfi ()
17740 {
17741 return dwarf2out_do_cfi_asm ();
17742 }
17743
17744 #define LEA_MAX_STALL (3)
17745 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17746
17747 /* Increase the given DISTANCE in half-cycles according to
17748 dependencies between PREV and NEXT instructions.
17749 Add 1 half-cycle if there is no dependency and
17750 go to the next cycle if there is some dependency. */
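/* A small worked example of the rule above (illustrative only): with
   DISTANCE currently 3 half-cycles, an independent NEXT insn just adds
   one half-cycle (3 -> 4), while a register dependency between PREV and
   NEXT rounds up to the next cycle and adds a full cycle
   (3 -> 3 + 1 + 2 = 6).  */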
17751
17752 static unsigned int
17753 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17754 {
17755 df_ref def, use;
17756
17757 if (!prev || !next)
17758 return distance + (distance & 1) + 2;
17759
17760 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17761 return distance + 1;
17762
17763 FOR_EACH_INSN_USE (use, next)
17764 FOR_EACH_INSN_DEF (def, prev)
17765 if (!DF_REF_IS_ARTIFICIAL (def)
17766 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17767 return distance + (distance & 1) + 2;
17768
17769 return distance + 1;
17770 }
17771
17772 /* Function checks if instruction INSN defines register number
17773 REGNO1 or REGNO2. */
17774
17775 static bool
17776 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17777 rtx insn)
17778 {
17779 df_ref def;
17780
17781 FOR_EACH_INSN_DEF (def, insn)
17782 if (DF_REF_REG_DEF_P (def)
17783 && !DF_REF_IS_ARTIFICIAL (def)
17784 && (regno1 == DF_REF_REGNO (def)
17785 || regno2 == DF_REF_REGNO (def)))
17786 return true;
17787
17788 return false;
17789 }
17790
17791 /* Function checks if instruction INSN uses register number
17792 REGNO as a part of address expression. */
17793
17794 static bool
17795 insn_uses_reg_mem (unsigned int regno, rtx insn)
17796 {
17797 df_ref use;
17798
17799 FOR_EACH_INSN_USE (use, insn)
17800 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17801 return true;
17802
17803 return false;
17804 }
17805
17806 /* Search backward for non-agu definition of register number REGNO1
17807 or register number REGNO2 in basic block starting from instruction
17808 START up to head of basic block or instruction INSN.
17809
17810 The function stores true in *FOUND if a definition was found
17811 and false otherwise.
17812
17813 The distance in half-cycles between START and the found instruction or the
17814 head of the BB is added to DISTANCE and returned. */
17815
17816 static int
17817 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17818 rtx_insn *insn, int distance,
17819 rtx_insn *start, bool *found)
17820 {
17821 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17822 rtx_insn *prev = start;
17823 rtx_insn *next = NULL;
17824
17825 *found = false;
17826
17827 while (prev
17828 && prev != insn
17829 && distance < LEA_SEARCH_THRESHOLD)
17830 {
17831 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17832 {
17833 distance = increase_distance (prev, next, distance);
17834 if (insn_defines_reg (regno1, regno2, prev))
17835 {
17836 if (recog_memoized (prev) < 0
17837 || get_attr_type (prev) != TYPE_LEA)
17838 {
17839 *found = true;
17840 return distance;
17841 }
17842 }
17843
17844 next = prev;
17845 }
17846 if (prev == BB_HEAD (bb))
17847 break;
17848
17849 prev = PREV_INSN (prev);
17850 }
17851
17852 return distance;
17853 }
17854
17855 /* Search backward for non-agu definition of register number REGNO1
17856 or register number REGNO2 in INSN's basic block until
17857 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17858 2. Reach neighbour BBs boundary, or
17859 3. Reach agu definition.
17860 Returns the distance between the non-agu definition point and INSN.
17861 If no definition point, returns -1. */
17862
17863 static int
17864 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17865 rtx_insn *insn)
17866 {
17867 basic_block bb = BLOCK_FOR_INSN (insn);
17868 int distance = 0;
17869 bool found = false;
17870
17871 if (insn != BB_HEAD (bb))
17872 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17873 distance, PREV_INSN (insn),
17874 &found);
17875
17876 if (!found && distance < LEA_SEARCH_THRESHOLD)
17877 {
17878 edge e;
17879 edge_iterator ei;
17880 bool simple_loop = false;
17881
17882 FOR_EACH_EDGE (e, ei, bb->preds)
17883 if (e->src == bb)
17884 {
17885 simple_loop = true;
17886 break;
17887 }
17888
17889 if (simple_loop)
17890 distance = distance_non_agu_define_in_bb (regno1, regno2,
17891 insn, distance,
17892 BB_END (bb), &found);
17893 else
17894 {
17895 int shortest_dist = -1;
17896 bool found_in_bb = false;
17897
17898 FOR_EACH_EDGE (e, ei, bb->preds)
17899 {
17900 int bb_dist
17901 = distance_non_agu_define_in_bb (regno1, regno2,
17902 insn, distance,
17903 BB_END (e->src),
17904 &found_in_bb);
17905 if (found_in_bb)
17906 {
17907 if (shortest_dist < 0)
17908 shortest_dist = bb_dist;
17909 else if (bb_dist > 0)
17910 shortest_dist = MIN (bb_dist, shortest_dist);
17911
17912 found = true;
17913 }
17914 }
17915
17916 distance = shortest_dist;
17917 }
17918 }
17919
17920 /* get_attr_type may modify recog data. We want to make sure
17921 that recog data is valid for instruction INSN, on which
17922 distance_non_agu_define is called. INSN is unchanged here. */
17923 extract_insn_cached (insn);
17924
17925 if (!found)
17926 return -1;
17927
17928 return distance >> 1;
17929 }
17930
17931 /* Return the distance in half-cycles between INSN and the next
17932 insn that uses register number REGNO in a memory address, added
17933 to DISTANCE. Return -1 if REGNO is set.
17934
17935 Put true value into *FOUND if register usage was found and
17936 false otherwise.
17937 Put true value into *REDEFINED if register redefinition was
17938 found and false otherwise. */
17939
17940 static int
17941 distance_agu_use_in_bb (unsigned int regno,
17942 rtx_insn *insn, int distance, rtx_insn *start,
17943 bool *found, bool *redefined)
17944 {
17945 basic_block bb = NULL;
17946 rtx_insn *next = start;
17947 rtx_insn *prev = NULL;
17948
17949 *found = false;
17950 *redefined = false;
17951
17952 if (start != NULL_RTX)
17953 {
17954 bb = BLOCK_FOR_INSN (start);
17955 if (start != BB_HEAD (bb))
17956 /* If insn and start belong to the same bb, set prev to insn,
17957 so the call to increase_distance will increase the distance
17958 between insns by 1. */
17959 prev = insn;
17960 }
17961
17962 while (next
17963 && next != insn
17964 && distance < LEA_SEARCH_THRESHOLD)
17965 {
17966 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17967 {
17968 distance = increase_distance(prev, next, distance);
17969 if (insn_uses_reg_mem (regno, next))
17970 {
17971 /* Return DISTANCE if OP0 is used in memory
17972 address in NEXT. */
17973 *found = true;
17974 return distance;
17975 }
17976
17977 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17978 {
17979 /* Return -1 if OP0 is set in NEXT. */
17980 *redefined = true;
17981 return -1;
17982 }
17983
17984 prev = next;
17985 }
17986
17987 if (next == BB_END (bb))
17988 break;
17989
17990 next = NEXT_INSN (next);
17991 }
17992
17993 return distance;
17994 }
17995
17996 /* Return the distance between INSN and the next insn that uses
17997 register number REGNO0 in a memory address. Return -1 if no such
17998 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17999
18000 static int
18001 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18002 {
18003 basic_block bb = BLOCK_FOR_INSN (insn);
18004 int distance = 0;
18005 bool found = false;
18006 bool redefined = false;
18007
18008 if (insn != BB_END (bb))
18009 distance = distance_agu_use_in_bb (regno0, insn, distance,
18010 NEXT_INSN (insn),
18011 &found, &redefined);
18012
18013 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18014 {
18015 edge e;
18016 edge_iterator ei;
18017 bool simple_loop = false;
18018
18019 FOR_EACH_EDGE (e, ei, bb->succs)
18020 if (e->dest == bb)
18021 {
18022 simple_loop = true;
18023 break;
18024 }
18025
18026 if (simple_loop)
18027 distance = distance_agu_use_in_bb (regno0, insn,
18028 distance, BB_HEAD (bb),
18029 &found, &redefined);
18030 else
18031 {
18032 int shortest_dist = -1;
18033 bool found_in_bb = false;
18034 bool redefined_in_bb = false;
18035
18036 FOR_EACH_EDGE (e, ei, bb->succs)
18037 {
18038 int bb_dist
18039 = distance_agu_use_in_bb (regno0, insn,
18040 distance, BB_HEAD (e->dest),
18041 &found_in_bb, &redefined_in_bb);
18042 if (found_in_bb)
18043 {
18044 if (shortest_dist < 0)
18045 shortest_dist = bb_dist;
18046 else if (bb_dist > 0)
18047 shortest_dist = MIN (bb_dist, shortest_dist);
18048
18049 found = true;
18050 }
18051 }
18052
18053 distance = shortest_dist;
18054 }
18055 }
18056
18057 if (!found || redefined)
18058 return -1;
18059
18060 return distance >> 1;
18061 }
18062
18063 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18064 there is a dilemma of choosing between LEA and ADD.
18065 Negative value: ADD is preferred over LEA
18066 Zero: Neutral
18067 Positive value: LEA is preferred over ADD. */
18068 #define IX86_LEA_PRIORITY 0
18069
18070 /* Return true if use of the lea INSN has a performance advantage
18071 over a sequence of instructions. The instruction sequence has
18072 SPLIT_COST cycles higher latency than the lea latency. */
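/* A rough worked example of the heuristic below (numbers are purely
   illustrative, with IX86_LEA_PRIORITY assumed to be 0 as defined above):
   if dist_define = 1, dist_use = 4 and SPLIT_COST = 1, the adjusted
   definition distance becomes 1 + 1 + 0 = 2, which is smaller than
   dist_use, so the function returns false and the lea is split into an
   ALU sequence.  */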
18073
18074 static bool
18075 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18076 unsigned int regno2, int split_cost, bool has_scale)
18077 {
18078 int dist_define, dist_use;
18079
18080 /* For Silvermont, using a 2-source or 3-source LEA for a
18081 non-destructive destination, or to gain the ability to use
18082 SCALE, justifies the use of LEA. */
18083 if (TARGET_SILVERMONT || TARGET_INTEL)
18084 {
18085 if (has_scale)
18086 return true;
18087 if (split_cost < 1)
18088 return false;
18089 if (regno0 == regno1 || regno0 == regno2)
18090 return false;
18091 return true;
18092 }
18093
18094 dist_define = distance_non_agu_define (regno1, regno2, insn);
18095 dist_use = distance_agu_use (regno0, insn);
18096
18097 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18098 {
18099 /* If there is no non-AGU operand definition, no AGU
18100 operand usage and the split cost is 0, then both the lea
18101 and non-lea variants have the same priority. Currently
18102 we prefer lea for 64-bit code and non-lea for 32-bit
18103 code. */
18104 if (dist_use < 0 && split_cost == 0)
18105 return TARGET_64BIT || IX86_LEA_PRIORITY;
18106 else
18107 return true;
18108 }
18109
18110 /* With a longer definition distance, lea is preferable.
18111 Here we adjust the distance to take the splitting cost and
18112 lea priority into account. */
18113 dist_define += split_cost + IX86_LEA_PRIORITY;
18114
18115 /* If there is no use in a memory address then we just check
18116 that the split cost exceeds the AGU stall. */
18117 if (dist_use < 0)
18118 return dist_define > LEA_MAX_STALL;
18119
18120 /* If this insn has both a backward non-agu dependence and a forward
18121 agu dependence, the one with the shorter distance takes effect. */
18122 return dist_define >= dist_use;
18123 }
18124
18125 /* Return true if it is legal to clobber flags by INSN and
18126 false otherwise. */
18127
18128 static bool
18129 ix86_ok_to_clobber_flags (rtx_insn *insn)
18130 {
18131 basic_block bb = BLOCK_FOR_INSN (insn);
18132 df_ref use;
18133 bitmap live;
18134
18135 while (insn)
18136 {
18137 if (NONDEBUG_INSN_P (insn))
18138 {
18139 FOR_EACH_INSN_USE (use, insn)
18140 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18141 return false;
18142
18143 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18144 return true;
18145 }
18146
18147 if (insn == BB_END (bb))
18148 break;
18149
18150 insn = NEXT_INSN (insn);
18151 }
18152
18153 live = df_get_live_out(bb);
18154 return !REGNO_REG_SET_P (live, FLAGS_REG);
18155 }
18156
18157 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18158 move and add to avoid AGU stalls. */
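/* For illustration (approximate assembly, not the exact RTL): when this
   predicate returns true, a three-operand add that would otherwise be
   emitted as
       lea    r0, [r1 + r2]
   is instead split into
       mov    r0, r1
       add    r0, r2
   because the ALU sequence is expected to avoid an AGU stall.  */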
18159
18160 bool
18161 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18162 {
18163 unsigned int regno0, regno1, regno2;
18164
18165 /* Check if we need to optimize. */
18166 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18167 return false;
18168
18169 /* Check it is correct to split here. */
18170 if (!ix86_ok_to_clobber_flags(insn))
18171 return false;
18172
18173 regno0 = true_regnum (operands[0]);
18174 regno1 = true_regnum (operands[1]);
18175 regno2 = true_regnum (operands[2]);
18176
18177 /* We need to split only adds with a non-destructive
18178 destination operand. */
18179 if (regno0 == regno1 || regno0 == regno2)
18180 return false;
18181 else
18182 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18183 }
18184
18185 /* Return true if we should emit an lea instruction instead of a mov
18186 instruction. */
18187
18188 bool
18189 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18190 {
18191 unsigned int regno0, regno1;
18192
18193 /* Check if we need to optimize. */
18194 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18195 return false;
18196
18197 /* Use lea for reg to reg moves only. */
18198 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18199 return false;
18200
18201 regno0 = true_regnum (operands[0]);
18202 regno1 = true_regnum (operands[1]);
18203
18204 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18205 }
18206
18207 /* Return true if we need to split lea into a sequence of
18208 instructions to avoid AGU stalls. */
18209
18210 bool
18211 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18212 {
18213 unsigned int regno0, regno1, regno2;
18214 int split_cost;
18215 struct ix86_address parts;
18216 int ok;
18217
18218 /* Check if we need to optimize. */
18219 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18220 return false;
18221
18222 /* The "at least two components" test below might not catch simple
18223 move or zero extension insns if parts.base is non-NULL and parts.disp
18224 is const0_rtx as the only components in the address, e.g. if the
18225 register is %rbp or %r13. As this test is much cheaper and moves or
18226 zero extensions are the common case, do this check first. */
18227 if (REG_P (operands[1])
18228 || (SImode_address_operand (operands[1], VOIDmode)
18229 && REG_P (XEXP (operands[1], 0))))
18230 return false;
18231
18232 /* Check if it is OK to split here. */
18233 if (!ix86_ok_to_clobber_flags (insn))
18234 return false;
18235
18236 ok = ix86_decompose_address (operands[1], &parts);
18237 gcc_assert (ok);
18238
18239 /* There should be at least two components in the address. */
18240 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18241 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18242 return false;
18243
18244 /* We should not split into an add if a non-legitimate PIC
18245 operand is used as the displacement. */
18246 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18247 return false;
18248
18249 regno0 = true_regnum (operands[0]);
18250 regno1 = INVALID_REGNUM;
18251 regno2 = INVALID_REGNUM;
18252
18253 if (parts.base)
18254 regno1 = true_regnum (parts.base);
18255 if (parts.index)
18256 regno2 = true_regnum (parts.index);
18257
18258 split_cost = 0;
18259
18260 /* Compute how many cycles we will add to the execution time
18261 if we split the lea into a sequence of instructions. */
18262 if (parts.base || parts.index)
18263 {
18264 /* Have to use a mov instruction if the non-destructive
18265 destination form is used. */
18266 if (regno1 != regno0 && regno2 != regno0)
18267 split_cost += 1;
18268
18269 /* Have to add index to base if both exist. */
18270 if (parts.base && parts.index)
18271 split_cost += 1;
18272
18273 /* Have to use shift and adds if scale is 2 or greater. */
18274 if (parts.scale > 1)
18275 {
18276 if (regno0 != regno1)
18277 split_cost += 1;
18278 else if (regno2 == regno0)
18279 split_cost += 4;
18280 else
18281 split_cost += parts.scale;
18282 }
18283
18284 /* Have to use an add instruction with an immediate if
18285 disp is non-zero. */
18286 if (parts.disp && parts.disp != const0_rtx)
18287 split_cost += 1;
18288
18289 /* Subtract the price of lea. */
18290 split_cost -= 1;
18291 }
18292
18293 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18294 parts.scale > 1);
18295 }
18296
18297 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18298 matches the destination. The emitted RTX includes a clobber of FLAGS_REG. */
18299
18300 static void
18301 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18302 rtx dst, rtx src)
18303 {
18304 rtx op, clob;
18305
18306 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18307 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18308
18309 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18310 }
18311
18312 /* Return true if the definition of REGNO1 is nearer to INSN than that of REGNO2. */
18313
18314 static bool
18315 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18316 {
18317 rtx_insn *prev = insn;
18318 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18319
18320 if (insn == start)
18321 return false;
18322 while (prev && prev != start)
18323 {
18324 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18325 {
18326 prev = PREV_INSN (prev);
18327 continue;
18328 }
18329 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18330 return true;
18331 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18332 return false;
18333 prev = PREV_INSN (prev);
18334 }
18335
18336 /* None of the regs is defined in the bb. */
18337 return false;
18338 }
18339
18340 /* Split lea instructions into a sequence of instructions
18341 which are executed on ALU to avoid AGU stalls.
18342 It is assumed that it is allowed to clobber flags register
18343 at lea position. */
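/* A sketch of the kind of replacement performed below (approximate
   assembly; the exact sequence depends on which address components are
   present and which of them alias the destination):
       lea    r0, [r1 + r2*4 + 8]
   becomes
       mov    r0, r2
       shl    r0, 2
       add    r0, r1
       add    r0, 8                                                       */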
18344
18345 void
18346 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18347 {
18348 unsigned int regno0, regno1, regno2;
18349 struct ix86_address parts;
18350 rtx target, tmp;
18351 int ok, adds;
18352
18353 ok = ix86_decompose_address (operands[1], &parts);
18354 gcc_assert (ok);
18355
18356 target = gen_lowpart (mode, operands[0]);
18357
18358 regno0 = true_regnum (target);
18359 regno1 = INVALID_REGNUM;
18360 regno2 = INVALID_REGNUM;
18361
18362 if (parts.base)
18363 {
18364 parts.base = gen_lowpart (mode, parts.base);
18365 regno1 = true_regnum (parts.base);
18366 }
18367
18368 if (parts.index)
18369 {
18370 parts.index = gen_lowpart (mode, parts.index);
18371 regno2 = true_regnum (parts.index);
18372 }
18373
18374 if (parts.disp)
18375 parts.disp = gen_lowpart (mode, parts.disp);
18376
18377 if (parts.scale > 1)
18378 {
18379 /* Case r1 = r1 + ... */
18380 if (regno1 == regno0)
18381 {
18382 /* If we have a case r1 = r1 + C * r2 then we
18383 would have to use multiplication, which is very
18384 expensive. Assume the cost model is wrong if we
18385 see such a case here. */
18386 gcc_assert (regno2 != regno0);
18387
18388 for (adds = parts.scale; adds > 0; adds--)
18389 ix86_emit_binop (PLUS, mode, target, parts.index);
18390 }
18391 else
18392 {
18393 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18394 if (regno0 != regno2)
18395 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18396
18397 /* Use shift for scaling. */
18398 ix86_emit_binop (ASHIFT, mode, target,
18399 GEN_INT (exact_log2 (parts.scale)));
18400
18401 if (parts.base)
18402 ix86_emit_binop (PLUS, mode, target, parts.base);
18403
18404 if (parts.disp && parts.disp != const0_rtx)
18405 ix86_emit_binop (PLUS, mode, target, parts.disp);
18406 }
18407 }
18408 else if (!parts.base && !parts.index)
18409 {
18410 gcc_assert(parts.disp);
18411 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18412 }
18413 else
18414 {
18415 if (!parts.base)
18416 {
18417 if (regno0 != regno2)
18418 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18419 }
18420 else if (!parts.index)
18421 {
18422 if (regno0 != regno1)
18423 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18424 }
18425 else
18426 {
18427 if (regno0 == regno1)
18428 tmp = parts.index;
18429 else if (regno0 == regno2)
18430 tmp = parts.base;
18431 else
18432 {
18433 rtx tmp1;
18434
18435 /* Find better operand for SET instruction, depending
18436 on which definition is farther from the insn. */
18437 if (find_nearest_reg_def (insn, regno1, regno2))
18438 tmp = parts.index, tmp1 = parts.base;
18439 else
18440 tmp = parts.base, tmp1 = parts.index;
18441
18442 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18443
18444 if (parts.disp && parts.disp != const0_rtx)
18445 ix86_emit_binop (PLUS, mode, target, parts.disp);
18446
18447 ix86_emit_binop (PLUS, mode, target, tmp1);
18448 return;
18449 }
18450
18451 ix86_emit_binop (PLUS, mode, target, tmp);
18452 }
18453
18454 if (parts.disp && parts.disp != const0_rtx)
18455 ix86_emit_binop (PLUS, mode, target, parts.disp);
18456 }
18457 }
18458
18459 /* Return true if it is ok to optimize an ADD operation to an LEA
18460 operation to avoid flag register consumption. For most processors,
18461 ADD is faster than LEA. For processors like BONNELL, if the
18462 destination register of the LEA holds an actual address which will be
18463 used soon, LEA is better, otherwise ADD is better. */
18464
18465 bool
18466 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18467 {
18468 unsigned int regno0 = true_regnum (operands[0]);
18469 unsigned int regno1 = true_regnum (operands[1]);
18470 unsigned int regno2 = true_regnum (operands[2]);
18471
18472 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18473 if (regno0 != regno1 && regno0 != regno2)
18474 return true;
18475
18476 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18477 return false;
18478
18479 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18480 }
18481
18482 /* Return true if destination reg of SET_BODY is shift count of
18483 USE_BODY. */
18484
18485 static bool
18486 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18487 {
18488 rtx set_dest;
18489 rtx shift_rtx;
18490 int i;
18491
18492 /* Retrieve destination of SET_BODY. */
18493 switch (GET_CODE (set_body))
18494 {
18495 case SET:
18496 set_dest = SET_DEST (set_body);
18497 if (!set_dest || !REG_P (set_dest))
18498 return false;
18499 break;
18500 case PARALLEL:
18501 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18502 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18503 use_body))
18504 return true;
18505 default:
18506 return false;
18507 break;
18508 }
18509
18510 /* Retrieve shift count of USE_BODY. */
18511 switch (GET_CODE (use_body))
18512 {
18513 case SET:
18514 shift_rtx = XEXP (use_body, 1);
18515 break;
18516 case PARALLEL:
18517 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18518 if (ix86_dep_by_shift_count_body (set_body,
18519 XVECEXP (use_body, 0, i)))
18520 return true;
18521 default:
18522 return false;
18523 break;
18524 }
18525
18526 if (shift_rtx
18527 && (GET_CODE (shift_rtx) == ASHIFT
18528 || GET_CODE (shift_rtx) == LSHIFTRT
18529 || GET_CODE (shift_rtx) == ASHIFTRT
18530 || GET_CODE (shift_rtx) == ROTATE
18531 || GET_CODE (shift_rtx) == ROTATERT))
18532 {
18533 rtx shift_count = XEXP (shift_rtx, 1);
18534
18535 /* Return true if shift count is dest of SET_BODY. */
18536 if (REG_P (shift_count))
18537 {
18538 /* Add this check since the function can be invoked before register
18539 allocation by the pre-reload scheduler. */
18540 if (reload_completed
18541 && true_regnum (set_dest) == true_regnum (shift_count))
18542 return true;
18543 else if (REGNO(set_dest) == REGNO(shift_count))
18544 return true;
18545 }
18546 }
18547
18548 return false;
18549 }
18550
18551 /* Return true if destination reg of SET_INSN is shift count of
18552 USE_INSN. */
18553
18554 bool
18555 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18556 {
18557 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18558 PATTERN (use_insn));
18559 }
18560
18561 /* Return TRUE or FALSE depending on whether the unary operator meets the
18562 appropriate constraints. */
18563
18564 bool
18565 ix86_unary_operator_ok (enum rtx_code,
18566 enum machine_mode,
18567 rtx operands[2])
18568 {
18569 /* If one of the operands is memory, source and destination must match. */
18570 if ((MEM_P (operands[0])
18571 || MEM_P (operands[1]))
18572 && ! rtx_equal_p (operands[0], operands[1]))
18573 return false;
18574 return true;
18575 }
18576
18577 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18578 are ok, keeping in mind the possible movddup alternative. */
18579
18580 bool
18581 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18582 {
18583 if (MEM_P (operands[0]))
18584 return rtx_equal_p (operands[0], operands[1 + high]);
18585 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18586 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18587 return true;
18588 }
18589
18590 /* Post-reload splitter for converting an SFmode or DFmode value in an
18591 SSE register into an unsigned SImode value. */
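/* Per-element sketch of the algorithm (illustrative C, not the emitted
   RTL):
     if (x < 0x1p31)                       // fits in a signed int
       result = (int) x;
     else                                  // too large: bias down first
       result = (int) (x - 0x1p31) ^ 0x80000000;
   The vector code below computes both paths at once using an LE compare
   mask, a masked subtract and a final XOR.  */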
18592
18593 void
18594 ix86_split_convert_uns_si_sse (rtx operands[])
18595 {
18596 enum machine_mode vecmode;
18597 rtx value, large, zero_or_two31, input, two31, x;
18598
18599 large = operands[1];
18600 zero_or_two31 = operands[2];
18601 input = operands[3];
18602 two31 = operands[4];
18603 vecmode = GET_MODE (large);
18604 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18605
18606 /* Load up the value into the low element. We must ensure that the other
18607 elements are valid floats -- zero is the easiest such value. */
18608 if (MEM_P (input))
18609 {
18610 if (vecmode == V4SFmode)
18611 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18612 else
18613 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18614 }
18615 else
18616 {
18617 input = gen_rtx_REG (vecmode, REGNO (input));
18618 emit_move_insn (value, CONST0_RTX (vecmode));
18619 if (vecmode == V4SFmode)
18620 emit_insn (gen_sse_movss (value, value, input));
18621 else
18622 emit_insn (gen_sse2_movsd (value, value, input));
18623 }
18624
18625 emit_move_insn (large, two31);
18626 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18627
18628 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18629 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18630
18631 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18632 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18633
18634 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18635 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18636
18637 large = gen_rtx_REG (V4SImode, REGNO (large));
18638 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18639
18640 x = gen_rtx_REG (V4SImode, REGNO (value));
18641 if (vecmode == V4SFmode)
18642 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18643 else
18644 emit_insn (gen_sse2_cvttpd2dq (x, value));
18645 value = x;
18646
18647 emit_insn (gen_xorv4si3 (value, value, large));
18648 }
18649
18650 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18651 Expects the 64-bit DImode to be supplied in a pair of integral
18652 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18653 -mfpmath=sse, !optimize_size only. */
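/* An equivalent scalar formulation of the conversion performed below
   (illustrative only; the expander does it with the exponent-bias trick
   described in the body, using vector operations):
     double lo = (double) (uint32_t) x;          // low 32 bits, exact
     double hi = (double) (uint32_t) (x >> 32);  // high 32 bits, exact
     result = hi * 0x1.0p32 + lo;                                         */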
18654
18655 void
18656 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18657 {
18658 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18659 rtx int_xmm, fp_xmm;
18660 rtx biases, exponents;
18661 rtx x;
18662
18663 int_xmm = gen_reg_rtx (V4SImode);
18664 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18665 emit_insn (gen_movdi_to_sse (int_xmm, input));
18666 else if (TARGET_SSE_SPLIT_REGS)
18667 {
18668 emit_clobber (int_xmm);
18669 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18670 }
18671 else
18672 {
18673 x = gen_reg_rtx (V2DImode);
18674 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18675 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18676 }
18677
18678 x = gen_rtx_CONST_VECTOR (V4SImode,
18679 gen_rtvec (4, GEN_INT (0x43300000UL),
18680 GEN_INT (0x45300000UL),
18681 const0_rtx, const0_rtx));
18682 exponents = validize_mem (force_const_mem (V4SImode, x));
18683
18684 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18685 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18686
18687 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18688 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18689 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18690 (0x1.0p84 + double(fp_value_hi_xmm)).
18691 Note these exponents differ by 32. */
18692
18693 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18694
18695 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18696 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18697 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18698 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18699 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18700 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18701 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18702 biases = validize_mem (force_const_mem (V2DFmode, biases));
18703 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18704
18705 /* Add the upper and lower DFmode values together. */
18706 if (TARGET_SSE3)
18707 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18708 else
18709 {
18710 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18711 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18712 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18713 }
18714
18715 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18716 }
18717
18718 /* Not used, but eases macroization of patterns. */
18719 void
18720 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18721 {
18722 gcc_unreachable ();
18723 }
18724
18725 /* Convert an unsigned SImode value into a DFmode. Only currently used
18726 for SSE, but applicable anywhere. */
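/* In scalar terms the sequence emitted below computes (illustrative only):
     result = (double) (int) (x + INT32_MIN) + 0x1.0p31;
   i.e. bias the unsigned value into signed range with a wrapping add,
   use the signed int->double conversion, then undo the bias in FP.  */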
18727
18728 void
18729 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18730 {
18731 REAL_VALUE_TYPE TWO31r;
18732 rtx x, fp;
18733
18734 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18735 NULL, 1, OPTAB_DIRECT);
18736
18737 fp = gen_reg_rtx (DFmode);
18738 emit_insn (gen_floatsidf2 (fp, x));
18739
18740 real_ldexp (&TWO31r, &dconst1, 31);
18741 x = const_double_from_real_value (TWO31r, DFmode);
18742
18743 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18744 if (x != target)
18745 emit_move_insn (target, x);
18746 }
18747
18748 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18749 32-bit mode; otherwise we have a direct convert instruction. */
18750
18751 void
18752 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18753 {
18754 REAL_VALUE_TYPE TWO32r;
18755 rtx fp_lo, fp_hi, x;
18756
18757 fp_lo = gen_reg_rtx (DFmode);
18758 fp_hi = gen_reg_rtx (DFmode);
18759
18760 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18761
18762 real_ldexp (&TWO32r, &dconst1, 32);
18763 x = const_double_from_real_value (TWO32r, DFmode);
18764 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18765
18766 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18767
18768 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18769 0, OPTAB_DIRECT);
18770 if (x != target)
18771 emit_move_insn (target, x);
18772 }
18773
18774 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18775 For x86_32, -mfpmath=sse, !optimize_size only. */
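/* Scalar sketch of the expansion below (illustrative only):
     float r = (float) (x >> 16) * 0x1.0p16f + (float) (x & 0xffff);
   Each 16-bit half converts exactly through the signed int->float path,
   so no bits are lost before the final (correctly rounded) addition.  */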
18776 void
18777 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18778 {
18779 REAL_VALUE_TYPE ONE16r;
18780 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18781
18782 real_ldexp (&ONE16r, &dconst1, 16);
18783 x = const_double_from_real_value (ONE16r, SFmode);
18784 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
18785 NULL, 0, OPTAB_DIRECT);
18786 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
18787 NULL, 0, OPTAB_DIRECT);
18788 fp_hi = gen_reg_rtx (SFmode);
18789 fp_lo = gen_reg_rtx (SFmode);
18790 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18791 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18792 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18793 0, OPTAB_DIRECT);
18794 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18795 0, OPTAB_DIRECT);
18796 if (!rtx_equal_p (target, fp_hi))
18797 emit_move_insn (target, fp_hi);
18798 }
18799
18800 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18801 a vector of unsigned ints VAL to vector of floats TARGET. */
18802
18803 void
18804 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18805 {
18806 rtx tmp[8];
18807 REAL_VALUE_TYPE TWO16r;
18808 enum machine_mode intmode = GET_MODE (val);
18809 enum machine_mode fltmode = GET_MODE (target);
18810 rtx (*cvt) (rtx, rtx);
18811
18812 if (intmode == V4SImode)
18813 cvt = gen_floatv4siv4sf2;
18814 else
18815 cvt = gen_floatv8siv8sf2;
18816 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18817 tmp[0] = force_reg (intmode, tmp[0]);
18818 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18819 OPTAB_DIRECT);
18820 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18821 NULL_RTX, 1, OPTAB_DIRECT);
18822 tmp[3] = gen_reg_rtx (fltmode);
18823 emit_insn (cvt (tmp[3], tmp[1]));
18824 tmp[4] = gen_reg_rtx (fltmode);
18825 emit_insn (cvt (tmp[4], tmp[2]));
18826 real_ldexp (&TWO16r, &dconst1, 16);
18827 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18828 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18829 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18830 OPTAB_DIRECT);
18831 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18832 OPTAB_DIRECT);
18833 if (tmp[7] != target)
18834 emit_move_insn (target, tmp[7]);
18835 }
18836
18837 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18838 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18839 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18840 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
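/* Per-element sketch of the adjustment (illustrative C):
     if (v >= 0x1p31)           // mask produced by the LE compare below
       { v -= 0x1p31; xor = 0x80000000; }
     else
       xor = 0;
   The caller then applies a signed truncating conversion to the adjusted
   value and XORs the integer result with *XORP.  */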
18841
18842 rtx
18843 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18844 {
18845 REAL_VALUE_TYPE TWO31r;
18846 rtx two31r, tmp[4];
18847 enum machine_mode mode = GET_MODE (val);
18848 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18849 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18850 rtx (*cmp) (rtx, rtx, rtx, rtx);
18851 int i;
18852
18853 for (i = 0; i < 3; i++)
18854 tmp[i] = gen_reg_rtx (mode);
18855 real_ldexp (&TWO31r, &dconst1, 31);
18856 two31r = const_double_from_real_value (TWO31r, scalarmode);
18857 two31r = ix86_build_const_vector (mode, 1, two31r);
18858 two31r = force_reg (mode, two31r);
18859 switch (mode)
18860 {
18861 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18862 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18863 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18864 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18865 default: gcc_unreachable ();
18866 }
18867 tmp[3] = gen_rtx_LE (mode, two31r, val);
18868 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18869 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18870 0, OPTAB_DIRECT);
18871 if (intmode == V4SImode || TARGET_AVX2)
18872 *xorp = expand_simple_binop (intmode, ASHIFT,
18873 gen_lowpart (intmode, tmp[0]),
18874 GEN_INT (31), NULL_RTX, 0,
18875 OPTAB_DIRECT);
18876 else
18877 {
18878 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18879 two31 = ix86_build_const_vector (intmode, 1, two31);
18880 *xorp = expand_simple_binop (intmode, AND,
18881 gen_lowpart (intmode, tmp[0]),
18882 two31, NULL_RTX, 0,
18883 OPTAB_DIRECT);
18884 }
18885 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18886 0, OPTAB_DIRECT);
18887 }
18888
18889 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18890 then replicate the value for all elements of the vector
18891 register. */
18892
18893 rtx
18894 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18895 {
18896 int i, n_elt;
18897 rtvec v;
18898 enum machine_mode scalar_mode;
18899
18900 switch (mode)
18901 {
18902 case V64QImode:
18903 case V32QImode:
18904 case V16QImode:
18905 case V32HImode:
18906 case V16HImode:
18907 case V8HImode:
18908 case V16SImode:
18909 case V8SImode:
18910 case V4SImode:
18911 case V8DImode:
18912 case V4DImode:
18913 case V2DImode:
18914 gcc_assert (vect);
18915 case V16SFmode:
18916 case V8SFmode:
18917 case V4SFmode:
18918 case V8DFmode:
18919 case V4DFmode:
18920 case V2DFmode:
18921 n_elt = GET_MODE_NUNITS (mode);
18922 v = rtvec_alloc (n_elt);
18923 scalar_mode = GET_MODE_INNER (mode);
18924
18925 RTVEC_ELT (v, 0) = value;
18926
18927 for (i = 1; i < n_elt; ++i)
18928 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18929
18930 return gen_rtx_CONST_VECTOR (mode, v);
18931
18932 default:
18933 gcc_unreachable ();
18934 }
18935 }
18936
18937 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18938 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18939 for an SSE register. If VECT is true, then replicate the mask for
18940 all elements of the vector register. If INVERT is true, then create
18941 a mask excluding the sign bit. */
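/* For example (illustrative): for V4SFmode with VECT set the result is
   the vector { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }, and
   with INVERT set it is { 0x7fffffff, ... }; for the DFmode cases the
   sign bit is bit 63 of each element.  */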
18942
18943 rtx
18944 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18945 {
18946 enum machine_mode vec_mode, imode;
18947 HOST_WIDE_INT hi, lo;
18948 int shift = 63;
18949 rtx v;
18950 rtx mask;
18951
18952 /* Find the sign bit, sign extended to 2*HWI. */
18953 switch (mode)
18954 {
18955 case V16SImode:
18956 case V16SFmode:
18957 case V8SImode:
18958 case V4SImode:
18959 case V8SFmode:
18960 case V4SFmode:
18961 vec_mode = mode;
18962 mode = GET_MODE_INNER (mode);
18963 imode = SImode;
18964 lo = 0x80000000, hi = lo < 0;
18965 break;
18966
18967 case V8DImode:
18968 case V4DImode:
18969 case V2DImode:
18970 case V8DFmode:
18971 case V4DFmode:
18972 case V2DFmode:
18973 vec_mode = mode;
18974 mode = GET_MODE_INNER (mode);
18975 imode = DImode;
18976 if (HOST_BITS_PER_WIDE_INT >= 64)
18977 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18978 else
18979 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18980 break;
18981
18982 case TImode:
18983 case TFmode:
18984 vec_mode = VOIDmode;
18985 if (HOST_BITS_PER_WIDE_INT >= 64)
18986 {
18987 imode = TImode;
18988 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18989 }
18990 else
18991 {
18992 rtvec vec;
18993
18994 imode = DImode;
18995 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18996
18997 if (invert)
18998 {
18999 lo = ~lo, hi = ~hi;
19000 v = constm1_rtx;
19001 }
19002 else
19003 v = const0_rtx;
19004
19005 mask = immed_double_const (lo, hi, imode);
19006
19007 vec = gen_rtvec (2, v, mask);
19008 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19009 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19010
19011 return v;
19012 }
19013 break;
19014
19015 default:
19016 gcc_unreachable ();
19017 }
19018
19019 if (invert)
19020 lo = ~lo, hi = ~hi;
19021
19022 /* Force this value into the low part of a fp vector constant. */
19023 mask = immed_double_const (lo, hi, imode);
19024 mask = gen_lowpart (mode, mask);
19025
19026 if (vec_mode == VOIDmode)
19027 return force_reg (mode, mask);
19028
19029 v = ix86_build_const_vector (vec_mode, vect, mask);
19030 return force_reg (vec_mode, v);
19031 }
19032
19033 /* Generate code for floating point ABS or NEG. */
19034
19035 void
19036 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19037 rtx operands[])
19038 {
19039 rtx mask, set, dst, src;
19040 bool use_sse = false;
19041 bool vector_mode = VECTOR_MODE_P (mode);
19042 enum machine_mode vmode = mode;
19043
19044 if (vector_mode)
19045 use_sse = true;
19046 else if (mode == TFmode)
19047 use_sse = true;
19048 else if (TARGET_SSE_MATH)
19049 {
19050 use_sse = SSE_FLOAT_MODE_P (mode);
19051 if (mode == SFmode)
19052 vmode = V4SFmode;
19053 else if (mode == DFmode)
19054 vmode = V2DFmode;
19055 }
19056
19057 /* NEG and ABS performed with SSE use bitwise mask operations.
19058 Create the appropriate mask now. */
19059 if (use_sse)
19060 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19061 else
19062 mask = NULL_RTX;
19063
19064 dst = operands[0];
19065 src = operands[1];
19066
19067 set = gen_rtx_fmt_e (code, mode, src);
19068 set = gen_rtx_SET (VOIDmode, dst, set);
19069
19070 if (mask)
19071 {
19072 rtx use, clob;
19073 rtvec par;
19074
19075 use = gen_rtx_USE (VOIDmode, mask);
19076 if (vector_mode)
19077 par = gen_rtvec (2, set, use);
19078 else
19079 {
19080 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19081 par = gen_rtvec (3, set, use, clob);
19082 }
19083 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19084 }
19085 else
19086 emit_insn (set);
19087 }
19088
19089 /* Expand a copysign operation. Special case operand 0 being a constant. */
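/* Bit-level sketch of what the expansion computes (illustrative, for a
   single SFmode element):
     sign   = 0x80000000;
     result = (bits(op0) & ~sign) | (bits(op1) & sign);
   i.e. the magnitude of op0 combined with the sign of op1.  */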
19090
19091 void
19092 ix86_expand_copysign (rtx operands[])
19093 {
19094 enum machine_mode mode, vmode;
19095 rtx dest, op0, op1, mask, nmask;
19096
19097 dest = operands[0];
19098 op0 = operands[1];
19099 op1 = operands[2];
19100
19101 mode = GET_MODE (dest);
19102
19103 if (mode == SFmode)
19104 vmode = V4SFmode;
19105 else if (mode == DFmode)
19106 vmode = V2DFmode;
19107 else
19108 vmode = mode;
19109
19110 if (GET_CODE (op0) == CONST_DOUBLE)
19111 {
19112 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19113
19114 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19115 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19116
19117 if (mode == SFmode || mode == DFmode)
19118 {
19119 if (op0 == CONST0_RTX (mode))
19120 op0 = CONST0_RTX (vmode);
19121 else
19122 {
19123 rtx v = ix86_build_const_vector (vmode, false, op0);
19124
19125 op0 = force_reg (vmode, v);
19126 }
19127 }
19128 else if (op0 != CONST0_RTX (mode))
19129 op0 = force_reg (mode, op0);
19130
19131 mask = ix86_build_signbit_mask (vmode, 0, 0);
19132
19133 if (mode == SFmode)
19134 copysign_insn = gen_copysignsf3_const;
19135 else if (mode == DFmode)
19136 copysign_insn = gen_copysigndf3_const;
19137 else
19138 copysign_insn = gen_copysigntf3_const;
19139
19140 emit_insn (copysign_insn (dest, op0, op1, mask));
19141 }
19142 else
19143 {
19144 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19145
19146 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19147 mask = ix86_build_signbit_mask (vmode, 0, 0);
19148
19149 if (mode == SFmode)
19150 copysign_insn = gen_copysignsf3_var;
19151 else if (mode == DFmode)
19152 copysign_insn = gen_copysigndf3_var;
19153 else
19154 copysign_insn = gen_copysigntf3_var;
19155
19156 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19157 }
19158 }
19159
19160 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19161 be a constant, and so has already been expanded into a vector constant. */
19162
19163 void
19164 ix86_split_copysign_const (rtx operands[])
19165 {
19166 enum machine_mode mode, vmode;
19167 rtx dest, op0, mask, x;
19168
19169 dest = operands[0];
19170 op0 = operands[1];
19171 mask = operands[3];
19172
19173 mode = GET_MODE (dest);
19174 vmode = GET_MODE (mask);
19175
19176 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19177 x = gen_rtx_AND (vmode, dest, mask);
19178 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19179
19180 if (op0 != CONST0_RTX (vmode))
19181 {
19182 x = gen_rtx_IOR (vmode, dest, op0);
19183 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19184 }
19185 }
19186
19187 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19188 so we have to do two masks. */
19189
19190 void
19191 ix86_split_copysign_var (rtx operands[])
19192 {
19193 enum machine_mode mode, vmode;
19194 rtx dest, scratch, op0, op1, mask, nmask, x;
19195
19196 dest = operands[0];
19197 scratch = operands[1];
19198 op0 = operands[2];
19199 op1 = operands[3];
19200 nmask = operands[4];
19201 mask = operands[5];
19202
19203 mode = GET_MODE (dest);
19204 vmode = GET_MODE (mask);
19205
19206 if (rtx_equal_p (op0, op1))
19207 {
19208 /* Shouldn't happen often (it's useless, obviously), but when it does
19209 we'd generate incorrect code if we continue below. */
19210 emit_move_insn (dest, op0);
19211 return;
19212 }
19213
19214 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19215 {
19216 gcc_assert (REGNO (op1) == REGNO (scratch));
19217
19218 x = gen_rtx_AND (vmode, scratch, mask);
19219 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19220
19221 dest = mask;
19222 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19223 x = gen_rtx_NOT (vmode, dest);
19224 x = gen_rtx_AND (vmode, x, op0);
19225 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19226 }
19227 else
19228 {
19229 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19230 {
19231 x = gen_rtx_AND (vmode, scratch, mask);
19232 }
19233 else /* alternative 2,4 */
19234 {
19235 gcc_assert (REGNO (mask) == REGNO (scratch));
19236 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19237 x = gen_rtx_AND (vmode, scratch, op1);
19238 }
19239 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19240
19241 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19242 {
19243 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19244 x = gen_rtx_AND (vmode, dest, nmask);
19245 }
19246 else /* alternative 3,4 */
19247 {
19248 gcc_assert (REGNO (nmask) == REGNO (dest));
19249 dest = nmask;
19250 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19251 x = gen_rtx_AND (vmode, dest, op0);
19252 }
19253 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19254 }
19255
19256 x = gen_rtx_IOR (vmode, dest, scratch);
19257 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19258 }
19259
19260 /* Return TRUE or FALSE depending on whether the first SET in INSN
19261 has source and destination with matching CC modes, and whether the
19262 CC mode is at least as constrained as REQ_MODE. */
19263
19264 bool
19265 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19266 {
19267 rtx set;
19268 enum machine_mode set_mode;
19269
19270 set = PATTERN (insn);
19271 if (GET_CODE (set) == PARALLEL)
19272 set = XVECEXP (set, 0, 0);
19273 gcc_assert (GET_CODE (set) == SET);
19274 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19275
19276 set_mode = GET_MODE (SET_DEST (set));
19277 switch (set_mode)
19278 {
19279 case CCNOmode:
19280 if (req_mode != CCNOmode
19281 && (req_mode != CCmode
19282 || XEXP (SET_SRC (set), 1) != const0_rtx))
19283 return false;
19284 break;
19285 case CCmode:
19286 if (req_mode == CCGCmode)
19287 return false;
19288 /* FALLTHRU */
19289 case CCGCmode:
19290 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19291 return false;
19292 /* FALLTHRU */
19293 case CCGOCmode:
19294 if (req_mode == CCZmode)
19295 return false;
19296 /* FALLTHRU */
19297 case CCZmode:
19298 break;
19299
19300 case CCAmode:
19301 case CCCmode:
19302 case CCOmode:
19303 case CCSmode:
19304 if (set_mode != req_mode)
19305 return false;
19306 break;
19307
19308 default:
19309 gcc_unreachable ();
19310 }
19311
19312 return GET_MODE (SET_SRC (set)) == set_mode;
19313 }
19314
19315 /* Generate insn patterns to do an integer compare of OPERANDS. */
19316
19317 static rtx
19318 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19319 {
19320 enum machine_mode cmpmode;
19321 rtx tmp, flags;
19322
19323 cmpmode = SELECT_CC_MODE (code, op0, op1);
19324 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19325
19326 /* This is very simple, but making the interface the same as in the
19327 FP case makes the rest of the code easier. */
19328 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19329 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19330
19331 /* Return the test that should be put into the flags user, i.e.
19332 the bcc, scc, or cmov instruction. */
19333 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19334 }
19335
19336 /* Figure out whether to use ordered or unordered fp comparisons.
19337 Return the appropriate mode to use. */
19338
19339 enum machine_mode
19340 ix86_fp_compare_mode (enum rtx_code)
19341 {
19342 /* ??? In order to make all comparisons reversible, we do all comparisons
19343 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19344 all forms of trapping and nontrapping comparisons, we can make inequality
19345 comparisons trapping again, since that results in better code when using
19346 FCOM-based compares. */
19347 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19348 }
19349
19350 enum machine_mode
19351 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19352 {
19353 enum machine_mode mode = GET_MODE (op0);
19354
19355 if (SCALAR_FLOAT_MODE_P (mode))
19356 {
19357 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19358 return ix86_fp_compare_mode (code);
19359 }
19360
19361 switch (code)
19362 {
19363 /* Only zero flag is needed. */
19364 case EQ: /* ZF=0 */
19365 case NE: /* ZF!=0 */
19366 return CCZmode;
19367 /* Codes needing carry flag. */
19368 case GEU: /* CF=0 */
19369 case LTU: /* CF=1 */
19370 /* Detect overflow checks. They need just the carry flag. */
19371 if (GET_CODE (op0) == PLUS
19372 && rtx_equal_p (op1, XEXP (op0, 0)))
19373 return CCCmode;
19374 else
19375 return CCmode;
19376 case GTU: /* CF=0 & ZF=0 */
19377 case LEU: /* CF=1 | ZF=1 */
19378 return CCmode;
19379 /* Codes possibly doable only with sign flag when
19380 comparing against zero. */
19381 case GE: /* SF=OF or SF=0 */
19382 case LT: /* SF<>OF or SF=1 */
19383 if (op1 == const0_rtx)
19384 return CCGOCmode;
19385 else
19386 /* For other cases Carry flag is not required. */
19387 return CCGCmode;
19388 /* Codes doable only with the sign flag when comparing
19389 against zero, but for which we lack a jump instruction,
19390 so we need to use relational tests against overflow,
19391 which thus needs to be zero. */
19392 case GT: /* ZF=0 & SF=OF */
19393 case LE: /* ZF=1 | SF<>OF */
19394 if (op1 == const0_rtx)
19395 return CCNOmode;
19396 else
19397 return CCGCmode;
19398 /* The strcmp pattern does (use flags) and combine may ask us for a proper
19399 mode. */
19400 case USE:
19401 return CCmode;
19402 default:
19403 gcc_unreachable ();
19404 }
19405 }
19406
19407 /* Return the fixed registers used for condition codes. */
19408
19409 static bool
19410 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19411 {
19412 *p1 = FLAGS_REG;
19413 *p2 = FPSR_REG;
19414 return true;
19415 }
19416
19417 /* If two condition code modes are compatible, return a condition code
19418 mode which is compatible with both. Otherwise, return
19419 VOIDmode. */
19420
19421 static enum machine_mode
19422 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19423 {
19424 if (m1 == m2)
19425 return m1;
19426
19427 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19428 return VOIDmode;
19429
19430 if ((m1 == CCGCmode && m2 == CCGOCmode)
19431 || (m1 == CCGOCmode && m2 == CCGCmode))
19432 return CCGCmode;
19433
19434 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19435 return m2;
19436 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19437 return m1;
19438
19439 switch (m1)
19440 {
19441 default:
19442 gcc_unreachable ();
19443
19444 case CCmode:
19445 case CCGCmode:
19446 case CCGOCmode:
19447 case CCNOmode:
19448 case CCAmode:
19449 case CCCmode:
19450 case CCOmode:
19451 case CCSmode:
19452 case CCZmode:
19453 switch (m2)
19454 {
19455 default:
19456 return VOIDmode;
19457
19458 case CCmode:
19459 case CCGCmode:
19460 case CCGOCmode:
19461 case CCNOmode:
19462 case CCAmode:
19463 case CCCmode:
19464 case CCOmode:
19465 case CCSmode:
19466 case CCZmode:
19467 return CCmode;
19468 }
19469
19470 case CCFPmode:
19471 case CCFPUmode:
19472 /* These are only compatible with themselves, which we already
19473 checked above. */
19474 return VOIDmode;
19475 }
19476 }
19477
19478
19479 /* Return a comparison we can do that is equivalent to
19480 swap_condition (code), apart possibly from orderedness.
19481 But, never change orderedness if TARGET_IEEE_FP, returning
19482 UNKNOWN in that case if necessary. */
19483
19484 static enum rtx_code
19485 ix86_fp_swap_condition (enum rtx_code code)
19486 {
19487 switch (code)
19488 {
19489 case GT: /* GTU - CF=0 & ZF=0 */
19490 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19491 case GE: /* GEU - CF=0 */
19492 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19493 case UNLT: /* LTU - CF=1 */
19494 return TARGET_IEEE_FP ? UNKNOWN : GT;
19495 case UNLE: /* LEU - CF=1 | ZF=1 */
19496 return TARGET_IEEE_FP ? UNKNOWN : GE;
19497 default:
19498 return swap_condition (code);
19499 }
19500 }
19501
19502 /* Return the cost of comparison CODE using the best strategy for performance.
19503 All following functions use the number of instructions as a cost metric.
19504 In the future this should be tweaked to compute bytes for optimize_size and
19505 take into account the performance of various instructions on various CPUs. */
19506
19507 static int
19508 ix86_fp_comparison_cost (enum rtx_code code)
19509 {
19510 int arith_cost;
19511
19512 /* The cost of code using bit-twiddling on %ah. */
19513 switch (code)
19514 {
19515 case UNLE:
19516 case UNLT:
19517 case LTGT:
19518 case GT:
19519 case GE:
19520 case UNORDERED:
19521 case ORDERED:
19522 case UNEQ:
19523 arith_cost = 4;
19524 break;
19525 case LT:
19526 case NE:
19527 case EQ:
19528 case UNGE:
19529 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19530 break;
19531 case LE:
19532 case UNGT:
19533 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19534 break;
19535 default:
19536 gcc_unreachable ();
19537 }
19538
19539 switch (ix86_fp_comparison_strategy (code))
19540 {
19541 case IX86_FPCMP_COMI:
19542 return arith_cost > 4 ? 3 : 2;
19543 case IX86_FPCMP_SAHF:
19544 return arith_cost > 4 ? 4 : 3;
19545 default:
19546 return arith_cost;
19547 }
19548 }
19549
19550 /* Return the strategy to use for a floating-point comparison. We assume that
19551 fcomi is always preferable where available, since that is also true when looking
19552 at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19553
19554 enum ix86_fpcmp_strategy
19555 ix86_fp_comparison_strategy (enum rtx_code)
19556 {
19557 /* Do fcomi/sahf based test when profitable. */
19558
19559 if (TARGET_CMOVE)
19560 return IX86_FPCMP_COMI;
19561
19562 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19563 return IX86_FPCMP_SAHF;
19564
19565 return IX86_FPCMP_ARITH;
19566 }
19567
19568 /* Swap, force into registers, or otherwise massage the two operands
19569 to a fp comparison. The operands are updated in place; the new
19570 comparison code is returned. */
19571
19572 static enum rtx_code
19573 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19574 {
19575 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19576 rtx op0 = *pop0, op1 = *pop1;
19577 enum machine_mode op_mode = GET_MODE (op0);
19578 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19579
19580 /* All of the unordered compare instructions only work on registers.
19581 The same is true of the fcomi compare instructions. The XFmode
19582 compare instructions require registers except when comparing
19583 against zero or when converting operand 1 from fixed point to
19584 floating point. */
19585
19586 if (!is_sse
19587 && (fpcmp_mode == CCFPUmode
19588 || (op_mode == XFmode
19589 && ! (standard_80387_constant_p (op0) == 1
19590 || standard_80387_constant_p (op1) == 1)
19591 && GET_CODE (op1) != FLOAT)
19592 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19593 {
19594 op0 = force_reg (op_mode, op0);
19595 op1 = force_reg (op_mode, op1);
19596 }
19597 else
19598 {
19599 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19600 things around if they appear profitable, otherwise force op0
19601 into a register. */
19602
19603 if (standard_80387_constant_p (op0) == 0
19604 || (MEM_P (op0)
19605 && ! (standard_80387_constant_p (op1) == 0
19606 || MEM_P (op1))))
19607 {
19608 enum rtx_code new_code = ix86_fp_swap_condition (code);
19609 if (new_code != UNKNOWN)
19610 {
19611 rtx tmp;
19612 tmp = op0, op0 = op1, op1 = tmp;
19613 code = new_code;
19614 }
19615 }
19616
19617 if (!REG_P (op0))
19618 op0 = force_reg (op_mode, op0);
19619
19620 if (CONSTANT_P (op1))
19621 {
19622 int tmp = standard_80387_constant_p (op1);
19623 if (tmp == 0)
19624 op1 = validize_mem (force_const_mem (op_mode, op1));
19625 else if (tmp == 1)
19626 {
19627 if (TARGET_CMOVE)
19628 op1 = force_reg (op_mode, op1);
19629 }
19630 else
19631 op1 = force_reg (op_mode, op1);
19632 }
19633 }
19634
19635 /* Try to rearrange the comparison to make it cheaper. */
19636 if (ix86_fp_comparison_cost (code)
19637 > ix86_fp_comparison_cost (swap_condition (code))
19638 && (REG_P (op1) || can_create_pseudo_p ()))
19639 {
19640 rtx tmp;
19641 tmp = op0, op0 = op1, op1 = tmp;
19642 code = swap_condition (code);
19643 if (!REG_P (op0))
19644 op0 = force_reg (op_mode, op0);
19645 }
19646
19647 *pop0 = op0;
19648 *pop1 = op1;
19649 return code;
19650 }
19651
19652 /* Convert the comparison codes we use to represent an FP comparison to the
19653 integer code that will result in a proper branch. Return UNKNOWN if no such
19654 code is available. */
19655
19656 enum rtx_code
19657 ix86_fp_compare_code_to_integer (enum rtx_code code)
19658 {
19659 switch (code)
19660 {
19661 case GT:
19662 return GTU;
19663 case GE:
19664 return GEU;
19665 case ORDERED:
19666 case UNORDERED:
19667 return code;
19668 break;
19669 case UNEQ:
19670 return EQ;
19671 break;
19672 case UNLT:
19673 return LTU;
19674 break;
19675 case UNLE:
19676 return LEU;
19677 break;
19678 case LTGT:
19679 return NE;
19680 break;
19681 default:
19682 return UNKNOWN;
19683 }
19684 }
19685
19686 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19687
19688 static rtx
19689 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19690 {
19691 enum machine_mode fpcmp_mode, intcmp_mode;
19692 rtx tmp, tmp2;
19693
19694 fpcmp_mode = ix86_fp_compare_mode (code);
19695 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19696
19697 /* Do fcomi/sahf based test when profitable. */
19698 switch (ix86_fp_comparison_strategy (code))
19699 {
19700 case IX86_FPCMP_COMI:
19701 intcmp_mode = fpcmp_mode;
19702 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19703 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19704 tmp);
19705 emit_insn (tmp);
19706 break;
19707
19708 case IX86_FPCMP_SAHF:
19709 intcmp_mode = fpcmp_mode;
19710 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19711 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19712 tmp);
19713
19714 if (!scratch)
19715 scratch = gen_reg_rtx (HImode);
19716 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19717 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19718 break;
19719
19720 case IX86_FPCMP_ARITH:
19721 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19722 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19723 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19724 if (!scratch)
19725 scratch = gen_reg_rtx (HImode);
19726 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19727
19728 /* In the unordered case, we have to check C2 for NaNs, which
19729 doesn't happen to work out to anything nice combination-wise.
19730 So do some bit twiddling on the value we've got in AH to come
19731 up with an appropriate set of condition codes. */
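/* After fnstsw, AH holds the x87 condition flags: C0 in bit 0 (0x01),
C2 in bit 2 (0x04) and C3 in bit 6 (0x40), so 0x45 masks C3|C2|C0.
fcom leaves C3=C2=C0=1 for unordered, C0 set for less, C3 set for
equal and all three clear for greater, which the tests below use. */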
19732
19733 intcmp_mode = CCNOmode;
19734 switch (code)
19735 {
19736 case GT:
19737 case UNGT:
19738 if (code == GT || !TARGET_IEEE_FP)
19739 {
19740 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19741 code = EQ;
19742 }
19743 else
19744 {
19745 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19746 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19747 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19748 intcmp_mode = CCmode;
19749 code = GEU;
19750 }
19751 break;
19752 case LT:
19753 case UNLT:
19754 if (code == LT && TARGET_IEEE_FP)
19755 {
19756 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19757 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19758 intcmp_mode = CCmode;
19759 code = EQ;
19760 }
19761 else
19762 {
19763 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19764 code = NE;
19765 }
19766 break;
19767 case GE:
19768 case UNGE:
19769 if (code == GE || !TARGET_IEEE_FP)
19770 {
19771 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19772 code = EQ;
19773 }
19774 else
19775 {
19776 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19777 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19778 code = NE;
19779 }
19780 break;
19781 case LE:
19782 case UNLE:
19783 if (code == LE && TARGET_IEEE_FP)
19784 {
19785 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19786 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19787 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19788 intcmp_mode = CCmode;
19789 code = LTU;
19790 }
19791 else
19792 {
19793 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19794 code = NE;
19795 }
19796 break;
19797 case EQ:
19798 case UNEQ:
19799 if (code == EQ && TARGET_IEEE_FP)
19800 {
19801 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19802 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19803 intcmp_mode = CCmode;
19804 code = EQ;
19805 }
19806 else
19807 {
19808 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19809 code = NE;
19810 }
19811 break;
19812 case NE:
19813 case LTGT:
19814 if (code == NE && TARGET_IEEE_FP)
19815 {
19816 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19817 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19818 GEN_INT (0x40)));
19819 code = NE;
19820 }
19821 else
19822 {
19823 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19824 code = EQ;
19825 }
19826 break;
19827
19828 case UNORDERED:
19829 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19830 code = NE;
19831 break;
19832 case ORDERED:
19833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19834 code = EQ;
19835 break;
19836
19837 default:
19838 gcc_unreachable ();
19839 }
19840 break;
19841
19842 default:
19843 gcc_unreachable ();
19844 }
19845
19846 /* Return the test that should be put into the flags user, i.e.
19847 the bcc, scc, or cmov instruction. */
19848 return gen_rtx_fmt_ee (code, VOIDmode,
19849 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19850 const0_rtx);
19851 }
19852
19853 static rtx
19854 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19855 {
19856 rtx ret;
19857
19858 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19859 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19860
19861 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19862 {
19863 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19864 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19865 }
19866 else
19867 ret = ix86_expand_int_compare (code, op0, op1);
19868
19869 return ret;
19870 }
19871
19872 void
19873 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19874 {
19875 enum machine_mode mode = GET_MODE (op0);
19876 rtx tmp;
19877
19878 switch (mode)
19879 {
19880 case SFmode:
19881 case DFmode:
19882 case XFmode:
19883 case QImode:
19884 case HImode:
19885 case SImode:
19886 simple:
19887 tmp = ix86_expand_compare (code, op0, op1);
19888 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19889 gen_rtx_LABEL_REF (VOIDmode, label),
19890 pc_rtx);
19891 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19892 return;
19893
19894 case DImode:
19895 if (TARGET_64BIT)
19896 goto simple;
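/* FALLTHRU */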
19897 case TImode:
19898 /* Expand DImode branch into multiple compare+branch. */
19899 {
19900 rtx lo[2], hi[2];
19901 rtx_code_label *label2;
19902 enum rtx_code code1, code2, code3;
19903 enum machine_mode submode;
19904
19905 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19906 {
19907 tmp = op0, op0 = op1, op1 = tmp;
19908 code = swap_condition (code);
19909 }
19910
19911 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19912 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19913
19914 submode = mode == DImode ? SImode : DImode;
19915
19916 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19917 avoid two branches. This costs one extra insn, so disable when
19918 optimizing for size. */
19919
19920 if ((code == EQ || code == NE)
19921 && (!optimize_insn_for_size_p ()
19922 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19923 {
19924 rtx xor0, xor1;
19925
19926 xor1 = hi[0];
19927 if (hi[1] != const0_rtx)
19928 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19929 NULL_RTX, 0, OPTAB_WIDEN);
19930
19931 xor0 = lo[0];
19932 if (lo[1] != const0_rtx)
19933 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19934 NULL_RTX, 0, OPTAB_WIDEN);
19935
19936 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19937 NULL_RTX, 0, OPTAB_WIDEN);
19938
19939 ix86_expand_branch (code, tmp, const0_rtx, label);
19940 return;
19941 }
19942
19943 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
19944 op1 is a constant and its low word is zero, then we can just
19945 examine the high word. Similarly for a low word of -1 and
19946 less-or-equal or greater-than. */
19947
19948 if (CONST_INT_P (hi[1]))
19949 switch (code)
19950 {
19951 case LT: case LTU: case GE: case GEU:
19952 if (lo[1] == const0_rtx)
19953 {
19954 ix86_expand_branch (code, hi[0], hi[1], label);
19955 return;
19956 }
19957 break;
19958 case LE: case LEU: case GT: case GTU:
19959 if (lo[1] == constm1_rtx)
19960 {
19961 ix86_expand_branch (code, hi[0], hi[1], label);
19962 return;
19963 }
19964 break;
19965 default:
19966 break;
19967 }
19968
19969 /* Otherwise, we need two or three jumps. */
19970
19971 label2 = gen_label_rtx ();
19972
19973 code1 = code;
19974 code2 = swap_condition (code);
19975 code3 = unsigned_condition (code);
19976
19977 switch (code)
19978 {
19979 case LT: case GT: case LTU: case GTU:
19980 break;
19981
19982 case LE: code1 = LT; code2 = GT; break;
19983 case GE: code1 = GT; code2 = LT; break;
19984 case LEU: code1 = LTU; code2 = GTU; break;
19985 case GEU: code1 = GTU; code2 = LTU; break;
19986
19987 case EQ: code1 = UNKNOWN; code2 = NE; break;
19988 case NE: code2 = UNKNOWN; break;
19989
19990 default:
19991 gcc_unreachable ();
19992 }
19993
19994 /*
19995 * a < b =>
19996 * if (hi(a) < hi(b)) goto true;
19997 * if (hi(a) > hi(b)) goto false;
19998 * if (lo(a) < lo(b)) goto true;
19999 * false:
20000 */
20001
20002 if (code1 != UNKNOWN)
20003 ix86_expand_branch (code1, hi[0], hi[1], label);
20004 if (code2 != UNKNOWN)
20005 ix86_expand_branch (code2, hi[0], hi[1], label2);
20006
20007 ix86_expand_branch (code3, lo[0], lo[1], label);
20008
20009 if (code2 != UNKNOWN)
20010 emit_label (label2);
20011 return;
20012 }
20013
20014 default:
20015 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20016 goto simple;
20017 }
20018 }
20019
20020 /* Split branch based on floating point condition. */
20021 void
20022 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20023 rtx target1, rtx target2, rtx tmp)
20024 {
20025 rtx condition;
20026 rtx i;
20027
20028 if (target2 != pc_rtx)
20029 {
20030 rtx tmp = target2;
20031 code = reverse_condition_maybe_unordered (code);
20032 target2 = target1;
20033 target1 = tmp;
20034 }
20035
20036 condition = ix86_expand_fp_compare (code, op1, op2,
20037 tmp);
20038
20039 i = emit_jump_insn (gen_rtx_SET
20040 (VOIDmode, pc_rtx,
20041 gen_rtx_IF_THEN_ELSE (VOIDmode,
20042 condition, target1, target2)));
20043 if (split_branch_probability >= 0)
20044 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20045 }
20046
20047 void
20048 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20049 {
20050 rtx ret;
20051
20052 gcc_assert (GET_MODE (dest) == QImode);
20053
20054 ret = ix86_expand_compare (code, op0, op1);
20055 PUT_MODE (ret, QImode);
20056 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20057 }
20058
20059 /* Expand a comparison setting or clearing the carry flag. Return true when
20060 successful and set *POP to the comparison operation. */
20061 static bool
20062 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20063 {
20064 enum machine_mode mode =
20065 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20066
20067 /* Do not handle double-mode compares that go through a special path. */
20068 if (mode == (TARGET_64BIT ? TImode : DImode))
20069 return false;
20070
20071 if (SCALAR_FLOAT_MODE_P (mode))
20072 {
20073 rtx compare_op;
20074 rtx_insn *compare_seq;
20075
20076 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20077
20078 /* Shortcut: the following common codes never translate
20079 into carry-flag compares. */
20080 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20081 || code == ORDERED || code == UNORDERED)
20082 return false;
20083
20084 /* These comparisons require the zero flag; swap the operands so they won't. */
20085 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20086 && !TARGET_IEEE_FP)
20087 {
20088 rtx tmp = op0;
20089 op0 = op1;
20090 op1 = tmp;
20091 code = swap_condition (code);
20092 }
20093
20094 /* Try to expand the comparison and verify that we end up with a
20095 carry-flag-based comparison. This fails only when we decide to
20096 expand the comparison using arithmetic, which is not a very
20097 common scenario. */
20098 start_sequence ();
20099 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20100 compare_seq = get_insns ();
20101 end_sequence ();
20102
20103 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20104 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20105 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20106 else
20107 code = GET_CODE (compare_op);
20108
20109 if (code != LTU && code != GEU)
20110 return false;
20111
20112 emit_insn (compare_seq);
20113 *pop = compare_op;
20114 return true;
20115 }
20116
20117 if (!INTEGRAL_MODE_P (mode))
20118 return false;
20119
20120 switch (code)
20121 {
20122 case LTU:
20123 case GEU:
20124 break;
20125
20126 /* Convert a==0 into (unsigned)a<1. */
20127 case EQ:
20128 case NE:
20129 if (op1 != const0_rtx)
20130 return false;
20131 op1 = const1_rtx;
20132 code = (code == EQ ? LTU : GEU);
20133 break;
20134
20135 /* Convert a>b into b<a or a>=b-1. */
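/* E.g. with a constant op1, a >u 5 becomes a >=u 6, and with a
variable op1, a >u b becomes b <u a; both are carry-flag tests. */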
20136 case GTU:
20137 case LEU:
20138 if (CONST_INT_P (op1))
20139 {
20140 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20141 /* Bail out on overflow. We still can swap operands but that
20142 would force loading of the constant into register. */
20143 if (op1 == const0_rtx
20144 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20145 return false;
20146 code = (code == GTU ? GEU : LTU);
20147 }
20148 else
20149 {
20150 rtx tmp = op1;
20151 op1 = op0;
20152 op0 = tmp;
20153 code = (code == GTU ? LTU : GEU);
20154 }
20155 break;
20156
20157 /* Convert a>=0 into (unsigned)a<0x80000000. */
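/* a >= 0 iff the sign bit is clear, i.e. (unsigned) a < 0x80000000
for SImode; likewise below, a <= -1 iff (unsigned) a >= 0x80000000. */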
20158 case LT:
20159 case GE:
20160 if (mode == DImode || op1 != const0_rtx)
20161 return false;
20162 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20163 code = (code == LT ? GEU : LTU);
20164 break;
20165 case LE:
20166 case GT:
20167 if (mode == DImode || op1 != constm1_rtx)
20168 return false;
20169 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20170 code = (code == LE ? GEU : LTU);
20171 break;
20172
20173 default:
20174 return false;
20175 }
20176 /* Swapping operands may cause constant to appear as first operand. */
20177 if (!nonimmediate_operand (op0, VOIDmode))
20178 {
20179 if (!can_create_pseudo_p ())
20180 return false;
20181 op0 = force_reg (mode, op0);
20182 }
20183 *pop = ix86_expand_compare (code, op0, op1);
20184 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20185 return true;
20186 }
20187
20188 bool
20189 ix86_expand_int_movcc (rtx operands[])
20190 {
20191 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20192 rtx_insn *compare_seq;
20193 rtx compare_op;
20194 enum machine_mode mode = GET_MODE (operands[0]);
20195 bool sign_bit_compare_p = false;
20196 rtx op0 = XEXP (operands[1], 0);
20197 rtx op1 = XEXP (operands[1], 1);
20198
20199 if (GET_MODE (op0) == TImode
20200 || (GET_MODE (op0) == DImode
20201 && !TARGET_64BIT))
20202 return false;
20203
20204 start_sequence ();
20205 compare_op = ix86_expand_compare (code, op0, op1);
20206 compare_seq = get_insns ();
20207 end_sequence ();
20208
20209 compare_code = GET_CODE (compare_op);
20210
20211 if ((op1 == const0_rtx && (code == GE || code == LT))
20212 || (op1 == constm1_rtx && (code == GT || code == LE)))
20213 sign_bit_compare_p = true;
20214
20215 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20216 HImode insns, we'd be swallowed in word prefix ops. */
20217
20218 if ((mode != HImode || TARGET_FAST_PREFIX)
20219 && (mode != (TARGET_64BIT ? TImode : DImode))
20220 && CONST_INT_P (operands[2])
20221 && CONST_INT_P (operands[3]))
20222 {
20223 rtx out = operands[0];
20224 HOST_WIDE_INT ct = INTVAL (operands[2]);
20225 HOST_WIDE_INT cf = INTVAL (operands[3]);
20226 HOST_WIDE_INT diff;
20227
20228 diff = ct - cf;
20229 /* Sign-bit compares are better done using shifts than by using
20230 sbb. */
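/* emit_store_flag with a normalizep argument of -1 yields a 0/-1
result; for a sign-bit compare it can use an arithmetic right shift
instead of a compare followed by sbb. */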
20231 if (sign_bit_compare_p
20232 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20233 {
20234 /* Detect overlap between destination and compare sources. */
20235 rtx tmp = out;
20236
20237 if (!sign_bit_compare_p)
20238 {
20239 rtx flags;
20240 bool fpcmp = false;
20241
20242 compare_code = GET_CODE (compare_op);
20243
20244 flags = XEXP (compare_op, 0);
20245
20246 if (GET_MODE (flags) == CCFPmode
20247 || GET_MODE (flags) == CCFPUmode)
20248 {
20249 fpcmp = true;
20250 compare_code
20251 = ix86_fp_compare_code_to_integer (compare_code);
20252 }
20253
20254 /* To simplify rest of code, restrict to the GEU case. */
20255 if (compare_code == LTU)
20256 {
20257 HOST_WIDE_INT tmp = ct;
20258 ct = cf;
20259 cf = tmp;
20260 compare_code = reverse_condition (compare_code);
20261 code = reverse_condition (code);
20262 }
20263 else
20264 {
20265 if (fpcmp)
20266 PUT_CODE (compare_op,
20267 reverse_condition_maybe_unordered
20268 (GET_CODE (compare_op)));
20269 else
20270 PUT_CODE (compare_op,
20271 reverse_condition (GET_CODE (compare_op)));
20272 }
20273 diff = ct - cf;
20274
20275 if (reg_overlap_mentioned_p (out, op0)
20276 || reg_overlap_mentioned_p (out, op1))
20277 tmp = gen_reg_rtx (mode);
20278
20279 if (mode == DImode)
20280 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20281 else
20282 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20283 flags, compare_op));
20284 }
20285 else
20286 {
20287 if (code == GT || code == GE)
20288 code = reverse_condition (code);
20289 else
20290 {
20291 HOST_WIDE_INT tmp = ct;
20292 ct = cf;
20293 cf = tmp;
20294 diff = ct - cf;
20295 }
20296 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20297 }
20298
20299 if (diff == 1)
20300 {
20301 /*
20302 * cmpl op0,op1
20303 * sbbl dest,dest
20304 * [addl dest, ct]
20305 *
20306 * Size 5 - 8.
20307 */
20308 if (ct)
20309 tmp = expand_simple_binop (mode, PLUS,
20310 tmp, GEN_INT (ct),
20311 copy_rtx (tmp), 1, OPTAB_DIRECT);
20312 }
20313 else if (cf == -1)
20314 {
20315 /*
20316 * cmpl op0,op1
20317 * sbbl dest,dest
20318 * orl $ct, dest
20319 *
20320 * Size 8.
20321 */
20322 tmp = expand_simple_binop (mode, IOR,
20323 tmp, GEN_INT (ct),
20324 copy_rtx (tmp), 1, OPTAB_DIRECT);
20325 }
20326 else if (diff == -1 && ct)
20327 {
20328 /*
20329 * cmpl op0,op1
20330 * sbbl dest,dest
20331 * notl dest
20332 * [addl dest, cf]
20333 *
20334 * Size 8 - 11.
20335 */
20336 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20337 if (cf)
20338 tmp = expand_simple_binop (mode, PLUS,
20339 copy_rtx (tmp), GEN_INT (cf),
20340 copy_rtx (tmp), 1, OPTAB_DIRECT);
20341 }
20342 else
20343 {
20344 /*
20345 * cmpl op0,op1
20346 * sbbl dest,dest
20347 * [notl dest]
20348 * andl cf - ct, dest
20349 * [addl dest, ct]
20350 *
20351 * Size 8 - 11.
20352 */
20353
20354 if (cf == 0)
20355 {
20356 cf = ct;
20357 ct = 0;
20358 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20359 }
20360
20361 tmp = expand_simple_binop (mode, AND,
20362 copy_rtx (tmp),
20363 gen_int_mode (cf - ct, mode),
20364 copy_rtx (tmp), 1, OPTAB_DIRECT);
20365 if (ct)
20366 tmp = expand_simple_binop (mode, PLUS,
20367 copy_rtx (tmp), GEN_INT (ct),
20368 copy_rtx (tmp), 1, OPTAB_DIRECT);
20369 }
20370
20371 if (!rtx_equal_p (tmp, out))
20372 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20373
20374 return true;
20375 }
20376
20377 if (diff < 0)
20378 {
20379 enum machine_mode cmp_mode = GET_MODE (op0);
20380
20381 HOST_WIDE_INT tmp;
20382 tmp = ct, ct = cf, cf = tmp;
20383 diff = -diff;
20384
20385 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20386 {
20387 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20388
20389 /* We may be reversing an unordered compare to a normal compare, which
20390 is not valid in general (we may convert a non-trapping condition
20391 into a trapping one); however, on i386 we currently emit all
20392 comparisons unordered. */
20393 compare_code = reverse_condition_maybe_unordered (compare_code);
20394 code = reverse_condition_maybe_unordered (code);
20395 }
20396 else
20397 {
20398 compare_code = reverse_condition (compare_code);
20399 code = reverse_condition (code);
20400 }
20401 }
20402
20403 compare_code = UNKNOWN;
20404 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20405 && CONST_INT_P (op1))
20406 {
20407 if (op1 == const0_rtx
20408 && (code == LT || code == GE))
20409 compare_code = code;
20410 else if (op1 == constm1_rtx)
20411 {
20412 if (code == LE)
20413 compare_code = LT;
20414 else if (code == GT)
20415 compare_code = GE;
20416 }
20417 }
20418
20419 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20420 if (compare_code != UNKNOWN
20421 && GET_MODE (op0) == GET_MODE (out)
20422 && (cf == -1 || ct == -1))
20423 {
20424 /* If the lea code below could be used, only optimize
20425 if it results in a 2-insn sequence. */
20426
20427 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20428 || diff == 3 || diff == 5 || diff == 9)
20429 || (compare_code == LT && ct == -1)
20430 || (compare_code == GE && cf == -1))
20431 {
20432 /*
20433 * notl op1 (if necessary)
20434 * sarl $31, op1
20435 * orl cf, op1
20436 */
20437 if (ct != -1)
20438 {
20439 cf = ct;
20440 ct = -1;
20441 code = reverse_condition (code);
20442 }
20443
20444 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20445
20446 out = expand_simple_binop (mode, IOR,
20447 out, GEN_INT (cf),
20448 out, 1, OPTAB_DIRECT);
20449 if (out != operands[0])
20450 emit_move_insn (operands[0], out);
20451
20452 return true;
20453 }
20454 }
20455
20456
20457 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20458 || diff == 3 || diff == 5 || diff == 9)
20459 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20460 && (mode != DImode
20461 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20462 {
20463 /*
20464 * xorl dest,dest
20465 * cmpl op1,op2
20466 * setcc dest
20467 * lea cf(dest*(ct-cf)),dest
20468 *
20469 * Size 14.
20470 *
20471 * This also catches the degenerate setcc-only case.
20472 */
20473
20474 rtx tmp;
20475 int nops;
20476
20477 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20478
20479 nops = 0;
20480 /* On x86_64 the lea instruction operates on Pmode, so we need
20481 to get the arithmetic done in the proper mode to match. */
20482 if (diff == 1)
20483 tmp = copy_rtx (out);
20484 else
20485 {
20486 rtx out1;
20487 out1 = copy_rtx (out);
20488 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20489 nops++;
20490 if (diff & 1)
20491 {
20492 tmp = gen_rtx_PLUS (mode, tmp, out1);
20493 nops++;
20494 }
20495 }
20496 if (cf != 0)
20497 {
20498 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20499 nops++;
20500 }
20501 if (!rtx_equal_p (tmp, out))
20502 {
20503 if (nops == 1)
20504 out = force_operand (tmp, copy_rtx (out));
20505 else
20506 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20507 }
20508 if (!rtx_equal_p (out, operands[0]))
20509 emit_move_insn (operands[0], copy_rtx (out));
20510
20511 return true;
20512 }
20513
20514 /*
20515 * General case: Jumpful:
20516 * xorl dest,dest cmpl op1, op2
20517 * cmpl op1, op2 movl ct, dest
20518 * setcc dest jcc 1f
20519 * decl dest movl cf, dest
20520 * andl (cf-ct),dest 1:
20521 * addl ct,dest
20522 *
20523 * Size 20. Size 14.
20524 *
20525 * This is reasonably steep, but branch mispredict costs are
20526 * high on modern CPUs, so consider failing only if optimizing
20527 * for space.
20528 */
20529
20530 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20531 && BRANCH_COST (optimize_insn_for_speed_p (),
20532 false) >= 2)
20533 {
20534 if (cf == 0)
20535 {
20536 enum machine_mode cmp_mode = GET_MODE (op0);
20537
20538 cf = ct;
20539 ct = 0;
20540
20541 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20542 {
20543 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20544
20545 /* We may be reversing an unordered compare to a normal compare,
20546 which is not valid in general (we may convert a non-trapping
20547 condition into a trapping one); however, on i386 we currently
20548 emit all comparisons unordered. */
20549 code = reverse_condition_maybe_unordered (code);
20550 }
20551 else
20552 {
20553 code = reverse_condition (code);
20554 if (compare_code != UNKNOWN)
20555 compare_code = reverse_condition (compare_code);
20556 }
20557 }
20558
20559 if (compare_code != UNKNOWN)
20560 {
20561 /* notl op1 (if needed)
20562 sarl $31, op1
20563 andl (cf-ct), op1
20564 addl ct, op1
20565
20566 For x < 0 (resp. x <= -1) there will be no notl,
20567 so if possible swap the constants to get rid of the
20568 complement.
20569 True/false will be -1/0 while code below (store flag
20570 followed by decrement) is 0/-1, so the constants need
20571 to be exchanged once more. */
20572
20573 if (compare_code == GE || !cf)
20574 {
20575 code = reverse_condition (code);
20576 compare_code = LT;
20577 }
20578 else
20579 {
20580 HOST_WIDE_INT tmp = cf;
20581 cf = ct;
20582 ct = tmp;
20583 }
20584
20585 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20586 }
20587 else
20588 {
20589 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20590
20591 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20592 constm1_rtx,
20593 copy_rtx (out), 1, OPTAB_DIRECT);
20594 }
20595
20596 out = expand_simple_binop (mode, AND, copy_rtx (out),
20597 gen_int_mode (cf - ct, mode),
20598 copy_rtx (out), 1, OPTAB_DIRECT);
20599 if (ct)
20600 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20601 copy_rtx (out), 1, OPTAB_DIRECT);
20602 if (!rtx_equal_p (out, operands[0]))
20603 emit_move_insn (operands[0], copy_rtx (out));
20604
20605 return true;
20606 }
20607 }
20608
20609 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20610 {
20611 /* Try a few things more with specific constants and a variable. */
20612
20613 optab op;
20614 rtx var, orig_out, out, tmp;
20615
20616 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20617 return false;
20618
20619 /* If one of the two operands is an interesting constant, load a
20620 constant using the code above and mask it in with a logical operation. */
20621
20622 if (CONST_INT_P (operands[2]))
20623 {
20624 var = operands[3];
20625 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20626 operands[3] = constm1_rtx, op = and_optab;
20627 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20628 operands[3] = const0_rtx, op = ior_optab;
20629 else
20630 return false;
20631 }
20632 else if (CONST_INT_P (operands[3]))
20633 {
20634 var = operands[2];
20635 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20636 operands[2] = constm1_rtx, op = and_optab;
20637 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20638 operands[2] = const0_rtx, op = ior_optab;
20639 else
20640 return false;
20641 }
20642 else
20643 return false;
20644
20645 orig_out = operands[0];
20646 tmp = gen_reg_rtx (mode);
20647 operands[0] = tmp;
20648
20649 /* Recurse to get the constant loaded. */
20650 if (ix86_expand_int_movcc (operands) == 0)
20651 return false;
20652
20653 /* Mask in the interesting variable. */
20654 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20655 OPTAB_WIDEN);
20656 if (!rtx_equal_p (out, orig_out))
20657 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20658
20659 return true;
20660 }
20661
20662 /*
20663 * For comparison with above,
20664 *
20665 * movl cf,dest
20666 * movl ct,tmp
20667 * cmpl op1,op2
20668 * cmovcc tmp,dest
20669 *
20670 * Size 15.
20671 */
20672
20673 if (! nonimmediate_operand (operands[2], mode))
20674 operands[2] = force_reg (mode, operands[2]);
20675 if (! nonimmediate_operand (operands[3], mode))
20676 operands[3] = force_reg (mode, operands[3]);
20677
20678 if (! register_operand (operands[2], VOIDmode)
20679 && (mode == QImode
20680 || ! register_operand (operands[3], VOIDmode)))
20681 operands[2] = force_reg (mode, operands[2]);
20682
20683 if (mode == QImode
20684 && ! register_operand (operands[3], VOIDmode))
20685 operands[3] = force_reg (mode, operands[3]);
20686
20687 emit_insn (compare_seq);
20688 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20689 gen_rtx_IF_THEN_ELSE (mode,
20690 compare_op, operands[2],
20691 operands[3])));
20692 return true;
20693 }
20694
20695 /* Swap, force into registers, or otherwise massage the two operands
20696 to an sse comparison with a mask result. Thus we differ a bit from
20697 ix86_prepare_fp_compare_args which expects to produce a flags result.
20698
20699 The DEST operand exists to help determine whether to commute commutative
20700 operators. The POP0/POP1 operands are updated in place. The new
20701 comparison code is returned, or UNKNOWN if not implementable. */
20702
20703 static enum rtx_code
20704 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20705 rtx *pop0, rtx *pop1)
20706 {
20707 rtx tmp;
20708
20709 switch (code)
20710 {
20711 case LTGT:
20712 case UNEQ:
20713 /* AVX supports all the needed comparisons. */
20714 if (TARGET_AVX)
20715 break;
20716 /* We have no LTGT as an operator. We could implement it with
20717 NE & ORDERED, but this requires an extra temporary. It's
20718 not clear that it's worth it. */
20719 return UNKNOWN;
20720
20721 case LT:
20722 case LE:
20723 case UNGT:
20724 case UNGE:
20725 /* These are supported directly. */
20726 break;
20727
20728 case EQ:
20729 case NE:
20730 case UNORDERED:
20731 case ORDERED:
20732 /* AVX has 3 operand comparisons, no need to swap anything. */
20733 if (TARGET_AVX)
20734 break;
20735 /* For commutative operators, try to canonicalize the destination
20736 operand to be first in the comparison - this helps reload to
20737 avoid extra moves. */
20738 if (!dest || !rtx_equal_p (dest, *pop1))
20739 break;
20740 /* FALLTHRU */
20741
20742 case GE:
20743 case GT:
20744 case UNLE:
20745 case UNLT:
20746 /* These are not supported directly before AVX, and furthermore
20747 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20748 comparison operands to transform into something that is
20749 supported. */
20750 tmp = *pop0;
20751 *pop0 = *pop1;
20752 *pop1 = tmp;
20753 code = swap_condition (code);
20754 break;
20755
20756 default:
20757 gcc_unreachable ();
20758 }
20759
20760 return code;
20761 }
20762
20763 /* Detect conditional moves that exactly match min/max operational
20764 semantics. Note that this is IEEE safe, as long as we don't
20765 interchange the operands.
20766
20767 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20768 and TRUE if the operation is successful and instructions are emitted. */
20769
20770 static bool
20771 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20772 rtx cmp_op1, rtx if_true, rtx if_false)
20773 {
20774 enum machine_mode mode;
20775 bool is_min;
20776 rtx tmp;
20777
20778 if (code == LT)
20779 ;
20780 else if (code == UNGE)
20781 {
20782 tmp = if_true;
20783 if_true = if_false;
20784 if_false = tmp;
20785 }
20786 else
20787 return false;
20788
20789 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20790 is_min = true;
20791 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20792 is_min = false;
20793 else
20794 return false;
20795
20796 mode = GET_MODE (dest);
20797
20798 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20799 but MODE may be a vector mode and thus not appropriate. */
20800 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20801 {
20802 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20803 rtvec v;
20804
20805 if_true = force_reg (mode, if_true);
20806 v = gen_rtvec (2, if_true, if_false);
20807 tmp = gen_rtx_UNSPEC (mode, v, u);
20808 }
20809 else
20810 {
20811 code = is_min ? SMIN : SMAX;
20812 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20813 }
20814
20815 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20816 return true;
20817 }
20818
20819 /* Expand an sse vector comparison. Return the register with the result. */
20820
20821 static rtx
20822 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20823 rtx op_true, rtx op_false)
20824 {
20825 enum machine_mode mode = GET_MODE (dest);
20826 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20827
20828 /* In the general case the result of the comparison can differ in type from the operands. */
20829 enum machine_mode cmp_mode;
20830
20831 /* In AVX512F the result of comparison is an integer mask. */
20832 bool maskcmp = false;
20833 rtx x;
20834
20835 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20836 {
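/* The mask has one bit per vector element, so e.g. a V16SImode
comparison produces an HImode (16-bit) mask. */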
20837 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20838 gcc_assert (cmp_mode != BLKmode);
20839
20840 maskcmp = true;
20841 }
20842 else
20843 cmp_mode = cmp_ops_mode;
20844
20845
20846 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20847 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20848 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20849
20850 if (optimize
20851 || reg_overlap_mentioned_p (dest, op_true)
20852 || reg_overlap_mentioned_p (dest, op_false))
20853 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20854
20855 /* Compare patterns for int modes are unspec in AVX512F only. */
20856 if (maskcmp && (code == GT || code == EQ))
20857 {
20858 rtx (*gen)(rtx, rtx, rtx);
20859
20860 switch (cmp_ops_mode)
20861 {
20862 case V16SImode:
20863 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20864 break;
20865 case V8DImode:
20866 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20867 break;
20868 default:
20869 gen = NULL;
20870 }
20871
20872 if (gen)
20873 {
20874 emit_insn (gen (dest, cmp_op0, cmp_op1));
20875 return dest;
20876 }
20877 }
20878 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20879
20880 if (cmp_mode != mode && !maskcmp)
20881 {
20882 x = force_reg (cmp_ops_mode, x);
20883 convert_move (dest, x, false);
20884 }
20885 else
20886 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20887
20888 return dest;
20889 }
20890
20891 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20892 operations. This is used for both scalar and vector conditional moves. */
20893
20894 static void
20895 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20896 {
20897 enum machine_mode mode = GET_MODE (dest);
20898 enum machine_mode cmpmode = GET_MODE (cmp);
20899
20900 /* In AVX512F the result of comparison is an integer mask. */
20901 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20902
20903 rtx t2, t3, x;
20904
20905 if (vector_all_ones_operand (op_true, mode)
20906 && rtx_equal_p (op_false, CONST0_RTX (mode))
20907 && !maskcmp)
20908 {
20909 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20910 }
20911 else if (op_false == CONST0_RTX (mode)
20912 && !maskcmp)
20913 {
20914 op_true = force_reg (mode, op_true);
20915 x = gen_rtx_AND (mode, cmp, op_true);
20916 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20917 }
20918 else if (op_true == CONST0_RTX (mode)
20919 && !maskcmp)
20920 {
20921 op_false = force_reg (mode, op_false);
20922 x = gen_rtx_NOT (mode, cmp);
20923 x = gen_rtx_AND (mode, x, op_false);
20924 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20925 }
20926 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20927 && !maskcmp)
20928 {
20929 op_false = force_reg (mode, op_false);
20930 x = gen_rtx_IOR (mode, cmp, op_false);
20931 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20932 }
20933 else if (TARGET_XOP
20934 && !maskcmp)
20935 {
20936 op_true = force_reg (mode, op_true);
20937
20938 if (!nonimmediate_operand (op_false, mode))
20939 op_false = force_reg (mode, op_false);
20940
20941 emit_insn (gen_rtx_SET (mode, dest,
20942 gen_rtx_IF_THEN_ELSE (mode, cmp,
20943 op_true,
20944 op_false)));
20945 }
20946 else
20947 {
20948 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20949 rtx d = dest;
20950
20951 if (!nonimmediate_operand (op_true, mode))
20952 op_true = force_reg (mode, op_true);
20953
20954 op_false = force_reg (mode, op_false);
20955
20956 switch (mode)
20957 {
20958 case V4SFmode:
20959 if (TARGET_SSE4_1)
20960 gen = gen_sse4_1_blendvps;
20961 break;
20962 case V2DFmode:
20963 if (TARGET_SSE4_1)
20964 gen = gen_sse4_1_blendvpd;
20965 break;
20966 case V16QImode:
20967 case V8HImode:
20968 case V4SImode:
20969 case V2DImode:
20970 if (TARGET_SSE4_1)
20971 {
20972 gen = gen_sse4_1_pblendvb;
20973 if (mode != V16QImode)
20974 d = gen_reg_rtx (V16QImode);
20975 op_false = gen_lowpart (V16QImode, op_false);
20976 op_true = gen_lowpart (V16QImode, op_true);
20977 cmp = gen_lowpart (V16QImode, cmp);
20978 }
20979 break;
20980 case V8SFmode:
20981 if (TARGET_AVX)
20982 gen = gen_avx_blendvps256;
20983 break;
20984 case V4DFmode:
20985 if (TARGET_AVX)
20986 gen = gen_avx_blendvpd256;
20987 break;
20988 case V32QImode:
20989 case V16HImode:
20990 case V8SImode:
20991 case V4DImode:
20992 if (TARGET_AVX2)
20993 {
20994 gen = gen_avx2_pblendvb;
20995 if (mode != V32QImode)
20996 d = gen_reg_rtx (V32QImode);
20997 op_false = gen_lowpart (V32QImode, op_false);
20998 op_true = gen_lowpart (V32QImode, op_true);
20999 cmp = gen_lowpart (V32QImode, cmp);
21000 }
21001 break;
21002
21003 case V16SImode:
21004 gen = gen_avx512f_blendmv16si;
21005 break;
21006 case V8DImode:
21007 gen = gen_avx512f_blendmv8di;
21008 break;
21009 case V8DFmode:
21010 gen = gen_avx512f_blendmv8df;
21011 break;
21012 case V16SFmode:
21013 gen = gen_avx512f_blendmv16sf;
21014 break;
21015
21016 default:
21017 break;
21018 }
21019
21020 if (gen != NULL)
21021 {
21022 emit_insn (gen (d, op_false, op_true, cmp));
21023 if (d != dest)
21024 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21025 }
21026 else
21027 {
21028 op_true = force_reg (mode, op_true);
21029
21030 t2 = gen_reg_rtx (mode);
21031 if (optimize)
21032 t3 = gen_reg_rtx (mode);
21033 else
21034 t3 = dest;
21035
21036 x = gen_rtx_AND (mode, op_true, cmp);
21037 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21038
21039 x = gen_rtx_NOT (mode, cmp);
21040 x = gen_rtx_AND (mode, x, op_false);
21041 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21042
21043 x = gen_rtx_IOR (mode, t3, t2);
21044 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21045 }
21046 }
21047 }
21048
21049 /* Expand a floating-point conditional move. Return true if successful. */
21050
21051 bool
21052 ix86_expand_fp_movcc (rtx operands[])
21053 {
21054 enum machine_mode mode = GET_MODE (operands[0]);
21055 enum rtx_code code = GET_CODE (operands[1]);
21056 rtx tmp, compare_op;
21057 rtx op0 = XEXP (operands[1], 0);
21058 rtx op1 = XEXP (operands[1], 1);
21059
21060 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21061 {
21062 enum machine_mode cmode;
21063
21064 /* Since we have no cmove for sse registers, don't force bad register
21065 allocation just to gain access to it. Deny movcc when the
21066 comparison mode doesn't match the move mode. */
21067 cmode = GET_MODE (op0);
21068 if (cmode == VOIDmode)
21069 cmode = GET_MODE (op1);
21070 if (cmode != mode)
21071 return false;
21072
21073 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21074 if (code == UNKNOWN)
21075 return false;
21076
21077 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21078 operands[2], operands[3]))
21079 return true;
21080
21081 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21082 operands[2], operands[3]);
21083 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21084 return true;
21085 }
21086
21087 if (GET_MODE (op0) == TImode
21088 || (GET_MODE (op0) == DImode
21089 && !TARGET_64BIT))
21090 return false;
21091
21092 /* The floating point conditional move instructions don't directly
21093 support conditions resulting from a signed integer comparison. */
21094
21095 compare_op = ix86_expand_compare (code, op0, op1);
21096 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21097 {
21098 tmp = gen_reg_rtx (QImode);
21099 ix86_expand_setcc (tmp, code, op0, op1);
21100
21101 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21102 }
21103
21104 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21105 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21106 operands[2], operands[3])));
21107
21108 return true;
21109 }
21110
21111 /* Expand a floating-point vector conditional move; a vcond operation
21112 rather than a movcc operation. */
21113
21114 bool
21115 ix86_expand_fp_vcond (rtx operands[])
21116 {
21117 enum rtx_code code = GET_CODE (operands[3]);
21118 rtx cmp;
21119
21120 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21121 &operands[4], &operands[5]);
21122 if (code == UNKNOWN)
21123 {
21124 rtx temp;
21125 switch (GET_CODE (operands[3]))
21126 {
21127 case LTGT:
21128 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21129 operands[5], operands[0], operands[0]);
21130 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21131 operands[5], operands[1], operands[2]);
21132 code = AND;
21133 break;
21134 case UNEQ:
21135 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21136 operands[5], operands[0], operands[0]);
21137 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21138 operands[5], operands[1], operands[2]);
21139 code = IOR;
21140 break;
21141 default:
21142 gcc_unreachable ();
21143 }
21144 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21145 OPTAB_DIRECT);
21146 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21147 return true;
21148 }
21149
21150 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21151 operands[5], operands[1], operands[2]))
21152 return true;
21153
21154 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21155 operands[1], operands[2]);
21156 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21157 return true;
21158 }
21159
21160 /* Expand a signed/unsigned integral vector conditional move. */
21161
21162 bool
21163 ix86_expand_int_vcond (rtx operands[])
21164 {
21165 enum machine_mode data_mode = GET_MODE (operands[0]);
21166 enum machine_mode mode = GET_MODE (operands[4]);
21167 enum rtx_code code = GET_CODE (operands[3]);
21168 bool negate = false;
21169 rtx x, cop0, cop1;
21170
21171 cop0 = operands[4];
21172 cop1 = operands[5];
21173
21174 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21175 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21176 if ((code == LT || code == GE)
21177 && data_mode == mode
21178 && cop1 == CONST0_RTX (mode)
21179 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21180 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21181 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21182 && (GET_MODE_SIZE (data_mode) == 16
21183 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21184 {
21185 rtx negop = operands[2 - (code == LT)];
21186 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21187 if (negop == CONST1_RTX (data_mode))
21188 {
21189 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21190 operands[0], 1, OPTAB_DIRECT);
21191 if (res != operands[0])
21192 emit_move_insn (operands[0], res);
21193 return true;
21194 }
21195 else if (GET_MODE_INNER (data_mode) != DImode
21196 && vector_all_ones_operand (negop, data_mode))
21197 {
21198 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21199 operands[0], 0, OPTAB_DIRECT);
21200 if (res != operands[0])
21201 emit_move_insn (operands[0], res);
21202 return true;
21203 }
21204 }
21205
21206 if (!nonimmediate_operand (cop1, mode))
21207 cop1 = force_reg (mode, cop1);
21208 if (!general_operand (operands[1], data_mode))
21209 operands[1] = force_reg (data_mode, operands[1]);
21210 if (!general_operand (operands[2], data_mode))
21211 operands[2] = force_reg (data_mode, operands[2]);
21212
21213 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21214 if (TARGET_XOP
21215 && (mode == V16QImode || mode == V8HImode
21216 || mode == V4SImode || mode == V2DImode))
21217 ;
21218 else
21219 {
21220 /* Canonicalize the comparison to EQ, GT, GTU. */
21221 switch (code)
21222 {
21223 case EQ:
21224 case GT:
21225 case GTU:
21226 break;
21227
21228 case NE:
21229 case LE:
21230 case LEU:
21231 code = reverse_condition (code);
21232 negate = true;
21233 break;
21234
21235 case GE:
21236 case GEU:
21237 code = reverse_condition (code);
21238 negate = true;
21239 /* FALLTHRU */
21240
21241 case LT:
21242 case LTU:
21243 code = swap_condition (code);
21244 x = cop0, cop0 = cop1, cop1 = x;
21245 break;
21246
21247 default:
21248 gcc_unreachable ();
21249 }
21250
21251 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21252 if (mode == V2DImode)
21253 {
21254 switch (code)
21255 {
21256 case EQ:
21257 /* SSE4.1 supports EQ. */
21258 if (!TARGET_SSE4_1)
21259 return false;
21260 break;
21261
21262 case GT:
21263 case GTU:
21264 /* SSE4.2 supports GT/GTU. */
21265 if (!TARGET_SSE4_2)
21266 return false;
21267 break;
21268
21269 default:
21270 gcc_unreachable ();
21271 }
21272 }
21273
21274 /* Unsigned parallel compare is not supported by the hardware.
21275 Play some tricks to turn this into a signed comparison
21276 against 0. */
21277 if (code == GTU)
21278 {
21279 cop0 = force_reg (mode, cop0);
21280
21281 switch (mode)
21282 {
21283 case V16SImode:
21284 case V8DImode:
21285 case V8SImode:
21286 case V4DImode:
21287 case V4SImode:
21288 case V2DImode:
21289 {
21290 rtx t1, t2, mask;
21291 rtx (*gen_sub3) (rtx, rtx, rtx);
21292
21293 switch (mode)
21294 {
21295 case V16SImode: gen_sub3 = gen_subv16si3; break;
21296 case V8DImode: gen_sub3 = gen_subv8di3; break;
21297 case V8SImode: gen_sub3 = gen_subv8si3; break;
21298 case V4DImode: gen_sub3 = gen_subv4di3; break;
21299 case V4SImode: gen_sub3 = gen_subv4si3; break;
21300 case V2DImode: gen_sub3 = gen_subv2di3; break;
21301 default:
21302 gcc_unreachable ();
21303 }
21304 /* Subtract (-(INT MAX) - 1) from both operands to make
21305 them signed. */
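/* Subtracting the sign-bit mask simply flips the sign bit, so the
unsigned order of the original values equals the signed order of
the biased values and GTU becomes GT. */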
21306 mask = ix86_build_signbit_mask (mode, true, false);
21307 t1 = gen_reg_rtx (mode);
21308 emit_insn (gen_sub3 (t1, cop0, mask));
21309
21310 t2 = gen_reg_rtx (mode);
21311 emit_insn (gen_sub3 (t2, cop1, mask));
21312
21313 cop0 = t1;
21314 cop1 = t2;
21315 code = GT;
21316 }
21317 break;
21318
21319 case V32QImode:
21320 case V16HImode:
21321 case V16QImode:
21322 case V8HImode:
21323 /* Perform a parallel unsigned saturating subtraction. */
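/* The saturating difference is zero exactly when cop0 <=u cop1, so
comparing it against zero for equality and negating the selection
implements GTU. */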
21324 x = gen_reg_rtx (mode);
21325 emit_insn (gen_rtx_SET (VOIDmode, x,
21326 gen_rtx_US_MINUS (mode, cop0, cop1)));
21327
21328 cop0 = x;
21329 cop1 = CONST0_RTX (mode);
21330 code = EQ;
21331 negate = !negate;
21332 break;
21333
21334 default:
21335 gcc_unreachable ();
21336 }
21337 }
21338 }
21339
21340 /* Allow the comparison to be done in one mode, but the movcc to
21341 happen in another mode. */
21342 if (data_mode == mode)
21343 {
21344 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21345 operands[1+negate], operands[2-negate]);
21346 }
21347 else
21348 {
21349 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21350 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21351 operands[1+negate], operands[2-negate]);
21352 if (GET_MODE (x) == mode)
21353 x = gen_lowpart (data_mode, x);
21354 }
21355
21356 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21357 operands[2-negate]);
21358 return true;
21359 }
21360
21361 static bool
21362 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21363 {
21364 enum machine_mode mode = GET_MODE (op0);
21365 switch (mode)
21366 {
21367 case V16SImode:
21368 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21369 force_reg (V16SImode, mask),
21370 op1));
21371 return true;
21372 case V16SFmode:
21373 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21374 force_reg (V16SImode, mask),
21375 op1));
21376 return true;
21377 case V8DImode:
21378 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21379 force_reg (V8DImode, mask), op1));
21380 return true;
21381 case V8DFmode:
21382 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21383 force_reg (V8DImode, mask), op1));
21384 return true;
21385 default:
21386 return false;
21387 }
21388 }
21389
21390 /* Expand a variable vector permutation. */
21391
21392 void
21393 ix86_expand_vec_perm (rtx operands[])
21394 {
21395 rtx target = operands[0];
21396 rtx op0 = operands[1];
21397 rtx op1 = operands[2];
21398 rtx mask = operands[3];
21399 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21400 enum machine_mode mode = GET_MODE (op0);
21401 enum machine_mode maskmode = GET_MODE (mask);
21402 int w, e, i;
21403 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21404
21405 /* Number of elements in the vector. */
21406 w = GET_MODE_NUNITS (mode);
21407 e = GET_MODE_UNIT_SIZE (mode);
21408 gcc_assert (w <= 64);
21409
21410 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21411 return;
21412
21413 if (TARGET_AVX2)
21414 {
21415 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21416 {
21417 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21418 a constant shuffle operand. With a tiny bit of effort we can
21419 use VPERMD instead. A re-interpretation stall for V4DFmode is
21420 unfortunate but there's no avoiding it.
21421 Similarly, for V16HImode we don't have instructions for variable
21422 shuffling, while for V32QImode we can, after preparing suitable
21423 masks, use vpshufb; vpshufb; vpermq; vpor. */
21424
21425 if (mode == V16HImode)
21426 {
21427 maskmode = mode = V32QImode;
21428 w = 32;
21429 e = 1;
21430 }
21431 else
21432 {
21433 maskmode = mode = V8SImode;
21434 w = 8;
21435 e = 4;
21436 }
21437 t1 = gen_reg_rtx (maskmode);
21438
21439 /* Replicate the low bits of the V4DImode mask into V8SImode:
21440 mask = { A B C D }
21441 t1 = { A A B B C C D D }. */
21442 for (i = 0; i < w / 2; ++i)
21443 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21444 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21445 vt = force_reg (maskmode, vt);
21446 mask = gen_lowpart (maskmode, mask);
21447 if (maskmode == V8SImode)
21448 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21449 else
21450 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21451
21452 /* Multiply the shuffle indices by two. */
21453 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21454 OPTAB_DIRECT);
21455
21456 /* Add one to the odd shuffle indices:
21457 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21458 for (i = 0; i < w / 2; ++i)
21459 {
21460 vec[i * 2] = const0_rtx;
21461 vec[i * 2 + 1] = const1_rtx;
21462 }
21463 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21464 vt = validize_mem (force_const_mem (maskmode, vt));
21465 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21466 OPTAB_DIRECT);
21467
21468 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21469 operands[3] = mask = t1;
21470 target = gen_reg_rtx (mode);
21471 op0 = gen_lowpart (mode, op0);
21472 op1 = gen_lowpart (mode, op1);
21473 }
21474
21475 switch (mode)
21476 {
21477 case V8SImode:
21478 /* The VPERMD and VPERMPS instructions already properly ignore
21479 the high bits of the shuffle elements. No need for us to
21480 perform an AND ourselves. */
21481 if (one_operand_shuffle)
21482 {
21483 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21484 if (target != operands[0])
21485 emit_move_insn (operands[0],
21486 gen_lowpart (GET_MODE (operands[0]), target));
21487 }
21488 else
21489 {
21490 t1 = gen_reg_rtx (V8SImode);
21491 t2 = gen_reg_rtx (V8SImode);
21492 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21493 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21494 goto merge_two;
21495 }
21496 return;
21497
21498 case V8SFmode:
21499 mask = gen_lowpart (V8SImode, mask);
21500 if (one_operand_shuffle)
21501 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21502 else
21503 {
21504 t1 = gen_reg_rtx (V8SFmode);
21505 t2 = gen_reg_rtx (V8SFmode);
21506 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21507 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21508 goto merge_two;
21509 }
21510 return;
21511
21512 case V4SImode:
21513 /* By combining the two 128-bit input vectors into one 256-bit
21514 input vector, we can use VPERMD and VPERMPS for the full
21515 two-operand shuffle. */
21516 t1 = gen_reg_rtx (V8SImode);
21517 t2 = gen_reg_rtx (V8SImode);
21518 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21519 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21520 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21521 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21522 return;
21523
21524 case V4SFmode:
21525 t1 = gen_reg_rtx (V8SFmode);
21526 t2 = gen_reg_rtx (V8SImode);
21527 mask = gen_lowpart (V4SImode, mask);
21528 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21529 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21530 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21531 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21532 return;
21533
21534 case V32QImode:
21535 t1 = gen_reg_rtx (V32QImode);
21536 t2 = gen_reg_rtx (V32QImode);
21537 t3 = gen_reg_rtx (V32QImode);
21538 vt2 = GEN_INT (-128);
21539 for (i = 0; i < 32; i++)
21540 vec[i] = vt2;
21541 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21542 vt = force_reg (V32QImode, vt);
21543 for (i = 0; i < 32; i++)
21544 vec[i] = i < 16 ? vt2 : const0_rtx;
21545 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21546 vt2 = force_reg (V32QImode, vt2);
21547 /* From mask create two adjusted masks, which contain the same
21548 bits as mask in the low 7 bits of each vector element.
21549 The first mask will have the most significant bit clear
21550 if it requests an element from the same 128-bit lane
21551 and MSB set if it requests an element from the other 128-bit lane.
21552 The second mask will have the opposite values of the MSB,
21553 and additionally will have its 128-bit lanes swapped.
21554 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21555 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21556 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21557 stands for the other 12 bytes. */
21558 /* The bit that tells whether an element is from the same lane or the
21559 other lane is bit 4, so shift it up by 3 to the MSB position. */
21560 t5 = gen_reg_rtx (V4DImode);
21561 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21562 GEN_INT (3)));
21563 /* Clear MSB bits from the mask just in case it had them set. */
21564 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21565 /* After this t1 will have MSB set for elements from other lane. */
21566 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21567 /* Clear bits other than MSB. */
21568 emit_insn (gen_andv32qi3 (t1, t1, vt));
21569 /* Or in the lower bits from mask into t3. */
21570 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21571 /* And invert MSB bits in t1, so MSB is set for elements from the same
21572 lane. */
21573 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21574 /* Swap 128-bit lanes in t3. */
21575 t6 = gen_reg_rtx (V4DImode);
21576 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21577 const2_rtx, GEN_INT (3),
21578 const0_rtx, const1_rtx));
21579 /* And or in the lower bits from mask into t1. */
21580 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21581 if (one_operand_shuffle)
21582 {
21583 /* Each of these shuffles will put 0s in places where an
21584 element from the other 128-bit lane is needed, and otherwise
21585 will shuffle in the requested value. */
21586 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21587 gen_lowpart (V32QImode, t6)));
21588 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21589 /* For t3 the 128-bit lanes are swapped again. */
21590 t7 = gen_reg_rtx (V4DImode);
21591 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21592 const2_rtx, GEN_INT (3),
21593 const0_rtx, const1_rtx));
21594 /* And oring both together leads to the result. */
21595 emit_insn (gen_iorv32qi3 (target, t1,
21596 gen_lowpart (V32QImode, t7)));
21597 if (target != operands[0])
21598 emit_move_insn (operands[0],
21599 gen_lowpart (GET_MODE (operands[0]), target));
21600 return;
21601 }
21602
21603 t4 = gen_reg_rtx (V32QImode);
21604 /* Similarly to the above one_operand_shuffle code,
21605 just repeated twice for each operand. The merge_two:
21606 code will merge the two results together. */
21607 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21608 gen_lowpart (V32QImode, t6)));
21609 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21610 gen_lowpart (V32QImode, t6)));
21611 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21612 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21613 t7 = gen_reg_rtx (V4DImode);
21614 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21615 const2_rtx, GEN_INT (3),
21616 const0_rtx, const1_rtx));
21617 t8 = gen_reg_rtx (V4DImode);
21618 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21619 const2_rtx, GEN_INT (3),
21620 const0_rtx, const1_rtx));
21621 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21622 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21623 t1 = t4;
21624 t2 = t3;
21625 goto merge_two;
21626
21627 default:
21628 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21629 break;
21630 }
21631 }
21632
21633 if (TARGET_XOP)
21634 {
21635 /* The XOP VPPERM insn supports three inputs. By ignoring the
21636 one_operand_shuffle special case, we avoid creating another
21637 set of constant vectors in memory. */
21638 one_operand_shuffle = false;
21639
21640 /* mask = mask & {2*w-1, ...} */
21641 vt = GEN_INT (2*w - 1);
21642 }
21643 else
21644 {
21645 /* mask = mask & {w-1, ...} */
21646 vt = GEN_INT (w - 1);
21647 }
21648
21649 for (i = 0; i < w; i++)
21650 vec[i] = vt;
21651 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21652 mask = expand_simple_binop (maskmode, AND, mask, vt,
21653 NULL_RTX, 0, OPTAB_DIRECT);
21654
21655 /* For non-QImode operations, convert the word permutation control
21656 into a byte permutation control. */
21657 if (mode != V16QImode)
21658 {
21659 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21660 GEN_INT (exact_log2 (e)),
21661 NULL_RTX, 0, OPTAB_DIRECT);
21662
21663 /* Convert mask to vector of chars. */
21664 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21665
21666 /* Replicate each of the input bytes into byte positions:
21667 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21668 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21669 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21670 for (i = 0; i < 16; ++i)
21671 vec[i] = GEN_INT (i/e * e);
21672 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21673 vt = validize_mem (force_const_mem (V16QImode, vt));
21674 if (TARGET_XOP)
21675 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21676 else
21677 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21678
21679 /* Convert it into the byte positions by doing
21680 mask = mask + {0,1,..,16/w, 0,1,..,16/w, ...} */
21681 for (i = 0; i < 16; ++i)
21682 vec[i] = GEN_INT (i % e);
21683 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21684 vt = validize_mem (force_const_mem (V16QImode, vt));
21685 emit_insn (gen_addv16qi3 (mask, mask, vt));
21686 }
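/* Worked example (illustration only, nothing extra is emitted): for
   V4SImode with mask { 3 0 2 1 }, the AND leaves { 3 0 2 1 }, the shift
   by log2(4) gives { 12 0 8 4 }, the pshufb replication yields bytes
   { 12 12 12 12  0 0 0 0  8 8 8 8  4 4 4 4 }, and the final addition of
   { 0 1 2 3 ... } produces the byte control
   { 12 13 14 15  0 1 2 3  8 9 10 11  4 5 6 7 }.  */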
21687
21688 /* The actual shuffle operations all operate on V16QImode. */
21689 op0 = gen_lowpart (V16QImode, op0);
21690 op1 = gen_lowpart (V16QImode, op1);
21691
21692 if (TARGET_XOP)
21693 {
21694 if (GET_MODE (target) != V16QImode)
21695 target = gen_reg_rtx (V16QImode);
21696 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21697 if (target != operands[0])
21698 emit_move_insn (operands[0],
21699 gen_lowpart (GET_MODE (operands[0]), target));
21700 }
21701 else if (one_operand_shuffle)
21702 {
21703 if (GET_MODE (target) != V16QImode)
21704 target = gen_reg_rtx (V16QImode);
21705 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21706 if (target != operands[0])
21707 emit_move_insn (operands[0],
21708 gen_lowpart (GET_MODE (operands[0]), target));
21709 }
21710 else
21711 {
21712 rtx xops[6];
21713 bool ok;
21714
21715 /* Shuffle the two input vectors independently. */
21716 t1 = gen_reg_rtx (V16QImode);
21717 t2 = gen_reg_rtx (V16QImode);
21718 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21719 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21720
21721 merge_two:
21722 /* Then merge them together. The key is whether any given control
21723 element contained a bit set that indicates the second word. */
21724 mask = operands[3];
21725 vt = GEN_INT (w);
21726 if (maskmode == V2DImode && !TARGET_SSE4_1)
21727 {
21728 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21729 more shuffle to convert the V2DI input mask into a V4SI
21730 input mask, at which point the masking that expand_int_vcond
21731 performs will work as desired. */
21732 rtx t3 = gen_reg_rtx (V4SImode);
21733 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21734 const0_rtx, const0_rtx,
21735 const2_rtx, const2_rtx));
21736 mask = t3;
21737 maskmode = V4SImode;
21738 e = w = 4;
21739 }
21740
21741 for (i = 0; i < w; i++)
21742 vec[i] = vt;
21743 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21744 vt = force_reg (maskmode, vt);
21745 mask = expand_simple_binop (maskmode, AND, mask, vt,
21746 NULL_RTX, 0, OPTAB_DIRECT);
21747
21748 if (GET_MODE (target) != mode)
21749 target = gen_reg_rtx (mode);
21750 xops[0] = target;
21751 xops[1] = gen_lowpart (mode, t2);
21752 xops[2] = gen_lowpart (mode, t1);
21753 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21754 xops[4] = mask;
21755 xops[5] = vt;
21756 ok = ix86_expand_int_vcond (xops);
21757 gcc_assert (ok);
21758 if (target != operands[0])
21759 emit_move_insn (operands[0],
21760 gen_lowpart (GET_MODE (operands[0]), target));
21761 }
21762 }
21763
21764 /* Unpack SRC into DEST, the next wider integer vector type. UNSIGNED_P
21765 is true if we should do zero extension, else sign extension. HIGH_P is
21766 true if we want the N/2 high elements, else the low elements. */
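/* For example (illustrative only): with SSE4.1, a V16QImode SRC unpacked
   with UNSIGNED_P and !HIGH_P becomes a single pmovzxbw of the low eight
   bytes into a V8HImode DEST; with HIGH_P the high eight bytes are first
   shifted down by 64 bits and then extended the same way.  */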
21767
21768 void
21769 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21770 {
21771 enum machine_mode imode = GET_MODE (src);
21772 rtx tmp;
21773
21774 if (TARGET_SSE4_1)
21775 {
21776 rtx (*unpack)(rtx, rtx);
21777 rtx (*extract)(rtx, rtx) = NULL;
21778 enum machine_mode halfmode = BLKmode;
21779
21780 switch (imode)
21781 {
21782 case V32QImode:
21783 if (unsigned_p)
21784 unpack = gen_avx2_zero_extendv16qiv16hi2;
21785 else
21786 unpack = gen_avx2_sign_extendv16qiv16hi2;
21787 halfmode = V16QImode;
21788 extract
21789 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21790 break;
21791 case V32HImode:
21792 if (unsigned_p)
21793 unpack = gen_avx512f_zero_extendv16hiv16si2;
21794 else
21795 unpack = gen_avx512f_sign_extendv16hiv16si2;
21796 halfmode = V16HImode;
21797 extract
21798 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21799 break;
21800 case V16HImode:
21801 if (unsigned_p)
21802 unpack = gen_avx2_zero_extendv8hiv8si2;
21803 else
21804 unpack = gen_avx2_sign_extendv8hiv8si2;
21805 halfmode = V8HImode;
21806 extract
21807 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21808 break;
21809 case V16SImode:
21810 if (unsigned_p)
21811 unpack = gen_avx512f_zero_extendv8siv8di2;
21812 else
21813 unpack = gen_avx512f_sign_extendv8siv8di2;
21814 halfmode = V8SImode;
21815 extract
21816 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21817 break;
21818 case V8SImode:
21819 if (unsigned_p)
21820 unpack = gen_avx2_zero_extendv4siv4di2;
21821 else
21822 unpack = gen_avx2_sign_extendv4siv4di2;
21823 halfmode = V4SImode;
21824 extract
21825 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21826 break;
21827 case V16QImode:
21828 if (unsigned_p)
21829 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21830 else
21831 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21832 break;
21833 case V8HImode:
21834 if (unsigned_p)
21835 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21836 else
21837 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21838 break;
21839 case V4SImode:
21840 if (unsigned_p)
21841 unpack = gen_sse4_1_zero_extendv2siv2di2;
21842 else
21843 unpack = gen_sse4_1_sign_extendv2siv2di2;
21844 break;
21845 default:
21846 gcc_unreachable ();
21847 }
21848
21849 if (GET_MODE_SIZE (imode) >= 32)
21850 {
21851 tmp = gen_reg_rtx (halfmode);
21852 emit_insn (extract (tmp, src));
21853 }
21854 else if (high_p)
21855 {
21856 /* Shift higher 8 bytes to lower 8 bytes. */
21857 tmp = gen_reg_rtx (V1TImode);
21858 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21859 GEN_INT (64)));
21860 tmp = gen_lowpart (imode, tmp);
21861 }
21862 else
21863 tmp = src;
21864
21865 emit_insn (unpack (dest, tmp));
21866 }
21867 else
21868 {
21869 rtx (*unpack)(rtx, rtx, rtx);
21870
21871 switch (imode)
21872 {
21873 case V16QImode:
21874 if (high_p)
21875 unpack = gen_vec_interleave_highv16qi;
21876 else
21877 unpack = gen_vec_interleave_lowv16qi;
21878 break;
21879 case V8HImode:
21880 if (high_p)
21881 unpack = gen_vec_interleave_highv8hi;
21882 else
21883 unpack = gen_vec_interleave_lowv8hi;
21884 break;
21885 case V4SImode:
21886 if (high_p)
21887 unpack = gen_vec_interleave_highv4si;
21888 else
21889 unpack = gen_vec_interleave_lowv4si;
21890 break;
21891 default:
21892 gcc_unreachable ();
21893 }
21894
21895 if (unsigned_p)
21896 tmp = force_reg (imode, CONST0_RTX (imode));
21897 else
21898 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21899 src, pc_rtx, pc_rtx);
21900
21901 rtx tmp2 = gen_reg_rtx (imode);
21902 emit_insn (unpack (tmp2, src, tmp));
21903 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21904 }
21905 }
21906
21907 /* Expand conditional increment or decrement using adc/sbb instructions.
21908 The default case using setcc followed by a conditional move can be
21909 done by generic code. */
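/* For instance (a sketch of the intent, not literal output): with unsigned
   operands the statement "if (a < b) x++;" can be expanded as
       cmp  b, a        ; sets CF when a < b
       adc  $0, x       ; x += CF
   and the decrement / inverted cases use sbb or a -1 addend instead.  */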
21910 bool
21911 ix86_expand_int_addcc (rtx operands[])
21912 {
21913 enum rtx_code code = GET_CODE (operands[1]);
21914 rtx flags;
21915 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21916 rtx compare_op;
21917 rtx val = const0_rtx;
21918 bool fpcmp = false;
21919 enum machine_mode mode;
21920 rtx op0 = XEXP (operands[1], 0);
21921 rtx op1 = XEXP (operands[1], 1);
21922
21923 if (operands[3] != const1_rtx
21924 && operands[3] != constm1_rtx)
21925 return false;
21926 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21927 return false;
21928 code = GET_CODE (compare_op);
21929
21930 flags = XEXP (compare_op, 0);
21931
21932 if (GET_MODE (flags) == CCFPmode
21933 || GET_MODE (flags) == CCFPUmode)
21934 {
21935 fpcmp = true;
21936 code = ix86_fp_compare_code_to_integer (code);
21937 }
21938
21939 if (code != LTU)
21940 {
21941 val = constm1_rtx;
21942 if (fpcmp)
21943 PUT_CODE (compare_op,
21944 reverse_condition_maybe_unordered
21945 (GET_CODE (compare_op)));
21946 else
21947 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21948 }
21949
21950 mode = GET_MODE (operands[0]);
21951
21952 /* Construct either adc or sbb insn. */
21953 if ((code == LTU) == (operands[3] == constm1_rtx))
21954 {
21955 switch (mode)
21956 {
21957 case QImode:
21958 insn = gen_subqi3_carry;
21959 break;
21960 case HImode:
21961 insn = gen_subhi3_carry;
21962 break;
21963 case SImode:
21964 insn = gen_subsi3_carry;
21965 break;
21966 case DImode:
21967 insn = gen_subdi3_carry;
21968 break;
21969 default:
21970 gcc_unreachable ();
21971 }
21972 }
21973 else
21974 {
21975 switch (mode)
21976 {
21977 case QImode:
21978 insn = gen_addqi3_carry;
21979 break;
21980 case HImode:
21981 insn = gen_addhi3_carry;
21982 break;
21983 case SImode:
21984 insn = gen_addsi3_carry;
21985 break;
21986 case DImode:
21987 insn = gen_adddi3_carry;
21988 break;
21989 default:
21990 gcc_unreachable ();
21991 }
21992 }
21993 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21994
21995 return true;
21996 }
21997
21998
21999 /* Split OPERAND into half-mode parts stored in PARTS. Similar to
22000 split_double_mode, but works for floating point parameters and
22001 non-offsettable memories. For pushes, it returns just stack offsets;
22002 the values will be saved in the right order. At most four parts are generated. */
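/* For instance (derived from the size computation below): on a 32-bit
   target a DImode or DFmode value yields two SImode parts, XFmode three
   and TFmode four; on a 64-bit target XFmode and TFmode are split into a
   DImode low part plus an SImode resp. DImode upper part.  */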
22003
22004 static int
22005 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22006 {
22007 int size;
22008
22009 if (!TARGET_64BIT)
22010 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22011 else
22012 size = (GET_MODE_SIZE (mode) + 4) / 8;
22013
22014 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22015 gcc_assert (size >= 2 && size <= 4);
22016
22017 /* Optimize constant pool reference to immediates. This is used by fp
22018 moves, which force all constants to memory to allow combining. */
22019 if (MEM_P (operand) && MEM_READONLY_P (operand))
22020 {
22021 rtx tmp = maybe_get_pool_constant (operand);
22022 if (tmp)
22023 operand = tmp;
22024 }
22025
22026 if (MEM_P (operand) && !offsettable_memref_p (operand))
22027 {
22028 /* The only non-offsettable memories we handle are pushes. */
22029 int ok = push_operand (operand, VOIDmode);
22030
22031 gcc_assert (ok);
22032
22033 operand = copy_rtx (operand);
22034 PUT_MODE (operand, word_mode);
22035 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22036 return size;
22037 }
22038
22039 if (GET_CODE (operand) == CONST_VECTOR)
22040 {
22041 enum machine_mode imode = int_mode_for_mode (mode);
22042 /* Caution: if we looked through a constant pool memory above,
22043 the operand may actually have a different mode now. That's
22044 ok, since we want to pun this all the way back to an integer. */
22045 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22046 gcc_assert (operand != NULL);
22047 mode = imode;
22048 }
22049
22050 if (!TARGET_64BIT)
22051 {
22052 if (mode == DImode)
22053 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22054 else
22055 {
22056 int i;
22057
22058 if (REG_P (operand))
22059 {
22060 gcc_assert (reload_completed);
22061 for (i = 0; i < size; i++)
22062 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22063 }
22064 else if (offsettable_memref_p (operand))
22065 {
22066 operand = adjust_address (operand, SImode, 0);
22067 parts[0] = operand;
22068 for (i = 1; i < size; i++)
22069 parts[i] = adjust_address (operand, SImode, 4 * i);
22070 }
22071 else if (GET_CODE (operand) == CONST_DOUBLE)
22072 {
22073 REAL_VALUE_TYPE r;
22074 long l[4];
22075
22076 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22077 switch (mode)
22078 {
22079 case TFmode:
22080 real_to_target (l, &r, mode);
22081 parts[3] = gen_int_mode (l[3], SImode);
22082 parts[2] = gen_int_mode (l[2], SImode);
22083 break;
22084 case XFmode:
22085 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22086 long double may not be 80-bit. */
22087 real_to_target (l, &r, mode);
22088 parts[2] = gen_int_mode (l[2], SImode);
22089 break;
22090 case DFmode:
22091 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22092 break;
22093 default:
22094 gcc_unreachable ();
22095 }
22096 parts[1] = gen_int_mode (l[1], SImode);
22097 parts[0] = gen_int_mode (l[0], SImode);
22098 }
22099 else
22100 gcc_unreachable ();
22101 }
22102 }
22103 else
22104 {
22105 if (mode == TImode)
22106 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22107 if (mode == XFmode || mode == TFmode)
22108 {
22109 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22110 if (REG_P (operand))
22111 {
22112 gcc_assert (reload_completed);
22113 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22114 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22115 }
22116 else if (offsettable_memref_p (operand))
22117 {
22118 operand = adjust_address (operand, DImode, 0);
22119 parts[0] = operand;
22120 parts[1] = adjust_address (operand, upper_mode, 8);
22121 }
22122 else if (GET_CODE (operand) == CONST_DOUBLE)
22123 {
22124 REAL_VALUE_TYPE r;
22125 long l[4];
22126
22127 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22128 real_to_target (l, &r, mode);
22129
22130 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22131 if (HOST_BITS_PER_WIDE_INT >= 64)
22132 parts[0]
22133 = gen_int_mode
22134 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22135 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22136 DImode);
22137 else
22138 parts[0] = immed_double_const (l[0], l[1], DImode);
22139
22140 if (upper_mode == SImode)
22141 parts[1] = gen_int_mode (l[2], SImode);
22142 else if (HOST_BITS_PER_WIDE_INT >= 64)
22143 parts[1]
22144 = gen_int_mode
22145 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22146 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22147 DImode);
22148 else
22149 parts[1] = immed_double_const (l[2], l[3], DImode);
22150 }
22151 else
22152 gcc_unreachable ();
22153 }
22154 }
22155
22156 return size;
22157 }
22158
22159 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22160 All required insns are emitted here; the caller has nothing further
22161 to do. Operands 2-5 are used to hold the destination parts in the
22162 correct order; operands 6-9 hold the corresponding source parts. */
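/* As an illustration (not additional behaviour): a 32-bit DImode move
   from memory to a register pair is emitted as two SImode moves, and if
   the first destination register also appears in the source address the
   parts are emitted in reverse order (or an lea is used) so the address
   is not clobbered before it is read.  */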
22163
22164 void
22165 ix86_split_long_move (rtx operands[])
22166 {
22167 rtx part[2][4];
22168 int nparts, i, j;
22169 int push = 0;
22170 int collisions = 0;
22171 enum machine_mode mode = GET_MODE (operands[0]);
22172 bool collisionparts[4];
22173
22174 /* The DFmode expanders may ask us to move a double.
22175 For a 64-bit target this is a single move. By hiding that fact
22176 here we simplify the i386.md splitters. */
22177 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22178 {
22179 /* Optimize constant pool reference to immediates. This is used by
22180 fp moves, which force all constants to memory to allow combining. */
22181
22182 if (MEM_P (operands[1])
22183 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22184 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22185 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22186 if (push_operand (operands[0], VOIDmode))
22187 {
22188 operands[0] = copy_rtx (operands[0]);
22189 PUT_MODE (operands[0], word_mode);
22190 }
22191 else
22192 operands[0] = gen_lowpart (DImode, operands[0]);
22193 operands[1] = gen_lowpart (DImode, operands[1]);
22194 emit_move_insn (operands[0], operands[1]);
22195 return;
22196 }
22197
22198 /* The only non-offsettable memory we handle is push. */
22199 if (push_operand (operands[0], VOIDmode))
22200 push = 1;
22201 else
22202 gcc_assert (!MEM_P (operands[0])
22203 || offsettable_memref_p (operands[0]));
22204
22205 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22206 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22207
22208 /* When emitting a push, take care of source operands on the stack. */
22209 if (push && MEM_P (operands[1])
22210 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22211 {
22212 rtx src_base = XEXP (part[1][nparts - 1], 0);
22213
22214 /* Compensate for the stack decrement by 4. */
22215 if (!TARGET_64BIT && nparts == 3
22216 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22217 src_base = plus_constant (Pmode, src_base, 4);
22218
22219 /* src_base refers to the stack pointer and is
22220 automatically decreased by emitted push. */
22221 for (i = 0; i < nparts; i++)
22222 part[1][i] = change_address (part[1][i],
22223 GET_MODE (part[1][i]), src_base);
22224 }
22225
22226 /* We need to do the copy in the right order in case an address register
22227 of the source overlaps the destination. */
22228 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22229 {
22230 rtx tmp;
22231
22232 for (i = 0; i < nparts; i++)
22233 {
22234 collisionparts[i]
22235 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22236 if (collisionparts[i])
22237 collisions++;
22238 }
22239
22240 /* Collision in the middle part can be handled by reordering. */
22241 if (collisions == 1 && nparts == 3 && collisionparts [1])
22242 {
22243 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22244 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22245 }
22246 else if (collisions == 1
22247 && nparts == 4
22248 && (collisionparts [1] || collisionparts [2]))
22249 {
22250 if (collisionparts [1])
22251 {
22252 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22253 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22254 }
22255 else
22256 {
22257 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22258 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22259 }
22260 }
22261
22262 /* If there are more collisions, we can't handle it by reordering.
22263 Do an lea to the last part and use only one colliding move. */
22264 else if (collisions > 1)
22265 {
22266 rtx base;
22267
22268 collisions = 1;
22269
22270 base = part[0][nparts - 1];
22271
22272 /* Handle the case when the last part isn't valid for lea.
22273 Happens in 64-bit mode storing the 12-byte XFmode. */
22274 if (GET_MODE (base) != Pmode)
22275 base = gen_rtx_REG (Pmode, REGNO (base));
22276
22277 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22278 part[1][0] = replace_equiv_address (part[1][0], base);
22279 for (i = 1; i < nparts; i++)
22280 {
22281 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22282 part[1][i] = replace_equiv_address (part[1][i], tmp);
22283 }
22284 }
22285 }
22286
22287 if (push)
22288 {
22289 if (!TARGET_64BIT)
22290 {
22291 if (nparts == 3)
22292 {
22293 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22294 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22295 stack_pointer_rtx, GEN_INT (-4)));
22296 emit_move_insn (part[0][2], part[1][2]);
22297 }
22298 else if (nparts == 4)
22299 {
22300 emit_move_insn (part[0][3], part[1][3]);
22301 emit_move_insn (part[0][2], part[1][2]);
22302 }
22303 }
22304 else
22305 {
22306 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22307 a register, it is OK - we will just use the larger counterpart. We also
22308 retype memory - this comes from an attempt to avoid a REX prefix when
22309 moving the second half of a TFmode value. */
22310 if (GET_MODE (part[1][1]) == SImode)
22311 {
22312 switch (GET_CODE (part[1][1]))
22313 {
22314 case MEM:
22315 part[1][1] = adjust_address (part[1][1], DImode, 0);
22316 break;
22317
22318 case REG:
22319 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22320 break;
22321
22322 default:
22323 gcc_unreachable ();
22324 }
22325
22326 if (GET_MODE (part[1][0]) == SImode)
22327 part[1][0] = part[1][1];
22328 }
22329 }
22330 emit_move_insn (part[0][1], part[1][1]);
22331 emit_move_insn (part[0][0], part[1][0]);
22332 return;
22333 }
22334
22335 /* Choose correct order to not overwrite the source before it is copied. */
22336 if ((REG_P (part[0][0])
22337 && REG_P (part[1][1])
22338 && (REGNO (part[0][0]) == REGNO (part[1][1])
22339 || (nparts == 3
22340 && REGNO (part[0][0]) == REGNO (part[1][2]))
22341 || (nparts == 4
22342 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22343 || (collisions > 0
22344 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22345 {
22346 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22347 {
22348 operands[2 + i] = part[0][j];
22349 operands[6 + i] = part[1][j];
22350 }
22351 }
22352 else
22353 {
22354 for (i = 0; i < nparts; i++)
22355 {
22356 operands[2 + i] = part[0][i];
22357 operands[6 + i] = part[1][i];
22358 }
22359 }
22360
22361 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22362 if (optimize_insn_for_size_p ())
22363 {
22364 for (j = 0; j < nparts - 1; j++)
22365 if (CONST_INT_P (operands[6 + j])
22366 && operands[6 + j] != const0_rtx
22367 && REG_P (operands[2 + j]))
22368 for (i = j; i < nparts - 1; i++)
22369 if (CONST_INT_P (operands[7 + i])
22370 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22371 operands[7 + i] = operands[2 + j];
22372 }
22373
22374 for (i = 0; i < nparts; i++)
22375 emit_move_insn (operands[2 + i], operands[6 + i]);
22376
22377 return;
22378 }
22379
22380 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22381 left shift by a constant, either using a single shift or
22382 a sequence of add instructions. */
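/* E.g. (illustrative): a left shift by 2, where two adds are cheaper than
   a constant shift, is emitted as
       add  reg, reg
       add  reg, reg
   while the general case falls back to a single shift by the count.  */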
22383
22384 static void
22385 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22386 {
22387 rtx (*insn)(rtx, rtx, rtx);
22388
22389 if (count == 1
22390 || (count * ix86_cost->add <= ix86_cost->shift_const
22391 && !optimize_insn_for_size_p ()))
22392 {
22393 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22394 while (count-- > 0)
22395 emit_insn (insn (operand, operand, operand));
22396 }
22397 else
22398 {
22399 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22400 emit_insn (insn (operand, operand, GEN_INT (count)));
22401 }
22402 }
22403
22404 void
22405 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22406 {
22407 rtx (*gen_ashl3)(rtx, rtx, rtx);
22408 rtx (*gen_shld)(rtx, rtx, rtx);
22409 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22410
22411 rtx low[2], high[2];
22412 int count;
22413
22414 if (CONST_INT_P (operands[2]))
22415 {
22416 split_double_mode (mode, operands, 2, low, high);
22417 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22418
22419 if (count >= half_width)
22420 {
22421 emit_move_insn (high[0], low[1]);
22422 emit_move_insn (low[0], const0_rtx);
22423
22424 if (count > half_width)
22425 ix86_expand_ashl_const (high[0], count - half_width, mode);
22426 }
22427 else
22428 {
22429 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22430
22431 if (!rtx_equal_p (operands[0], operands[1]))
22432 emit_move_insn (operands[0], operands[1]);
22433
22434 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22435 ix86_expand_ashl_const (low[0], count, mode);
22436 }
22437 return;
22438 }
22439
22440 split_double_mode (mode, operands, 1, low, high);
22441
22442 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22443
22444 if (operands[1] == const1_rtx)
22445 {
22446 /* Assuming we've chosen QImode-capable registers, 1 << N
22447 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22448 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22449 {
22450 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22451
22452 ix86_expand_clear (low[0]);
22453 ix86_expand_clear (high[0]);
22454 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22455
22456 d = gen_lowpart (QImode, low[0]);
22457 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22458 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22459 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22460
22461 d = gen_lowpart (QImode, high[0]);
22462 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22463 s = gen_rtx_NE (QImode, flags, const0_rtx);
22464 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22465 }
22466
22467 /* Otherwise, we can get the same results by manually performing
22468 a bit extract operation on bit 5/6, and then performing the two
22469 shifts. The two methods of getting 0/1 into low/high are exactly
22470 the same size. Avoiding the shift in the bit extract case helps
22471 pentium4 a bit; no one else seems to care much either way. */
22472 else
22473 {
22474 enum machine_mode half_mode;
22475 rtx (*gen_lshr3)(rtx, rtx, rtx);
22476 rtx (*gen_and3)(rtx, rtx, rtx);
22477 rtx (*gen_xor3)(rtx, rtx, rtx);
22478 HOST_WIDE_INT bits;
22479 rtx x;
22480
22481 if (mode == DImode)
22482 {
22483 half_mode = SImode;
22484 gen_lshr3 = gen_lshrsi3;
22485 gen_and3 = gen_andsi3;
22486 gen_xor3 = gen_xorsi3;
22487 bits = 5;
22488 }
22489 else
22490 {
22491 half_mode = DImode;
22492 gen_lshr3 = gen_lshrdi3;
22493 gen_and3 = gen_anddi3;
22494 gen_xor3 = gen_xordi3;
22495 bits = 6;
22496 }
22497
22498 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22499 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22500 else
22501 x = gen_lowpart (half_mode, operands[2]);
22502 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22503
22504 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22505 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22506 emit_move_insn (low[0], high[0]);
22507 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22508 }
22509
22510 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22511 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22512 return;
22513 }
22514
22515 if (operands[1] == constm1_rtx)
22516 {
22517 /* For -1 << N, we can avoid the shld instruction, because we
22518 know that we're shifting 0...31/63 ones into a -1. */
22519 emit_move_insn (low[0], constm1_rtx);
22520 if (optimize_insn_for_size_p ())
22521 emit_move_insn (high[0], low[0]);
22522 else
22523 emit_move_insn (high[0], constm1_rtx);
22524 }
22525 else
22526 {
22527 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22528
22529 if (!rtx_equal_p (operands[0], operands[1]))
22530 emit_move_insn (operands[0], operands[1]);
22531
22532 split_double_mode (mode, operands, 1, low, high);
22533 emit_insn (gen_shld (high[0], low[0], operands[2]));
22534 }
22535
22536 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22537
22538 if (TARGET_CMOVE && scratch)
22539 {
22540 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22541 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22542
22543 ix86_expand_clear (scratch);
22544 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22545 }
22546 else
22547 {
22548 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22549 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22550
22551 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22552 }
22553 }
22554
22555 void
22556 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22557 {
22558 rtx (*gen_ashr3)(rtx, rtx, rtx)
22559 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22560 rtx (*gen_shrd)(rtx, rtx, rtx);
22561 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22562
22563 rtx low[2], high[2];
22564 int count;
22565
22566 if (CONST_INT_P (operands[2]))
22567 {
22568 split_double_mode (mode, operands, 2, low, high);
22569 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22570
22571 if (count == GET_MODE_BITSIZE (mode) - 1)
22572 {
22573 emit_move_insn (high[0], high[1]);
22574 emit_insn (gen_ashr3 (high[0], high[0],
22575 GEN_INT (half_width - 1)));
22576 emit_move_insn (low[0], high[0]);
22577
22578 }
22579 else if (count >= half_width)
22580 {
22581 emit_move_insn (low[0], high[1]);
22582 emit_move_insn (high[0], low[0]);
22583 emit_insn (gen_ashr3 (high[0], high[0],
22584 GEN_INT (half_width - 1)));
22585
22586 if (count > half_width)
22587 emit_insn (gen_ashr3 (low[0], low[0],
22588 GEN_INT (count - half_width)));
22589 }
22590 else
22591 {
22592 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22593
22594 if (!rtx_equal_p (operands[0], operands[1]))
22595 emit_move_insn (operands[0], operands[1]);
22596
22597 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22598 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22599 }
22600 }
22601 else
22602 {
22603 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22604
22605 if (!rtx_equal_p (operands[0], operands[1]))
22606 emit_move_insn (operands[0], operands[1]);
22607
22608 split_double_mode (mode, operands, 1, low, high);
22609
22610 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22611 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22612
22613 if (TARGET_CMOVE && scratch)
22614 {
22615 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22616 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22617
22618 emit_move_insn (scratch, high[0]);
22619 emit_insn (gen_ashr3 (scratch, scratch,
22620 GEN_INT (half_width - 1)));
22621 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22622 scratch));
22623 }
22624 else
22625 {
22626 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22627 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22628
22629 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22630 }
22631 }
22632 }
22633
22634 void
22635 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22636 {
22637 rtx (*gen_lshr3)(rtx, rtx, rtx)
22638 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22639 rtx (*gen_shrd)(rtx, rtx, rtx);
22640 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22641
22642 rtx low[2], high[2];
22643 int count;
22644
22645 if (CONST_INT_P (operands[2]))
22646 {
22647 split_double_mode (mode, operands, 2, low, high);
22648 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22649
22650 if (count >= half_width)
22651 {
22652 emit_move_insn (low[0], high[1]);
22653 ix86_expand_clear (high[0]);
22654
22655 if (count > half_width)
22656 emit_insn (gen_lshr3 (low[0], low[0],
22657 GEN_INT (count - half_width)));
22658 }
22659 else
22660 {
22661 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22662
22663 if (!rtx_equal_p (operands[0], operands[1]))
22664 emit_move_insn (operands[0], operands[1]);
22665
22666 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22667 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22668 }
22669 }
22670 else
22671 {
22672 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22673
22674 if (!rtx_equal_p (operands[0], operands[1]))
22675 emit_move_insn (operands[0], operands[1]);
22676
22677 split_double_mode (mode, operands, 1, low, high);
22678
22679 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22680 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22681
22682 if (TARGET_CMOVE && scratch)
22683 {
22684 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22685 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22686
22687 ix86_expand_clear (scratch);
22688 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22689 scratch));
22690 }
22691 else
22692 {
22693 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22694 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22695
22696 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22697 }
22698 }
22699 }
22700
22701 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22702 static void
22703 predict_jump (int prob)
22704 {
22705 rtx insn = get_last_insn ();
22706 gcc_assert (JUMP_P (insn));
22707 add_int_reg_note (insn, REG_BR_PROB, prob);
22708 }
22709
22710 /* Helper function for the string operations below. Test VARIABLE whether
22711 it is aligned to VALUE bytes. If true, jump to the label. */
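/* E.g. (illustrative) ix86_expand_aligntest (count, 4, true) emits roughly
   "and $4, tmp ; je Lskip", so the code between here and the returned
   label runs only when bit 2 of COUNT is set.  */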
22712 static rtx_code_label *
22713 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22714 {
22715 rtx_code_label *label = gen_label_rtx ();
22716 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22717 if (GET_MODE (variable) == DImode)
22718 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22719 else
22720 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22721 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22722 1, label);
22723 if (epilogue)
22724 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22725 else
22726 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22727 return label;
22728 }
22729
22730 /* Decrease COUNTREG by VALUE. */
22731 static void
22732 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22733 {
22734 rtx (*gen_add)(rtx, rtx, rtx)
22735 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22736
22737 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22738 }
22739
22740 /* Zero extend a possibly-SImode EXP into a Pmode register. */
22741 rtx
22742 ix86_zero_extend_to_Pmode (rtx exp)
22743 {
22744 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22745 }
22746
22747 /* Divide COUNTREG by SCALE. */
22748 static rtx
22749 scale_counter (rtx countreg, int scale)
22750 {
22751 rtx sc;
22752
22753 if (scale == 1)
22754 return countreg;
22755 if (CONST_INT_P (countreg))
22756 return GEN_INT (INTVAL (countreg) / scale);
22757 gcc_assert (REG_P (countreg));
22758
22759 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22760 GEN_INT (exact_log2 (scale)),
22761 NULL, 1, OPTAB_DIRECT);
22762 return sc;
22763 }
22764
22765 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22766 DImode for constant loop counts. */
22767
22768 static enum machine_mode
22769 counter_mode (rtx count_exp)
22770 {
22771 if (GET_MODE (count_exp) != VOIDmode)
22772 return GET_MODE (count_exp);
22773 if (!CONST_INT_P (count_exp))
22774 return Pmode;
22775 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22776 return DImode;
22777 return SImode;
22778 }
22779
22780 /* Copy the address to a Pmode register. This is used for x32 to
22781 truncate DImode TLS address to a SImode register. */
22782
22783 static rtx
22784 ix86_copy_addr_to_reg (rtx addr)
22785 {
22786 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22787 return copy_addr_to_reg (addr);
22788 else
22789 {
22790 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22791 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22792 }
22793 }
22794
22795 /* When ISSETMEM is FALSE, output a simple loop that moves the memory
22796 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
22797 the overall size is COUNT, specified in bytes. When ISSETMEM is TRUE,
22798 output the equivalent loop that sets the memory to VALUE (expected to be in MODE).
22799
22800 The size is rounded down to a whole multiple of the chunk size moved at once.
22801 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
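/* The generated code is roughly (a sketch only, shown for the !ISSETMEM
   case):

       size = count & -(UNROLL * GET_MODE_SIZE (MODE));
       iter = 0;
       do
	 {
	   copy UNROLL chunks of MODE from SRCPTR+iter to DESTPTR+iter;
	   iter += UNROLL * GET_MODE_SIZE (MODE);
	 }
       while (iter < size);
       DESTPTR += iter; SRCPTR += iter;  */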
22802
22803
22804 static void
22805 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22806 rtx destptr, rtx srcptr, rtx value,
22807 rtx count, enum machine_mode mode, int unroll,
22808 int expected_size, bool issetmem)
22809 {
22810 rtx_code_label *out_label, *top_label;
22811 rtx iter, tmp;
22812 enum machine_mode iter_mode = counter_mode (count);
22813 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22814 rtx piece_size = GEN_INT (piece_size_n);
22815 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22816 rtx size;
22817 int i;
22818
22819 top_label = gen_label_rtx ();
22820 out_label = gen_label_rtx ();
22821 iter = gen_reg_rtx (iter_mode);
22822
22823 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22824 NULL, 1, OPTAB_DIRECT);
22825 /* Those two should combine. */
22826 if (piece_size == const1_rtx)
22827 {
22828 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22829 true, out_label);
22830 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22831 }
22832 emit_move_insn (iter, const0_rtx);
22833
22834 emit_label (top_label);
22835
22836 tmp = convert_modes (Pmode, iter_mode, iter, true);
22837
22838 /* This assert could be relaxed - in that case we'd need to compute the
22839 smallest power of two containing PIECE_SIZE_N and pass it to
22840 offset_address. */
22841 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22842 destmem = offset_address (destmem, tmp, piece_size_n);
22843 destmem = adjust_address (destmem, mode, 0);
22844
22845 if (!issetmem)
22846 {
22847 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22848 srcmem = adjust_address (srcmem, mode, 0);
22849
22850 /* When unrolling for chips that reorder memory reads and writes,
22851 we can save registers by using a single temporary.
22852 Also, using 4 temporaries is overkill in 32-bit mode. */
22853 if (!TARGET_64BIT && 0)
22854 {
22855 for (i = 0; i < unroll; i++)
22856 {
22857 if (i)
22858 {
22859 destmem =
22860 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22861 srcmem =
22862 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22863 }
22864 emit_move_insn (destmem, srcmem);
22865 }
22866 }
22867 else
22868 {
22869 rtx tmpreg[4];
22870 gcc_assert (unroll <= 4);
22871 for (i = 0; i < unroll; i++)
22872 {
22873 tmpreg[i] = gen_reg_rtx (mode);
22874 if (i)
22875 {
22876 srcmem =
22877 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22878 }
22879 emit_move_insn (tmpreg[i], srcmem);
22880 }
22881 for (i = 0; i < unroll; i++)
22882 {
22883 if (i)
22884 {
22885 destmem =
22886 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22887 }
22888 emit_move_insn (destmem, tmpreg[i]);
22889 }
22890 }
22891 }
22892 else
22893 for (i = 0; i < unroll; i++)
22894 {
22895 if (i)
22896 destmem =
22897 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22898 emit_move_insn (destmem, value);
22899 }
22900
22901 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22902 true, OPTAB_LIB_WIDEN);
22903 if (tmp != iter)
22904 emit_move_insn (iter, tmp);
22905
22906 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22907 true, top_label);
22908 if (expected_size != -1)
22909 {
22910 expected_size /= GET_MODE_SIZE (mode) * unroll;
22911 if (expected_size == 0)
22912 predict_jump (0);
22913 else if (expected_size > REG_BR_PROB_BASE)
22914 predict_jump (REG_BR_PROB_BASE - 1);
22915 else
22916 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22917 }
22918 else
22919 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22920 iter = ix86_zero_extend_to_Pmode (iter);
22921 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22922 true, OPTAB_LIB_WIDEN);
22923 if (tmp != destptr)
22924 emit_move_insn (destptr, tmp);
22925 if (!issetmem)
22926 {
22927 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22928 true, OPTAB_LIB_WIDEN);
22929 if (tmp != srcptr)
22930 emit_move_insn (srcptr, tmp);
22931 }
22932 emit_label (out_label);
22933 }
22934
22935 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22936 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22937 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22938 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22939 ORIG_VALUE is the original value passed to memset to fill the memory with.
22940 Other arguments have the same meaning as for the previous function. */
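/* For instance (a sketch, not the exact RTL): a copy expanded with MODE
   == SImode comes out as roughly

       mov  ecx, COUNT / 4
       rep  movsd

   with DESTEXP/SRCEXP describing the final pointer values for dataflow,
   while the ISSETMEM case emits rep stos with the promoted VALUE.  */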
22941
22942 static void
22943 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22944 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22945 rtx count,
22946 enum machine_mode mode, bool issetmem)
22947 {
22948 rtx destexp;
22949 rtx srcexp;
22950 rtx countreg;
22951 HOST_WIDE_INT rounded_count;
22952
22953 /* If possible, it is shorter to use rep movs.
22954 TODO: Maybe it is better to move this logic to decide_alg. */
22955 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22956 && (!issetmem || orig_value == const0_rtx))
22957 mode = SImode;
22958
22959 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22960 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22961
22962 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22963 GET_MODE_SIZE (mode)));
22964 if (mode != QImode)
22965 {
22966 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22967 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22968 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22969 }
22970 else
22971 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22972 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22973 {
22974 rounded_count = (INTVAL (count)
22975 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22976 destmem = shallow_copy_rtx (destmem);
22977 set_mem_size (destmem, rounded_count);
22978 }
22979 else if (MEM_SIZE_KNOWN_P (destmem))
22980 clear_mem_size (destmem);
22981
22982 if (issetmem)
22983 {
22984 value = force_reg (mode, gen_lowpart (mode, value));
22985 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22986 }
22987 else
22988 {
22989 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22990 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22991 if (mode != QImode)
22992 {
22993 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22994 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22995 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22996 }
22997 else
22998 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22999 if (CONST_INT_P (count))
23000 {
23001 rounded_count = (INTVAL (count)
23002 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23003 srcmem = shallow_copy_rtx (srcmem);
23004 set_mem_size (srcmem, rounded_count);
23005 }
23006 else
23007 {
23008 if (MEM_SIZE_KNOWN_P (srcmem))
23009 clear_mem_size (srcmem);
23010 }
23011 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23012 destexp, srcexp));
23013 }
23014 }
23015
23016 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23017 DESTMEM.
23018 SRCMEM is passed by pointer so it can be updated on return.
23019 The return value is the updated DESTMEM. */
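/* E.g. (illustrative): with SIZE_TO_MOVE == 8 on a 64-bit target this
   emits one DImode load into a fresh temporary and one DImode store,
   then advances DESTPTR and SRCPTR by 8.  */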
23020 static rtx
23021 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23022 HOST_WIDE_INT size_to_move)
23023 {
23024 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23025 enum insn_code code;
23026 enum machine_mode move_mode;
23027 int piece_size, i;
23028
23029 /* Find the widest mode in which we could perform moves.
23030 Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
23031 it until a move of that size is supported. */
23032 piece_size = 1 << floor_log2 (size_to_move);
23033 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23034 code = optab_handler (mov_optab, move_mode);
23035 while (code == CODE_FOR_nothing && piece_size > 1)
23036 {
23037 piece_size >>= 1;
23038 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23039 code = optab_handler (mov_optab, move_mode);
23040 }
23041
23042 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23043 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23044 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23045 {
23046 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23047 move_mode = mode_for_vector (word_mode, nunits);
23048 code = optab_handler (mov_optab, move_mode);
23049 if (code == CODE_FOR_nothing)
23050 {
23051 move_mode = word_mode;
23052 piece_size = GET_MODE_SIZE (move_mode);
23053 code = optab_handler (mov_optab, move_mode);
23054 }
23055 }
23056 gcc_assert (code != CODE_FOR_nothing);
23057
23058 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23059 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23060
23061 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23062 gcc_assert (size_to_move % piece_size == 0);
23063 adjust = GEN_INT (piece_size);
23064 for (i = 0; i < size_to_move; i += piece_size)
23065 {
23066 /* We move from memory to memory, so we'll need to do it via
23067 a temporary register. */
23068 tempreg = gen_reg_rtx (move_mode);
23069 emit_insn (GEN_FCN (code) (tempreg, src));
23070 emit_insn (GEN_FCN (code) (dst, tempreg));
23071
23072 emit_move_insn (destptr,
23073 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23074 emit_move_insn (srcptr,
23075 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23076
23077 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23078 piece_size);
23079 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23080 piece_size);
23081 }
23082
23083 /* Update DST and SRC rtx. */
23084 *srcmem = src;
23085 return dst;
23086 }
23087
23088 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
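/* E.g. (illustrative): with a constant COUNT and MAX_SIZE == 8, the
   residue COUNT % 8 is handled bit by bit, so a residue of 7 becomes one
   4-byte, one 2-byte and one 1-byte move.  */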
23089 static void
23090 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23091 rtx destptr, rtx srcptr, rtx count, int max_size)
23092 {
23093 rtx src, dest;
23094 if (CONST_INT_P (count))
23095 {
23096 HOST_WIDE_INT countval = INTVAL (count);
23097 HOST_WIDE_INT epilogue_size = countval % max_size;
23098 int i;
23099
23100 /* For now MAX_SIZE should be a power of 2. This assert could be
23101 relaxed, but it'll require a bit more complicated epilogue
23102 expanding. */
23103 gcc_assert ((max_size & (max_size - 1)) == 0);
23104 for (i = max_size; i >= 1; i >>= 1)
23105 {
23106 if (epilogue_size & i)
23107 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23108 }
23109 return;
23110 }
23111 if (max_size > 8)
23112 {
23113 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23114 count, 1, OPTAB_DIRECT);
23115 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23116 count, QImode, 1, 4, false);
23117 return;
23118 }
23119
23120 /* When there are stringops, we can cheaply increase dest and src pointers.
23121 Otherwise we save code size by maintaining an offset (zero is readily
23122 available from the preceding rep operation) and using x86 addressing
23123 modes. */
23124 if (TARGET_SINGLE_STRINGOP)
23125 {
23126 if (max_size > 4)
23127 {
23128 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23129 src = change_address (srcmem, SImode, srcptr);
23130 dest = change_address (destmem, SImode, destptr);
23131 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23132 emit_label (label);
23133 LABEL_NUSES (label) = 1;
23134 }
23135 if (max_size > 2)
23136 {
23137 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23138 src = change_address (srcmem, HImode, srcptr);
23139 dest = change_address (destmem, HImode, destptr);
23140 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23141 emit_label (label);
23142 LABEL_NUSES (label) = 1;
23143 }
23144 if (max_size > 1)
23145 {
23146 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23147 src = change_address (srcmem, QImode, srcptr);
23148 dest = change_address (destmem, QImode, destptr);
23149 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23150 emit_label (label);
23151 LABEL_NUSES (label) = 1;
23152 }
23153 }
23154 else
23155 {
23156 rtx offset = force_reg (Pmode, const0_rtx);
23157 rtx tmp;
23158
23159 if (max_size > 4)
23160 {
23161 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23162 src = change_address (srcmem, SImode, srcptr);
23163 dest = change_address (destmem, SImode, destptr);
23164 emit_move_insn (dest, src);
23165 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23166 true, OPTAB_LIB_WIDEN);
23167 if (tmp != offset)
23168 emit_move_insn (offset, tmp);
23169 emit_label (label);
23170 LABEL_NUSES (label) = 1;
23171 }
23172 if (max_size > 2)
23173 {
23174 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23175 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23176 src = change_address (srcmem, HImode, tmp);
23177 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23178 dest = change_address (destmem, HImode, tmp);
23179 emit_move_insn (dest, src);
23180 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23181 true, OPTAB_LIB_WIDEN);
23182 if (tmp != offset)
23183 emit_move_insn (offset, tmp);
23184 emit_label (label);
23185 LABEL_NUSES (label) = 1;
23186 }
23187 if (max_size > 1)
23188 {
23189 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23190 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23191 src = change_address (srcmem, QImode, tmp);
23192 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23193 dest = change_address (destmem, QImode, tmp);
23194 emit_move_insn (dest, src);
23195 emit_label (label);
23196 LABEL_NUSES (label) = 1;
23197 }
23198 }
23199 }
23200
23201 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23202 with value PROMOTED_VAL.
23203 The return value is the updated DESTMEM. */
23205 static rtx
23206 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23207 HOST_WIDE_INT size_to_move)
23208 {
23209 rtx dst = destmem, adjust;
23210 enum insn_code code;
23211 enum machine_mode move_mode;
23212 int piece_size, i;
23213
23214 /* Find the widest mode in which we could perform moves.
23215 Start with the mode of PROMOTED_VAL and shrink it if SIZE_TO_MOVE
23216 is smaller than that mode's size. */
23217 move_mode = GET_MODE (promoted_val);
23218 if (move_mode == VOIDmode)
23219 move_mode = QImode;
23220 if (size_to_move < GET_MODE_SIZE (move_mode))
23221 {
23222 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23223 promoted_val = gen_lowpart (move_mode, promoted_val);
23224 }
23225 piece_size = GET_MODE_SIZE (move_mode);
23226 code = optab_handler (mov_optab, move_mode);
23227 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23228
23229 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23230
23231 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23232 gcc_assert (size_to_move % piece_size == 0);
23233 adjust = GEN_INT (piece_size);
23234 for (i = 0; i < size_to_move; i += piece_size)
23235 {
23236 if (piece_size <= GET_MODE_SIZE (word_mode))
23237 {
23238 emit_insn (gen_strset (destptr, dst, promoted_val));
23239 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23240 piece_size);
23241 continue;
23242 }
23243
23244 emit_insn (GEN_FCN (code) (dst, promoted_val));
23245
23246 emit_move_insn (destptr,
23247 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23248
23249 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23250 piece_size);
23251 }
23252
23253 /* Update DST rtx. */
23254 return dst;
23255 }
23256 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23257 static void
23258 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23259 rtx count, int max_size)
23260 {
23261 count =
23262 expand_simple_binop (counter_mode (count), AND, count,
23263 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23264 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23265 gen_lowpart (QImode, value), count, QImode,
23266 1, max_size / 2, true);
23267 }
23268
23269 /* Output code to set at most count & (max_size - 1) bytes starting at DESTMEM. */
23270 static void
23271 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23272 rtx count, int max_size)
23273 {
23274 rtx dest;
23275
23276 if (CONST_INT_P (count))
23277 {
23278 HOST_WIDE_INT countval = INTVAL (count);
23279 HOST_WIDE_INT epilogue_size = countval % max_size;
23280 int i;
23281
23282 /* For now MAX_SIZE should be a power of 2. This assert could be
23283 relaxed, but that would require a somewhat more complicated epilogue
23284 expansion. */
23285 gcc_assert ((max_size & (max_size - 1)) == 0);
23286 for (i = max_size; i >= 1; i >>= 1)
23287 {
23288 if (epilogue_size & i)
23289 {
23290 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23291 destmem = emit_memset (destmem, destptr, vec_value, i);
23292 else
23293 destmem = emit_memset (destmem, destptr, value, i);
23294 }
23295 }
23296 return;
23297 }
23298 if (max_size > 32)
23299 {
23300 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23301 return;
23302 }
23303 if (max_size > 16)
23304 {
23305 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23306 if (TARGET_64BIT)
23307 {
23308 dest = change_address (destmem, DImode, destptr);
23309 emit_insn (gen_strset (destptr, dest, value));
23310 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23311 emit_insn (gen_strset (destptr, dest, value));
23312 }
23313 else
23314 {
23315 dest = change_address (destmem, SImode, destptr);
23316 emit_insn (gen_strset (destptr, dest, value));
23317 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23318 emit_insn (gen_strset (destptr, dest, value));
23319 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23320 emit_insn (gen_strset (destptr, dest, value));
23321 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23322 emit_insn (gen_strset (destptr, dest, value));
23323 }
23324 emit_label (label);
23325 LABEL_NUSES (label) = 1;
23326 }
23327 if (max_size > 8)
23328 {
23329 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23330 if (TARGET_64BIT)
23331 {
23332 dest = change_address (destmem, DImode, destptr);
23333 emit_insn (gen_strset (destptr, dest, value));
23334 }
23335 else
23336 {
23337 dest = change_address (destmem, SImode, destptr);
23338 emit_insn (gen_strset (destptr, dest, value));
23339 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23340 emit_insn (gen_strset (destptr, dest, value));
23341 }
23342 emit_label (label);
23343 LABEL_NUSES (label) = 1;
23344 }
23345 if (max_size > 4)
23346 {
23347 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23348 dest = change_address (destmem, SImode, destptr);
23349 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23350 emit_label (label);
23351 LABEL_NUSES (label) = 1;
23352 }
23353 if (max_size > 2)
23354 {
23355 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23356 dest = change_address (destmem, HImode, destptr);
23357 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23358 emit_label (label);
23359 LABEL_NUSES (label) = 1;
23360 }
23361 if (max_size > 1)
23362 {
23363 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23364 dest = change_address (destmem, QImode, destptr);
23365 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23366 emit_label (label);
23367 LABEL_NUSES (label) = 1;
23368 }
23369 }
23370
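/* Illustrative sketch, not part of the compiled code: for a constant COUNT
   the epilogue above decomposes the remainder COUNT % MAX_SIZE into its set
   bits and emits one store per power of two.  A plain C model of that walk,
   using a hypothetical emit_store (len) helper, is:

       int remainder = countval % max_size;      for example 23 % 16 == 7
       for (int i = max_size; i >= 1; i >>= 1)
         if (remainder & i)
           emit_store (i);                        stores of 4, 2 and 1 bytes

   so at most log2 (MAX_SIZE) stores are emitted and no residual loop is
   needed.  */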
23371 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23372 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23373 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23374 ignored.
23375 Return value is updated DESTMEM. */
23376 static rtx
23377 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23378 rtx destptr, rtx srcptr, rtx value,
23379 rtx vec_value, rtx count, int align,
23380 int desired_alignment, bool issetmem)
23381 {
23382 int i;
23383 for (i = 1; i < desired_alignment; i <<= 1)
23384 {
23385 if (align <= i)
23386 {
23387 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23388 if (issetmem)
23389 {
23390 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23391 destmem = emit_memset (destmem, destptr, vec_value, i);
23392 else
23393 destmem = emit_memset (destmem, destptr, value, i);
23394 }
23395 else
23396 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23397 ix86_adjust_counter (count, i);
23398 emit_label (label);
23399 LABEL_NUSES (label) = 1;
23400 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23401 }
23402 }
23403 return destmem;
23404 }
23405
23406 /* Test if COUNT & SIZE is nonzero and if so, expand a movmem
23407 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23408 and jump to DONE_LABEL. */
23409 static void
23410 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23411 rtx destptr, rtx srcptr,
23412 rtx value, rtx vec_value,
23413 rtx count, int size,
23414 rtx done_label, bool issetmem)
23415 {
23416 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23417 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23418 rtx modesize;
23419 int n;
23420
23421 /* If we do not have vector value to copy, we must reduce size. */
23422 if (issetmem)
23423 {
23424 if (!vec_value)
23425 {
23426 if (GET_MODE (value) == VOIDmode && size > 8)
23427 mode = Pmode;
23428 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23429 mode = GET_MODE (value);
23430 }
23431 else
23432 mode = GET_MODE (vec_value), value = vec_value;
23433 }
23434 else
23435 {
23436 /* Choose appropriate vector mode. */
23437 if (size >= 32)
23438 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23439 else if (size >= 16)
23440 mode = TARGET_SSE ? V16QImode : DImode;
23441 srcmem = change_address (srcmem, mode, srcptr);
23442 }
23443 destmem = change_address (destmem, mode, destptr);
23444 modesize = GEN_INT (GET_MODE_SIZE (mode));
23445 gcc_assert (GET_MODE_SIZE (mode) <= size);
23446 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23447 {
23448 if (issetmem)
23449 emit_move_insn (destmem, gen_lowpart (mode, value));
23450 else
23451 {
23452 emit_move_insn (destmem, srcmem);
23453 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23454 }
23455 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23456 }
23457
23458 destmem = offset_address (destmem, count, 1);
23459 destmem = offset_address (destmem, GEN_INT (-2 * size),
23460 GET_MODE_SIZE (mode));
23461 if (!issetmem)
23462 {
23463 srcmem = offset_address (srcmem, count, 1);
23464 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23465 GET_MODE_SIZE (mode));
23466 }
23467 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23468 {
23469 if (issetmem)
23470 emit_move_insn (destmem, gen_lowpart (mode, value));
23471 else
23472 {
23473 emit_move_insn (destmem, srcmem);
23474 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23475 }
23476 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23477 }
23478 emit_jump_insn (gen_jump (done_label));
23479 emit_barrier ();
23480
23481 emit_label (label);
23482 LABEL_NUSES (label) = 1;
23483 }
23484
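/* Illustrative sketch, not part of the compiled code: expand_small_movmem_or_setmem
   above handles a block whose length N satisfies SIZE <= N < 2*SIZE by emitting
   one group of moves from the start of the block and a second, possibly
   overlapping, group that ends exactly at its last byte.  For the memcpy case
   the same idea in plain C (assuming SIZE <= n && n < 2 * SIZE) is roughly:

       memcpy (dst, src, SIZE);                          head of the block
       memcpy (dst + n - SIZE, src + n - SIZE, SIZE);    tail, may overlap head

   Re-copying the overlapping middle bytes is harmless because both groups read
   from SRC and write to DST, so every byte still receives its correct value.  */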
23485 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23486 and get ready for the main memcpy loop by copying initial DESIRED_ALIGN-ALIGN
23487 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT in a way we can
23488 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23489 DONE_LABEL is a label after the whole copying sequence. The label is created
23490 on demand if *DONE_LABEL is NULL.
23491 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
23492 bounds after the initial copies.
23493
23494 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23495 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23496 we will dispatch to a library call for large blocks.
23497
23498 In pseudocode we do:
23499
23500 if (COUNT < SIZE)
23501 {
23502 Assume that SIZE is 4. Bigger sizes are handled analogously
23503 if (COUNT & 4)
23504 {
23505 copy 4 bytes from SRCPTR to DESTPTR
23506 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23507 goto done_label
23508 }
23509 if (!COUNT)
23510 goto done_label;
23511 copy 1 byte from SRCPTR to DESTPTR
23512 if (COUNT & 2)
23513 {
23514 copy 2 bytes from SRCPTR to DESTPTR
23515 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23516 }
23517 }
23518 else
23519 {
23520 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23521 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23522
23523 OLD_DESTPTR = DESTPTR;
23524 Align DESTPTR up to DESIRED_ALIGN
23525 SRCPTR += DESTPTR - OLD_DESTPTR
23526 COUNT -= DESTPTR - OLD_DESTPTR
23527 if (DYNAMIC_CHECK)
23528 Round COUNT down to multiple of SIZE
23529 << optional caller supplied zero size guard is here >>
23530 << optional caller supplied dynamic check is here >>
23531 << caller supplied main copy loop is here >>
23532 }
23533 done_label:
23534 */
23535 static void
23536 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23537 rtx *destptr, rtx *srcptr,
23538 enum machine_mode mode,
23539 rtx value, rtx vec_value,
23540 rtx *count,
23541 rtx_code_label **done_label,
23542 int size,
23543 int desired_align,
23544 int align,
23545 unsigned HOST_WIDE_INT *min_size,
23546 bool dynamic_check,
23547 bool issetmem)
23548 {
23549 rtx_code_label *loop_label = NULL, *label;
23550 int n;
23551 rtx modesize;
23552 int prolog_size = 0;
23553 rtx mode_value;
23554
23555 /* Choose the proper value to copy. */
23556 if (issetmem && VECTOR_MODE_P (mode))
23557 mode_value = vec_value;
23558 else
23559 mode_value = value;
23560 gcc_assert (GET_MODE_SIZE (mode) <= size);
23561
23562 /* See if block is big or small, handle small blocks. */
23563 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23564 {
23565 int size2 = size;
23566 loop_label = gen_label_rtx ();
23567
23568 if (!*done_label)
23569 *done_label = gen_label_rtx ();
23570
23571 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23572 1, loop_label);
23573 size2 >>= 1;
23574
23575 /* Handle sizes > 3. */
23576 for (;size2 > 2; size2 >>= 1)
23577 expand_small_movmem_or_setmem (destmem, srcmem,
23578 *destptr, *srcptr,
23579 value, vec_value,
23580 *count,
23581 size2, *done_label, issetmem);
23582 /* Nothing to copy? Jump to DONE_LABEL if so */
23583 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23584 1, *done_label);
23585
23586 /* Do a byte copy. */
23587 destmem = change_address (destmem, QImode, *destptr);
23588 if (issetmem)
23589 emit_move_insn (destmem, gen_lowpart (QImode, value));
23590 else
23591 {
23592 srcmem = change_address (srcmem, QImode, *srcptr);
23593 emit_move_insn (destmem, srcmem);
23594 }
23595
23596 /* Handle sizes 2 and 3. */
23597 label = ix86_expand_aligntest (*count, 2, false);
23598 destmem = change_address (destmem, HImode, *destptr);
23599 destmem = offset_address (destmem, *count, 1);
23600 destmem = offset_address (destmem, GEN_INT (-2), 2);
23601 if (issetmem)
23602 emit_move_insn (destmem, gen_lowpart (HImode, value));
23603 else
23604 {
23605 srcmem = change_address (srcmem, HImode, *srcptr);
23606 srcmem = offset_address (srcmem, *count, 1);
23607 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23608 emit_move_insn (destmem, srcmem);
23609 }
23610
23611 emit_label (label);
23612 LABEL_NUSES (label) = 1;
23613 emit_jump_insn (gen_jump (*done_label));
23614 emit_barrier ();
23615 }
23616 else
23617 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23618 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23619
23620 /* Start memcpy for COUNT >= SIZE. */
23621 if (loop_label)
23622 {
23623 emit_label (loop_label);
23624 LABEL_NUSES (loop_label) = 1;
23625 }
23626
23627 /* Copy first desired_align bytes. */
23628 if (!issetmem)
23629 srcmem = change_address (srcmem, mode, *srcptr);
23630 destmem = change_address (destmem, mode, *destptr);
23631 modesize = GEN_INT (GET_MODE_SIZE (mode));
23632 for (n = 0; prolog_size < desired_align - align; n++)
23633 {
23634 if (issetmem)
23635 emit_move_insn (destmem, mode_value);
23636 else
23637 {
23638 emit_move_insn (destmem, srcmem);
23639 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23640 }
23641 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23642 prolog_size += GET_MODE_SIZE (mode);
23643 }
23644
23645
23646 /* Copy last SIZE bytes. */
23647 destmem = offset_address (destmem, *count, 1);
23648 destmem = offset_address (destmem,
23649 GEN_INT (-size - prolog_size),
23650 1);
23651 if (issetmem)
23652 emit_move_insn (destmem, mode_value);
23653 else
23654 {
23655 srcmem = offset_address (srcmem, *count, 1);
23656 srcmem = offset_address (srcmem,
23657 GEN_INT (-size - prolog_size),
23658 1);
23659 emit_move_insn (destmem, srcmem);
23660 }
23661 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23662 {
23663 destmem = offset_address (destmem, modesize, 1);
23664 if (issetmem)
23665 emit_move_insn (destmem, mode_value);
23666 else
23667 {
23668 srcmem = offset_address (srcmem, modesize, 1);
23669 emit_move_insn (destmem, srcmem);
23670 }
23671 }
23672
23673 /* Align destination. */
23674 if (desired_align > 1 && desired_align > align)
23675 {
23676 rtx saveddest = *destptr;
23677
23678 gcc_assert (desired_align <= size);
23679 /* Align destptr up, place it in a new register. */
23680 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23681 GEN_INT (prolog_size),
23682 NULL_RTX, 1, OPTAB_DIRECT);
23683 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23684 GEN_INT (-desired_align),
23685 *destptr, 1, OPTAB_DIRECT);
23686 /* See how many bytes we skipped. */
23687 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23688 *destptr,
23689 saveddest, 1, OPTAB_DIRECT);
23690 /* Adjust srcptr and count. */
23691 if (!issetmem)
23692 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23693 *srcptr, 1, OPTAB_DIRECT);
23694 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23695 saveddest, *count, 1, OPTAB_DIRECT);
23696 /* We copied at most size + prolog_size. */
23697 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23698 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23699 else
23700 *min_size = 0;
23701
23702 /* Our loops always round down the block size, but for dispatch to the library
23703 we need the precise value. */
23704 if (dynamic_check)
23705 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23706 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23707 }
23708 else
23709 {
23710 gcc_assert (prolog_size == 0);
23711 /* Decrease count, so we won't end up copying last word twice. */
23712 if (!CONST_INT_P (*count))
23713 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23714 constm1_rtx, *count, 1, OPTAB_DIRECT);
23715 else
23716 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23717 if (*min_size)
23718 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23719 }
23720 }
23721
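/* Illustrative sketch, not part of the compiled code: the destination-alignment
   step in the function above can be modelled in plain C.  Assuming a power-of-two
   DESIRED_ALIGN and that PROLOG_SIZE bytes were already copied from the start of
   the block, the pointer and count adjustments amount to:

       uintptr_t old_dest = (uintptr_t) destptr;
       destptr = (char *) ((old_dest + prolog_size)
                           & ~((uintptr_t) desired_align - 1));   round up
       size_t skipped = (uintptr_t) destptr - old_dest;    bytes already handled
       srcptr += skipped;
       count  -= skipped;

   which mirrors what the expand_simple_binop calls emit at the RTL level, except
   that they keep the skipped distance in a scratch register.  */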
23722
23723 /* This function is like the previous one, except here we know how many bytes
23724 need to be copied. That allows us to update alignment not only of DST, which
23725 is returned, but also of SRC, which is passed as a pointer for that
23726 reason. */
23727 static rtx
23728 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23729 rtx srcreg, rtx value, rtx vec_value,
23730 int desired_align, int align_bytes,
23731 bool issetmem)
23732 {
23733 rtx src = NULL;
23734 rtx orig_dst = dst;
23735 rtx orig_src = NULL;
23736 int piece_size = 1;
23737 int copied_bytes = 0;
23738
23739 if (!issetmem)
23740 {
23741 gcc_assert (srcp != NULL);
23742 src = *srcp;
23743 orig_src = src;
23744 }
23745
23746 for (piece_size = 1;
23747 piece_size <= desired_align && copied_bytes < align_bytes;
23748 piece_size <<= 1)
23749 {
23750 if (align_bytes & piece_size)
23751 {
23752 if (issetmem)
23753 {
23754 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23755 dst = emit_memset (dst, destreg, vec_value, piece_size);
23756 else
23757 dst = emit_memset (dst, destreg, value, piece_size);
23758 }
23759 else
23760 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23761 copied_bytes += piece_size;
23762 }
23763 }
23764 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23765 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23766 if (MEM_SIZE_KNOWN_P (orig_dst))
23767 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23768
23769 if (!issetmem)
23770 {
23771 int src_align_bytes = get_mem_align_offset (src, desired_align
23772 * BITS_PER_UNIT);
23773 if (src_align_bytes >= 0)
23774 src_align_bytes = desired_align - src_align_bytes;
23775 if (src_align_bytes >= 0)
23776 {
23777 unsigned int src_align;
23778 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23779 {
23780 if ((src_align_bytes & (src_align - 1))
23781 == (align_bytes & (src_align - 1)))
23782 break;
23783 }
23784 if (src_align > (unsigned int) desired_align)
23785 src_align = desired_align;
23786 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23787 set_mem_align (src, src_align * BITS_PER_UNIT);
23788 }
23789 if (MEM_SIZE_KNOWN_P (orig_src))
23790 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23791 *srcp = src;
23792 }
23793
23794 return dst;
23795 }
23796
23797 /* Return true if ALG can be used in current context.
23798 Assume we expand memset if MEMSET is true. */
23799 static bool
23800 alg_usable_p (enum stringop_alg alg, bool memset)
23801 {
23802 if (alg == no_stringop)
23803 return false;
23804 if (alg == vector_loop)
23805 return TARGET_SSE || TARGET_AVX;
23806 /* Algorithms using the rep prefix want at least edi and ecx;
23807 additionally, memset wants eax and memcpy wants esi. Don't
23808 consider such algorithms if the user has appropriated those
23809 registers for their own purposes. */
23810 if (alg == rep_prefix_1_byte
23811 || alg == rep_prefix_4_byte
23812 || alg == rep_prefix_8_byte)
23813 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23814 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23815 return true;
23816 }
23817
23818 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23819 static enum stringop_alg
23820 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23821 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23822 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23823 {
23824 const struct stringop_algs * algs;
23825 bool optimize_for_speed;
23826 int max = 0;
23827 const struct processor_costs *cost;
23828 int i;
23829 bool any_alg_usable_p = false;
23830
23831 *noalign = false;
23832 *dynamic_check = -1;
23833
23834 /* Even if the string operation call is cold, we still might spend a lot
23835 of time processing large blocks. */
23836 if (optimize_function_for_size_p (cfun)
23837 || (optimize_insn_for_size_p ()
23838 && (max_size < 256
23839 || (expected_size != -1 && expected_size < 256))))
23840 optimize_for_speed = false;
23841 else
23842 optimize_for_speed = true;
23843
23844 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23845 if (memset)
23846 algs = &cost->memset[TARGET_64BIT != 0];
23847 else
23848 algs = &cost->memcpy[TARGET_64BIT != 0];
23849
23850 /* Find the maximal size for which a usable non-libcall algorithm is specified. */
23851 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23852 {
23853 enum stringop_alg candidate = algs->size[i].alg;
23854 bool usable = alg_usable_p (candidate, memset);
23855 any_alg_usable_p |= usable;
23856
23857 if (candidate != libcall && candidate && usable)
23858 max = algs->size[i].max;
23859 }
23860
23861 /* If the expected size is not known but the max size is small enough
23862 so that the inline version is a win, set the expected size to the
23863 middle of the range. */
23864 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23865 && expected_size == -1)
23866 expected_size = min_size / 2 + max_size / 2;
23867
23868 /* If the user specified the algorithm, honor it if possible. */
23869 if (ix86_stringop_alg != no_stringop
23870 && alg_usable_p (ix86_stringop_alg, memset))
23871 return ix86_stringop_alg;
23872 /* rep; movq or rep; movl is the smallest variant. */
23873 else if (!optimize_for_speed)
23874 {
23875 *noalign = true;
23876 if (!count || (count & 3) || (memset && !zero_memset))
23877 return alg_usable_p (rep_prefix_1_byte, memset)
23878 ? rep_prefix_1_byte : loop_1_byte;
23879 else
23880 return alg_usable_p (rep_prefix_4_byte, memset)
23881 ? rep_prefix_4_byte : loop;
23882 }
23883 /* Very tiny blocks are best handled via the loop; REP is expensive to
23884 set up. */
23885 else if (expected_size != -1 && expected_size < 4)
23886 return loop_1_byte;
23887 else if (expected_size != -1)
23888 {
23889 enum stringop_alg alg = libcall;
23890 bool alg_noalign = false;
23891 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23892 {
23893 /* We get here if the algorithms that were not libcall-based
23894 were rep-prefix based and we are unable to use rep prefixes
23895 based on global register usage. Break out of the loop and
23896 use the heuristic below. */
23897 if (algs->size[i].max == 0)
23898 break;
23899 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23900 {
23901 enum stringop_alg candidate = algs->size[i].alg;
23902
23903 if (candidate != libcall && alg_usable_p (candidate, memset))
23904 {
23905 alg = candidate;
23906 alg_noalign = algs->size[i].noalign;
23907 }
23908 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23909 last non-libcall inline algorithm. */
23910 if (TARGET_INLINE_ALL_STRINGOPS)
23911 {
23912 /* When the current size is best copied by a libcall,
23913 but we are still forced to inline, run the heuristic below
23914 that will pick code for medium-sized blocks. */
23915 if (alg != libcall)
23916 {
23917 *noalign = alg_noalign;
23918 return alg;
23919 }
23920 break;
23921 }
23922 else if (alg_usable_p (candidate, memset))
23923 {
23924 *noalign = algs->size[i].noalign;
23925 return candidate;
23926 }
23927 }
23928 }
23929 }
23930 /* When asked to inline the call anyway, try to pick a meaningful choice.
23931 We look for the maximal size of a block that is faster to copy by hand and
23932 take blocks of at most that size, guessing that the average size will
23933 be roughly half of the maximum.
23934
23935 If this turns out to be bad, we might simply specify the preferred
23936 choice in ix86_costs. */
23937 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23938 && (algs->unknown_size == libcall
23939 || !alg_usable_p (algs->unknown_size, memset)))
23940 {
23941 enum stringop_alg alg;
23942
23943 /* If there aren't any usable algorithms, then recursing on
23944 smaller sizes isn't going to find anything. Just return the
23945 simple byte-at-a-time copy loop. */
23946 if (!any_alg_usable_p)
23947 {
23948 /* Pick something reasonable. */
23949 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23950 *dynamic_check = 128;
23951 return loop_1_byte;
23952 }
23953 if (max <= 0)
23954 max = 4096;
23955 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23956 zero_memset, dynamic_check, noalign);
23957 gcc_assert (*dynamic_check == -1);
23958 gcc_assert (alg != libcall);
23959 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23960 *dynamic_check = max;
23961 return alg;
23962 }
23963 return (alg_usable_p (algs->unknown_size, memset)
23964 ? algs->unknown_size : libcall);
23965 }
23966
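/* Illustrative sketch, not part of the compiled code: stripped of the rep-prefix
   and TARGET_INLINE_ALL_STRINGOPS special cases, the table lookup in decide_alg
   above amounts to taking the first usable entry whose MAX covers the expected
   size and falling back to the per-target default otherwise:

       for (i = 0; i < MAX_STRINGOP_ALGS; i++)
         if (algs->size[i].max == -1 || algs->size[i].max >= expected_size)
           if (alg_usable_p (algs->size[i].alg, memset))
             return algs->size[i].alg;
       return algs->unknown_size;

   The real code additionally remembers the last non-libcall candidate so that
   -minline-all-stringops can still force an inline expansion.  */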
23967 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23968 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23969 static int
23970 decide_alignment (int align,
23971 enum stringop_alg alg,
23972 int expected_size,
23973 enum machine_mode move_mode)
23974 {
23975 int desired_align = 0;
23976
23977 gcc_assert (alg != no_stringop);
23978
23979 if (alg == libcall)
23980 return 0;
23981 if (move_mode == VOIDmode)
23982 return 0;
23983
23984 desired_align = GET_MODE_SIZE (move_mode);
23985 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
23986 copying a whole cacheline at once. */
23987 if (TARGET_PENTIUMPRO
23988 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23989 desired_align = 8;
23990
23991 if (optimize_size)
23992 desired_align = 1;
23993 if (desired_align < align)
23994 desired_align = align;
23995 if (expected_size != -1 && expected_size < 4)
23996 desired_align = align;
23997
23998 return desired_align;
23999 }
24000
24001
24002 /* Helper function for memset. For QImode value 0xXY produce
24003 0xXYXYXYXY of the width specified by MODE. This is essentially
24004 a multiplication by 0x01010101, but we can do slightly better than
24005 synth_mult by unwinding the sequence by hand on CPUs with
24006 slow multiply. */
24007 static rtx
24008 promote_duplicated_reg (enum machine_mode mode, rtx val)
24009 {
24010 enum machine_mode valmode = GET_MODE (val);
24011 rtx tmp;
24012 int nops = mode == DImode ? 3 : 2;
24013
24014 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24015 if (val == const0_rtx)
24016 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24017 if (CONST_INT_P (val))
24018 {
24019 HOST_WIDE_INT v = INTVAL (val) & 255;
24020
24021 v |= v << 8;
24022 v |= v << 16;
24023 if (mode == DImode)
24024 v |= (v << 16) << 16;
24025 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24026 }
24027
24028 if (valmode == VOIDmode)
24029 valmode = QImode;
24030 if (valmode != QImode)
24031 val = gen_lowpart (QImode, val);
24032 if (mode == QImode)
24033 return val;
24034 if (!TARGET_PARTIAL_REG_STALL)
24035 nops--;
24036 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24037 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24038 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24039 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24040 {
24041 rtx reg = convert_modes (mode, QImode, val, true);
24042 tmp = promote_duplicated_reg (mode, const1_rtx);
24043 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24044 OPTAB_DIRECT);
24045 }
24046 else
24047 {
24048 rtx reg = convert_modes (mode, QImode, val, true);
24049
24050 if (!TARGET_PARTIAL_REG_STALL)
24051 if (mode == SImode)
24052 emit_insn (gen_movsi_insv_1 (reg, reg));
24053 else
24054 emit_insn (gen_movdi_insv_1 (reg, reg));
24055 else
24056 {
24057 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24058 NULL, 1, OPTAB_DIRECT);
24059 reg =
24060 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24061 }
24062 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24063 NULL, 1, OPTAB_DIRECT);
24064 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24065 if (mode == SImode)
24066 return reg;
24067 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24068 NULL, 1, OPTAB_DIRECT);
24069 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24070 return reg;
24071 }
24072 }
24073
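/* Illustrative sketch, not part of the compiled code: the constant path of
   promote_duplicated_reg above just replicates the low byte across the whole
   word.  The same splat written as plain C for a 4- or 8-byte word:

       uint64_t v = byte;        byte is the QImode value 0xXY
       v |= v << 8;              0x00000000000000XY becomes 0x000000000000XYXY
       v |= v << 16;             becomes 0x00000000XYXYXYXY
       if (eight_bytes)
         v |= v << 32;           becomes 0xXYXYXYXYXYXYXYXY for DImode

   For a non-constant byte the function instead emits either a multiply by the
   splatted constant 1 (0x0101...01) or the equivalent shift/or sequence,
   whichever the cost tables say is cheaper, using the insv patterns when
   partial register stalls are not a concern.  */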
24074 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
24075 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
24076 alignment from ALIGN to DESIRED_ALIGN. */
24077 static rtx
24078 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24079 int align)
24080 {
24081 rtx promoted_val;
24082
24083 if (TARGET_64BIT
24084 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24085 promoted_val = promote_duplicated_reg (DImode, val);
24086 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24087 promoted_val = promote_duplicated_reg (SImode, val);
24088 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24089 promoted_val = promote_duplicated_reg (HImode, val);
24090 else
24091 promoted_val = val;
24092
24093 return promoted_val;
24094 }
24095
24096 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24097 operations when profitable. The code depends upon architecture, block size
24098 and alignment, but always has one of the following overall structures:
24099
24100 Aligned move sequence:
24101
24102 1) Prologue guard: Conditional that jumps up to epilogues for small
24103 blocks that can be handled by epilogue alone. This is faster
24104 but also needed for correctness, since the prologue assumes the block
24105 is larger than the desired alignment.
24106
24107 Optional dynamic check for size and libcall for large
24108 blocks is emitted here too, with -minline-stringops-dynamically.
24109
24110 2) Prologue: copy first few bytes in order to get destination
24111 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24112 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24113 copied. We emit either a jump tree on power of two sized
24114 blocks, or a byte loop.
24115
24116 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24117 with specified algorithm.
24118
24119 4) Epilogue: code copying tail of the block that is too small to be
24120 handled by main body (or up to size guarded by prologue guard).
24121
24122 Misaligned move sequence
24123
24124 1) misaligned move prologue/epilogue containing:
24125 a) Prologue handling small memory blocks and jumping to done_label
24126 (skipped if blocks are known to be large enough)
24127 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24128 needed by single possibly misaligned move
24129 (skipped if alignment is not needed)
24130 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24131
24132 2) Zero size guard dispatching to done_label, if needed
24133
24134 3) dispatch to library call, if needed,
24135
24136 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24137 with specified algorithm. */
24138 bool
24139 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24140 rtx align_exp, rtx expected_align_exp,
24141 rtx expected_size_exp, rtx min_size_exp,
24142 rtx max_size_exp, rtx probable_max_size_exp,
24143 bool issetmem)
24144 {
24145 rtx destreg;
24146 rtx srcreg = NULL;
24147 rtx_code_label *label = NULL;
24148 rtx tmp;
24149 rtx_code_label *jump_around_label = NULL;
24150 HOST_WIDE_INT align = 1;
24151 unsigned HOST_WIDE_INT count = 0;
24152 HOST_WIDE_INT expected_size = -1;
24153 int size_needed = 0, epilogue_size_needed;
24154 int desired_align = 0, align_bytes = 0;
24155 enum stringop_alg alg;
24156 rtx promoted_val = NULL;
24157 rtx vec_promoted_val = NULL;
24158 bool force_loopy_epilogue = false;
24159 int dynamic_check;
24160 bool need_zero_guard = false;
24161 bool noalign;
24162 enum machine_mode move_mode = VOIDmode;
24163 int unroll_factor = 1;
24164 /* TODO: Once value ranges are available, fill in proper data. */
24165 unsigned HOST_WIDE_INT min_size = 0;
24166 unsigned HOST_WIDE_INT max_size = -1;
24167 unsigned HOST_WIDE_INT probable_max_size = -1;
24168 bool misaligned_prologue_used = false;
24169
24170 if (CONST_INT_P (align_exp))
24171 align = INTVAL (align_exp);
24172 /* i386 can do misaligned access at reasonably increased cost. */
24173 if (CONST_INT_P (expected_align_exp)
24174 && INTVAL (expected_align_exp) > align)
24175 align = INTVAL (expected_align_exp);
24176 /* ALIGN is the minimum of destination and source alignment, but we care here
24177 just about destination alignment. */
24178 else if (!issetmem
24179 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24180 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24181
24182 if (CONST_INT_P (count_exp))
24183 {
24184 min_size = max_size = probable_max_size = count = expected_size
24185 = INTVAL (count_exp);
24186 /* When COUNT is 0, there is nothing to do. */
24187 if (!count)
24188 return true;
24189 }
24190 else
24191 {
24192 if (min_size_exp)
24193 min_size = INTVAL (min_size_exp);
24194 if (max_size_exp)
24195 max_size = INTVAL (max_size_exp);
24196 if (probable_max_size_exp)
24197 probable_max_size = INTVAL (probable_max_size_exp);
24198 if (CONST_INT_P (expected_size_exp))
24199 expected_size = INTVAL (expected_size_exp);
24200 }
24201
24202 /* Make sure we don't need to care about overflow later on. */
24203 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24204 return false;
24205
24206 /* Step 0: Decide on preferred algorithm, desired alignment and
24207 size of chunks to be copied by main loop. */
24208 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24209 issetmem,
24210 issetmem && val_exp == const0_rtx,
24211 &dynamic_check, &noalign);
24212 if (alg == libcall)
24213 return false;
24214 gcc_assert (alg != no_stringop);
24215
24216 /* For now the vector version of memset is generated only for memory zeroing, as
24217 creating a promoted vector value is very cheap in this case. */
24218 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24219 alg = unrolled_loop;
24220
24221 if (!count)
24222 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24223 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24224 if (!issetmem)
24225 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24226
24227 unroll_factor = 1;
24228 move_mode = word_mode;
24229 switch (alg)
24230 {
24231 case libcall:
24232 case no_stringop:
24233 case last_alg:
24234 gcc_unreachable ();
24235 case loop_1_byte:
24236 need_zero_guard = true;
24237 move_mode = QImode;
24238 break;
24239 case loop:
24240 need_zero_guard = true;
24241 break;
24242 case unrolled_loop:
24243 need_zero_guard = true;
24244 unroll_factor = (TARGET_64BIT ? 4 : 2);
24245 break;
24246 case vector_loop:
24247 need_zero_guard = true;
24248 unroll_factor = 4;
24249 /* Find the widest supported mode. */
24250 move_mode = word_mode;
24251 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24252 != CODE_FOR_nothing)
24253 move_mode = GET_MODE_WIDER_MODE (move_mode);
24254
24255 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24256 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24257 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24258 {
24259 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24260 move_mode = mode_for_vector (word_mode, nunits);
24261 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24262 move_mode = word_mode;
24263 }
24264 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24265 break;
24266 case rep_prefix_8_byte:
24267 move_mode = DImode;
24268 break;
24269 case rep_prefix_4_byte:
24270 move_mode = SImode;
24271 break;
24272 case rep_prefix_1_byte:
24273 move_mode = QImode;
24274 break;
24275 }
24276 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24277 epilogue_size_needed = size_needed;
24278
24279 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24280 if (!TARGET_ALIGN_STRINGOPS || noalign)
24281 align = desired_align;
24282
24283 /* Step 1: Prologue guard. */
24284
24285 /* Alignment code needs count to be in register. */
24286 if (CONST_INT_P (count_exp) && desired_align > align)
24287 {
24288 if (INTVAL (count_exp) > desired_align
24289 && INTVAL (count_exp) > size_needed)
24290 {
24291 align_bytes
24292 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24293 if (align_bytes <= 0)
24294 align_bytes = 0;
24295 else
24296 align_bytes = desired_align - align_bytes;
24297 }
24298 if (align_bytes == 0)
24299 count_exp = force_reg (counter_mode (count_exp), count_exp);
24300 }
24301 gcc_assert (desired_align >= 1 && align >= 1);
24302
24303 /* Misaligned move sequences handle both prologue and epilogue at once.
24304 Default code generation results in smaller code for large alignments
24305 and also avoids redundant work when sizes are known precisely. */
24306 misaligned_prologue_used
24307 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24308 && MAX (desired_align, epilogue_size_needed) <= 32
24309 && desired_align <= epilogue_size_needed
24310 && ((desired_align > align && !align_bytes)
24311 || (!count && epilogue_size_needed > 1)));
24312
24313 /* Do the cheap promotion to allow better CSE across the
24314 main loop and epilogue (i.e. one load of the big constant in
24315 front of all code).
24316 For now the misaligned move sequences do not have a fast path
24317 without broadcasting. */
24318 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24319 {
24320 if (alg == vector_loop)
24321 {
24322 gcc_assert (val_exp == const0_rtx);
24323 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24324 promoted_val = promote_duplicated_reg_to_size (val_exp,
24325 GET_MODE_SIZE (word_mode),
24326 desired_align, align);
24327 }
24328 else
24329 {
24330 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24331 desired_align, align);
24332 }
24333 }
24334 /* Misaligned move sequences handle both prologues and epilogues at once.
24335 Default code generation results in smaller code for large alignments and
24336 also avoids redundant work when sizes are known precisely. */
24337 if (misaligned_prologue_used)
24338 {
24339 /* Misaligned move prologue handles small blocks by itself. */
24340 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24341 (dst, src, &destreg, &srcreg,
24342 move_mode, promoted_val, vec_promoted_val,
24343 &count_exp,
24344 &jump_around_label,
24345 desired_align < align
24346 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24347 desired_align, align, &min_size, dynamic_check, issetmem);
24348 if (!issetmem)
24349 src = change_address (src, BLKmode, srcreg);
24350 dst = change_address (dst, BLKmode, destreg);
24351 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24352 epilogue_size_needed = 0;
24353 if (need_zero_guard && !min_size)
24354 {
24355 /* It is possible that we copied enough so the main loop will not
24356 execute. */
24357 gcc_assert (size_needed > 1);
24358 if (jump_around_label == NULL_RTX)
24359 jump_around_label = gen_label_rtx ();
24360 emit_cmp_and_jump_insns (count_exp,
24361 GEN_INT (size_needed),
24362 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24363 if (expected_size == -1
24364 || expected_size < (desired_align - align) / 2 + size_needed)
24365 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24366 else
24367 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24368 }
24369 }
24370 /* Ensure that alignment prologue won't copy past end of block. */
24371 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24372 {
24373 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24374 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24375 Make sure it is power of 2. */
24376 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24377
24378 /* To improve performance of small blocks, we jump around the VAL
24379 promoting code. This means that if the promoted VAL is not constant,
24380 we might not use it in the epilogue and have to use the byte
24381 loop variant. */
24382 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24383 force_loopy_epilogue = true;
24384 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24385 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24386 {
24387 /* If main algorithm works on QImode, no epilogue is needed.
24388 For small sizes just don't align anything. */
24389 if (size_needed == 1)
24390 desired_align = align;
24391 else
24392 goto epilogue;
24393 }
24394 else if (!count
24395 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24396 {
24397 label = gen_label_rtx ();
24398 emit_cmp_and_jump_insns (count_exp,
24399 GEN_INT (epilogue_size_needed),
24400 LTU, 0, counter_mode (count_exp), 1, label);
24401 if (expected_size == -1 || expected_size < epilogue_size_needed)
24402 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24403 else
24404 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24405 }
24406 }
24407
24408 /* Emit code to decide at runtime whether a library call or inline code should
24409 be used. */
24410 if (dynamic_check != -1)
24411 {
24412 if (!issetmem && CONST_INT_P (count_exp))
24413 {
24414 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24415 {
24416 emit_block_move_via_libcall (dst, src, count_exp, false);
24417 count_exp = const0_rtx;
24418 goto epilogue;
24419 }
24420 }
24421 else
24422 {
24423 rtx_code_label *hot_label = gen_label_rtx ();
24424 if (jump_around_label == NULL_RTX)
24425 jump_around_label = gen_label_rtx ();
24426 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24427 LEU, 0, counter_mode (count_exp),
24428 1, hot_label);
24429 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24430 if (issetmem)
24431 set_storage_via_libcall (dst, count_exp, val_exp, false);
24432 else
24433 emit_block_move_via_libcall (dst, src, count_exp, false);
24434 emit_jump (jump_around_label);
24435 emit_label (hot_label);
24436 }
24437 }
24438
24439 /* Step 2: Alignment prologue. */
24440 /* Do the expensive promotion once we branched off the small blocks. */
24441 if (issetmem && !promoted_val)
24442 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24443 desired_align, align);
24444
24445 if (desired_align > align && !misaligned_prologue_used)
24446 {
24447 if (align_bytes == 0)
24448 {
24449 /* Except for the first move in the prologue, we no longer know
24450 the constant offset in aliasing info. It doesn't seem worth
24451 the pain to maintain it for the first move, so throw away
24452 the info early. */
24453 dst = change_address (dst, BLKmode, destreg);
24454 if (!issetmem)
24455 src = change_address (src, BLKmode, srcreg);
24456 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24457 promoted_val, vec_promoted_val,
24458 count_exp, align, desired_align,
24459 issetmem);
24460 /* At most desired_align - align bytes are copied. */
24461 if (min_size < (unsigned)(desired_align - align))
24462 min_size = 0;
24463 else
24464 min_size -= desired_align - align;
24465 }
24466 else
24467 {
24468 /* If we know how many bytes need to be stored before dst is
24469 sufficiently aligned, maintain aliasing info accurately. */
24470 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24471 srcreg,
24472 promoted_val,
24473 vec_promoted_val,
24474 desired_align,
24475 align_bytes,
24476 issetmem);
24477
24478 count_exp = plus_constant (counter_mode (count_exp),
24479 count_exp, -align_bytes);
24480 count -= align_bytes;
24481 min_size -= align_bytes;
24482 max_size -= align_bytes;
24483 }
24484 if (need_zero_guard
24485 && !min_size
24486 && (count < (unsigned HOST_WIDE_INT) size_needed
24487 || (align_bytes == 0
24488 && count < ((unsigned HOST_WIDE_INT) size_needed
24489 + desired_align - align))))
24490 {
24491 /* It is possible that we copied enough so the main loop will not
24492 execute. */
24493 gcc_assert (size_needed > 1);
24494 if (label == NULL_RTX)
24495 label = gen_label_rtx ();
24496 emit_cmp_and_jump_insns (count_exp,
24497 GEN_INT (size_needed),
24498 LTU, 0, counter_mode (count_exp), 1, label);
24499 if (expected_size == -1
24500 || expected_size < (desired_align - align) / 2 + size_needed)
24501 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24502 else
24503 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24504 }
24505 }
24506 if (label && size_needed == 1)
24507 {
24508 emit_label (label);
24509 LABEL_NUSES (label) = 1;
24510 label = NULL;
24511 epilogue_size_needed = 1;
24512 if (issetmem)
24513 promoted_val = val_exp;
24514 }
24515 else if (label == NULL_RTX && !misaligned_prologue_used)
24516 epilogue_size_needed = size_needed;
24517
24518 /* Step 3: Main loop. */
24519
24520 switch (alg)
24521 {
24522 case libcall:
24523 case no_stringop:
24524 case last_alg:
24525 gcc_unreachable ();
24526 case loop_1_byte:
24527 case loop:
24528 case unrolled_loop:
24529 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24530 count_exp, move_mode, unroll_factor,
24531 expected_size, issetmem);
24532 break;
24533 case vector_loop:
24534 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24535 vec_promoted_val, count_exp, move_mode,
24536 unroll_factor, expected_size, issetmem);
24537 break;
24538 case rep_prefix_8_byte:
24539 case rep_prefix_4_byte:
24540 case rep_prefix_1_byte:
24541 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24542 val_exp, count_exp, move_mode, issetmem);
24543 break;
24544 }
24545 /* Properly adjust the offsets of src and dest memory for aliasing. */
24546 if (CONST_INT_P (count_exp))
24547 {
24548 if (!issetmem)
24549 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24550 (count / size_needed) * size_needed);
24551 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24552 (count / size_needed) * size_needed);
24553 }
24554 else
24555 {
24556 if (!issetmem)
24557 src = change_address (src, BLKmode, srcreg);
24558 dst = change_address (dst, BLKmode, destreg);
24559 }
24560
24561 /* Step 4: Epilogue to copy the remaining bytes. */
24562 epilogue:
24563 if (label)
24564 {
24565 /* When the main loop is done, COUNT_EXP might hold original count,
24566 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
24567 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
24568 bytes. Compensate if needed. */
24569
24570 if (size_needed < epilogue_size_needed)
24571 {
24572 tmp =
24573 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24574 GEN_INT (size_needed - 1), count_exp, 1,
24575 OPTAB_DIRECT);
24576 if (tmp != count_exp)
24577 emit_move_insn (count_exp, tmp);
24578 }
24579 emit_label (label);
24580 LABEL_NUSES (label) = 1;
24581 }
24582
24583 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24584 {
24585 if (force_loopy_epilogue)
24586 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24587 epilogue_size_needed);
24588 else
24589 {
24590 if (issetmem)
24591 expand_setmem_epilogue (dst, destreg, promoted_val,
24592 vec_promoted_val, count_exp,
24593 epilogue_size_needed);
24594 else
24595 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24596 epilogue_size_needed);
24597 }
24598 }
24599 if (jump_around_label)
24600 emit_label (jump_around_label);
24601 return true;
24602 }
24603
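/* Illustrative sketch, not part of the compiled code: for the aligned sequence
   the expander above emits control flow that behaves roughly like the following
   C, where SIZE stands for the chunk size of the main loop and the helpers are
   placeholders for the sequences emitted by the functions earlier in this file:

       if (count < epilogue_size_needed)
         goto epilogue;                                   step 1: prologue guard
       while (dest & (desired_align - 1))
         copy_prologue_piece ();                          step 2: alignment prologue
       while (count >= SIZE)
         copy_size_bytes (), count -= SIZE;               step 3: main loop
     epilogue:
       copy_tail (count & (epilogue_size_needed - 1));    step 4: epilogue

   The misaligned variant folds steps 2 and 4 into possibly overlapping unaligned
   moves, as described before
   expand_set_or_movmem_prologue_epilogue_by_misaligned_moves.  */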
24604
24605 /* Expand the appropriate insns for doing strlen if not just doing
24606 repnz; scasb
24607
24608 out = result, initialized with the start address
24609 align_rtx = alignment of the address.
24610 scratch = scratch register, initialized with the start address when
24611 not aligned, otherwise undefined
24612
24613 This is just the body. It needs the initializations mentioned above and
24614 some address computing at the end. These things are done in i386.md. */
24615
24616 static void
24617 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24618 {
24619 int align;
24620 rtx tmp;
24621 rtx_code_label *align_2_label = NULL;
24622 rtx_code_label *align_3_label = NULL;
24623 rtx_code_label *align_4_label = gen_label_rtx ();
24624 rtx_code_label *end_0_label = gen_label_rtx ();
24625 rtx mem;
24626 rtx tmpreg = gen_reg_rtx (SImode);
24627 rtx scratch = gen_reg_rtx (SImode);
24628 rtx cmp;
24629
24630 align = 0;
24631 if (CONST_INT_P (align_rtx))
24632 align = INTVAL (align_rtx);
24633
24634 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24635
24636 /* Is there a known alignment and is it less than 4? */
24637 if (align < 4)
24638 {
24639 rtx scratch1 = gen_reg_rtx (Pmode);
24640 emit_move_insn (scratch1, out);
24641 /* Is there a known alignment and is it not 2? */
24642 if (align != 2)
24643 {
24644 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24645 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24646
24647 /* Leave just the 3 lower bits. */
24648 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24649 NULL_RTX, 0, OPTAB_WIDEN);
24650
24651 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24652 Pmode, 1, align_4_label);
24653 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24654 Pmode, 1, align_2_label);
24655 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24656 Pmode, 1, align_3_label);
24657 }
24658 else
24659 {
24660 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24661 check if it is aligned to 4 bytes. */
24662
24663 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24664 NULL_RTX, 0, OPTAB_WIDEN);
24665
24666 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24667 Pmode, 1, align_4_label);
24668 }
24669
24670 mem = change_address (src, QImode, out);
24671
24672 /* Now compare the bytes. */
24673
24674 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24675 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24676 QImode, 1, end_0_label);
24677
24678 /* Increment the address. */
24679 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24680
24681 /* Not needed with an alignment of 2 */
24682 if (align != 2)
24683 {
24684 emit_label (align_2_label);
24685
24686 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24687 end_0_label);
24688
24689 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24690
24691 emit_label (align_3_label);
24692 }
24693
24694 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24695 end_0_label);
24696
24697 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24698 }
24699
24700 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24701 align this loop: it only makes the program larger and does not help to
24702 speed it up. */
24703 emit_label (align_4_label);
24704
24705 mem = change_address (src, SImode, out);
24706 emit_move_insn (scratch, mem);
24707 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24708
24709 /* This formula yields a nonzero result iff one of the bytes is zero.
24710 This saves three branches inside the loop and many cycles. */
24711
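/* Worked example (comment only): the insns emitted below compute
   (x - 0x01010101) & ~x & 0x80808080, which is nonzero exactly when some
   byte of x is zero.  For x = 0x61626300, whose low byte is zero:

       x - 0x01010101 = 0x606161FF
       ~x             = 0x9E9D9CFF
       both ANDed     = 0x000100FF
       & 0x80808080   = 0x00000080    nonzero, so the loop falls through

   while for x = 0x61626364, which has no zero byte, the final AND is 0 and
   the jump back to align_4_label is taken.  */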
24712 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24713 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24714 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24715 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24716 gen_int_mode (0x80808080, SImode)));
24717 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24718 align_4_label);
24719
24720 if (TARGET_CMOVE)
24721 {
24722 rtx reg = gen_reg_rtx (SImode);
24723 rtx reg2 = gen_reg_rtx (Pmode);
24724 emit_move_insn (reg, tmpreg);
24725 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24726
24727 /* If zero is not in the first two bytes, move two bytes forward. */
24728 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24729 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24730 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24731 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24732 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24733 reg,
24734 tmpreg)));
24735 /* Emit lea manually to avoid clobbering of flags. */
24736 emit_insn (gen_rtx_SET (SImode, reg2,
24737 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24738
24739 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24740 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24741 emit_insn (gen_rtx_SET (VOIDmode, out,
24742 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24743 reg2,
24744 out)));
24745 }
24746 else
24747 {
24748 rtx_code_label *end_2_label = gen_label_rtx ();
24749 /* Is zero in the first two bytes? */
24750
24751 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24752 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24753 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24754 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24755 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24756 pc_rtx);
24757 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24758 JUMP_LABEL (tmp) = end_2_label;
24759
24760 /* Not in the first two. Move two bytes forward. */
24761 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24762 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24763
24764 emit_label (end_2_label);
24765
24766 }
24767
24768 /* Avoid branch in fixing the byte. */
24769 tmpreg = gen_lowpart (QImode, tmpreg);
24770 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24771 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24772 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24773 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24774
24775 emit_label (end_0_label);
24776 }
24777
24778 /* Expand strlen. */
24779
24780 bool
24781 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24782 {
24783 rtx addr, scratch1, scratch2, scratch3, scratch4;
24784
24785 /* The generic case of the strlen expander is long. Avoid expanding it
24786 unless TARGET_INLINE_ALL_STRINGOPS. */
24787
24788 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24789 && !TARGET_INLINE_ALL_STRINGOPS
24790 && !optimize_insn_for_size_p ()
24791 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24792 return false;
24793
24794 addr = force_reg (Pmode, XEXP (src, 0));
24795 scratch1 = gen_reg_rtx (Pmode);
24796
24797 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24798 && !optimize_insn_for_size_p ())
24799 {
24800 /* Well it seems that some optimizer does not combine a call like
24801 foo(strlen(bar), strlen(bar));
24802 when the move and the subtraction are done here. It does calculate
24803 the length just once when these instructions are done inside of
24804 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24805 often used and I use one fewer register for the lifetime of
24806 output_strlen_unroll() this is better. */
24807
24808 emit_move_insn (out, addr);
24809
24810 ix86_expand_strlensi_unroll_1 (out, src, align);
24811
24812 /* strlensi_unroll_1 returns the address of the zero at the end of
24813 the string, like memchr(), so compute the length by subtracting
24814 the start address. */
24815 emit_insn (ix86_gen_sub3 (out, out, addr));
24816 }
24817 else
24818 {
24819 rtx unspec;
24820
24821 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24822 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24823 return false;
24824
24825 scratch2 = gen_reg_rtx (Pmode);
24826 scratch3 = gen_reg_rtx (Pmode);
24827 scratch4 = force_reg (Pmode, constm1_rtx);
24828
24829 emit_move_insn (scratch3, addr);
24830 eoschar = force_reg (QImode, eoschar);
24831
24832 src = replace_equiv_address_nv (src, scratch3);
24833
24834 /* If .md starts supporting :P, this can be done in .md. */
24835 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24836 scratch4), UNSPEC_SCAS);
24837 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24838 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24839 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24840 }
24841 return true;
24842 }
24843
24844 /* For a given symbol (function) construct code to compute the address of its
24845 PLT entry in the large x86-64 PIC model. */
24846 static rtx
24847 construct_plt_address (rtx symbol)
24848 {
24849 rtx tmp, unspec;
24850
24851 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24852 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24853 gcc_assert (Pmode == DImode);
24854
24855 tmp = gen_reg_rtx (Pmode);
24856 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24857
24858 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24859 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24860 return tmp;
24861 }
24862
24863 rtx
24864 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24865 rtx callarg2,
24866 rtx pop, bool sibcall)
24867 {
24868 rtx vec[3];
24869 rtx use = NULL, call;
24870 unsigned int vec_len = 0;
24871
24872 if (pop == const0_rtx)
24873 pop = NULL;
24874 gcc_assert (!TARGET_64BIT || !pop);
24875
24876 if (TARGET_MACHO && !TARGET_64BIT)
24877 {
24878 #if TARGET_MACHO
24879 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24880 fnaddr = machopic_indirect_call_target (fnaddr);
24881 #endif
24882 }
24883 else
24884 {
24885 /* Static functions and indirect calls don't need the pic register. */
24886 if (flag_pic
24887 && (!TARGET_64BIT
24888 || (ix86_cmodel == CM_LARGE_PIC
24889 && DEFAULT_ABI != MS_ABI))
24890 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24891 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24892 use_reg (&use, pic_offset_table_rtx);
24893 }
24894
24895 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24896 {
24897 rtx al = gen_rtx_REG (QImode, AX_REG);
24898 emit_move_insn (al, callarg2);
24899 use_reg (&use, al);
24900 }
24901
24902 if (ix86_cmodel == CM_LARGE_PIC
24903 && !TARGET_PECOFF
24904 && MEM_P (fnaddr)
24905 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24906 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24907 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24908 else if (sibcall
24909 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24910 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24911 {
24912 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24913 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24914 }
24915
24916 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24917 if (retval)
24918 call = gen_rtx_SET (VOIDmode, retval, call);
24919 vec[vec_len++] = call;
24920
24921 if (pop)
24922 {
24923 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24924 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24925 vec[vec_len++] = pop;
24926 }
24927
24928 if (TARGET_64BIT_MS_ABI
24929 && (!callarg2 || INTVAL (callarg2) != -2))
24930 {
24931 int const cregs_size
24932 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24933 int i;
24934
24935 for (i = 0; i < cregs_size; i++)
24936 {
24937 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24938 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24939
24940 clobber_reg (&use, gen_rtx_REG (mode, regno));
24941 }
24942 }
24943
24944 if (vec_len > 1)
24945 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24946 call = emit_call_insn (call);
24947 if (use)
24948 CALL_INSN_FUNCTION_USAGE (call) = use;
24949
24950 return call;
24951 }
24952
24953 /* Output the assembly for a call instruction. */
24954
24955 const char *
24956 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
24957 {
24958 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24959 bool seh_nop_p = false;
24960 const char *xasm;
24961
24962 if (SIBLING_CALL_P (insn))
24963 {
24964 if (direct_p)
24965 xasm = "jmp\t%P0";
24966 /* SEH epilogue detection requires the indirect branch case
24967 to include REX.W. */
24968 else if (TARGET_SEH)
24969 xasm = "rex.W jmp %A0";
24970 else
24971 xasm = "jmp\t%A0";
24972
24973 output_asm_insn (xasm, &call_op);
24974 return "";
24975 }
24976
24977 /* SEH unwinding can require an extra nop to be emitted in several
24978 circumstances. Determine if we have one of those. */
24979 if (TARGET_SEH)
24980 {
24981 rtx_insn *i;
24982
24983 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24984 {
24985 /* If we get to another real insn, we don't need the nop. */
24986 if (INSN_P (i))
24987 break;
24988
24989 /* If we get to the epilogue note, prevent a catch region from
24990 being adjacent to the standard epilogue sequence. If non-
24991 call-exceptions, we'll have done this during epilogue emission. */
24992 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24993 && !flag_non_call_exceptions
24994 && !can_throw_internal (insn))
24995 {
24996 seh_nop_p = true;
24997 break;
24998 }
24999 }
25000
25001 /* If we didn't find a real insn following the call, prevent the
25002 unwinder from looking into the next function. */
25003 if (i == NULL)
25004 seh_nop_p = true;
25005 }
25006
25007 if (direct_p)
25008 xasm = "call\t%P0";
25009 else
25010 xasm = "call\t%A0";
25011
25012 output_asm_insn (xasm, &call_op);
25013
25014 if (seh_nop_p)
25015 return "nop";
25016
25017 return "";
25018 }
25019 \f
25020 /* Clear stack slot assignments remembered from previous functions.
25021 This is called from INIT_EXPANDERS once before RTL is emitted for each
25022 function. */
25023
25024 static struct machine_function *
25025 ix86_init_machine_status (void)
25026 {
25027 struct machine_function *f;
25028
25029 f = ggc_cleared_alloc<machine_function> ();
25030 f->use_fast_prologue_epilogue_nregs = -1;
25031 f->call_abi = ix86_abi;
25032
25033 return f;
25034 }
25035
25036 /* Return a MEM corresponding to a stack slot with mode MODE.
25037 Allocate a new slot if necessary.
25038
25039 The RTL for a function can have several slots available: N is
25040 which slot to use. */
25041
25042 rtx
25043 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25044 {
25045 struct stack_local_entry *s;
25046
25047 gcc_assert (n < MAX_386_STACK_LOCALS);
25048
25049 for (s = ix86_stack_locals; s; s = s->next)
25050 if (s->mode == mode && s->n == n)
25051 return validize_mem (copy_rtx (s->rtl));
25052
25053 s = ggc_alloc<stack_local_entry> ();
25054 s->n = n;
25055 s->mode = mode;
25056 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25057
25058 s->next = ix86_stack_locals;
25059 ix86_stack_locals = s;
25060 return validize_mem (copy_rtx (s->rtl));
25061 }
25062
25063 static void
25064 ix86_instantiate_decls (void)
25065 {
25066 struct stack_local_entry *s;
25067
25068 for (s = ix86_stack_locals; s; s = s->next)
25069 if (s->rtl != NULL_RTX)
25070 instantiate_decl_rtl (s->rtl);
25071 }
25072 \f
25073 /* Check whether x86 address PARTS is a pc-relative address. */
25074
25075 static bool
25076 rip_relative_addr_p (struct ix86_address *parts)
25077 {
25078 rtx base, index, disp;
25079
25080 base = parts->base;
25081 index = parts->index;
25082 disp = parts->disp;
25083
25084 if (disp && !base && !index)
25085 {
25086 if (TARGET_64BIT)
25087 {
25088 rtx symbol = disp;
25089
25090 if (GET_CODE (disp) == CONST)
25091 symbol = XEXP (disp, 0);
25092 if (GET_CODE (symbol) == PLUS
25093 && CONST_INT_P (XEXP (symbol, 1)))
25094 symbol = XEXP (symbol, 0);
25095
25096 if (GET_CODE (symbol) == LABEL_REF
25097 || (GET_CODE (symbol) == SYMBOL_REF
25098 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25099 || (GET_CODE (symbol) == UNSPEC
25100 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25101 || XINT (symbol, 1) == UNSPEC_PCREL
25102 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25103 return true;
25104 }
25105 }
25106 return false;
25107 }
25108
25109 /* Calculate the length of the memory address in the instruction encoding.
25110 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25111 or other prefixes. We never generate addr32 prefix for LEA insn. */
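/* For example, for 4(%esp) this returns 2: one byte for the SIB byte that
   an %esp base always needs plus one byte for the 8-bit displacement.  */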
25112
25113 int
25114 memory_address_length (rtx addr, bool lea)
25115 {
25116 struct ix86_address parts;
25117 rtx base, index, disp;
25118 int len;
25119 int ok;
25120
25121 if (GET_CODE (addr) == PRE_DEC
25122 || GET_CODE (addr) == POST_INC
25123 || GET_CODE (addr) == PRE_MODIFY
25124 || GET_CODE (addr) == POST_MODIFY)
25125 return 0;
25126
25127 ok = ix86_decompose_address (addr, &parts);
25128 gcc_assert (ok);
25129
25130 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25131
25132 /* If this is not LEA instruction, add the length of addr32 prefix. */
25133 if (TARGET_64BIT && !lea
25134 && (SImode_address_operand (addr, VOIDmode)
25135 || (parts.base && GET_MODE (parts.base) == SImode)
25136 || (parts.index && GET_MODE (parts.index) == SImode)))
25137 len++;
25138
25139 base = parts.base;
25140 index = parts.index;
25141 disp = parts.disp;
25142
25143 if (base && GET_CODE (base) == SUBREG)
25144 base = SUBREG_REG (base);
25145 if (index && GET_CODE (index) == SUBREG)
25146 index = SUBREG_REG (index);
25147
25148 gcc_assert (base == NULL_RTX || REG_P (base));
25149 gcc_assert (index == NULL_RTX || REG_P (index));
25150
25151 /* Rule of thumb:
25152 - esp as the base always wants an index,
25153 - ebp as the base always wants a displacement,
25154 - r12 as the base always wants an index,
25155 - r13 as the base always wants a displacement. */
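/* For example, (%esp) and (%r12) need a SIB byte, while (%ebp) and (%r13)
   are encoded with a zero 8-bit displacement, so each of them costs one
   byte more than a plain register-indirect operand.  */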
25156
25157 /* Register Indirect. */
25158 if (base && !index && !disp)
25159 {
25160 /* esp (for its index) and ebp (for its displacement) need
25161 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25162 code. */
25163 if (base == arg_pointer_rtx
25164 || base == frame_pointer_rtx
25165 || REGNO (base) == SP_REG
25166 || REGNO (base) == BP_REG
25167 || REGNO (base) == R12_REG
25168 || REGNO (base) == R13_REG)
25169 len++;
25170 }
25171
25172 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25173 is not disp32, but disp32(%rip), so for disp32
25174 SIB byte is needed, unless print_operand_address
25175 optimizes it into disp32(%rip) or (%rip) is implied
25176 by UNSPEC. */
25177 else if (disp && !base && !index)
25178 {
25179 len += 4;
25180 if (TARGET_64BIT && !rip_relative_addr_p (&parts))
25181 len++;
25182 }
25183 else
25184 {
25185 /* Find the length of the displacement constant. */
25186 if (disp)
25187 {
25188 if (base && satisfies_constraint_K (disp))
25189 len += 1;
25190 else
25191 len += 4;
25192 }
25193 /* ebp always wants a displacement. Similarly r13. */
25194 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25195 len++;
25196
25197 /* An index requires the two-byte modrm form.... */
25198 if (index
25199 /* ...like esp (or r12), which always wants an index. */
25200 || base == arg_pointer_rtx
25201 || base == frame_pointer_rtx
25202 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25203 len++;
25204 }
25205
25206 return len;
25207 }
25208
25209 /* Compute the default value for the "length_immediate" attribute. When SHORTFORM
25210 is set, expect that the insn has an 8-bit immediate alternative. */
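/* For example, "add $3, %eax" can use the sign-extended 8-bit immediate
   form, giving an immediate length of 1, while "add $300, %eax" needs a
   full 32-bit immediate and therefore a length of 4.  */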
25211 int
25212 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25213 {
25214 int len = 0;
25215 int i;
25216 extract_insn_cached (insn);
25217 for (i = recog_data.n_operands - 1; i >= 0; --i)
25218 if (CONSTANT_P (recog_data.operand[i]))
25219 {
25220 enum attr_mode mode = get_attr_mode (insn);
25221
25222 gcc_assert (!len);
25223 if (shortform && CONST_INT_P (recog_data.operand[i]))
25224 {
25225 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25226 switch (mode)
25227 {
25228 case MODE_QI:
25229 len = 1;
25230 continue;
25231 case MODE_HI:
25232 ival = trunc_int_for_mode (ival, HImode);
25233 break;
25234 case MODE_SI:
25235 ival = trunc_int_for_mode (ival, SImode);
25236 break;
25237 default:
25238 break;
25239 }
25240 if (IN_RANGE (ival, -128, 127))
25241 {
25242 len = 1;
25243 continue;
25244 }
25245 }
25246 switch (mode)
25247 {
25248 case MODE_QI:
25249 len = 1;
25250 break;
25251 case MODE_HI:
25252 len = 2;
25253 break;
25254 case MODE_SI:
25255 len = 4;
25256 break;
25257 /* Immediates for DImode instructions are encoded
25258 as 32bit sign extended values. */
25259 case MODE_DI:
25260 len = 4;
25261 break;
25262 default:
25263 fatal_insn ("unknown insn mode", insn);
25264 }
25265 }
25266 return len;
25267 }
25268
25269 /* Compute default value for "length_address" attribute. */
25270 int
25271 ix86_attr_length_address_default (rtx_insn *insn)
25272 {
25273 int i;
25274
25275 if (get_attr_type (insn) == TYPE_LEA)
25276 {
25277 rtx set = PATTERN (insn), addr;
25278
25279 if (GET_CODE (set) == PARALLEL)
25280 set = XVECEXP (set, 0, 0);
25281
25282 gcc_assert (GET_CODE (set) == SET);
25283
25284 addr = SET_SRC (set);
25285
25286 return memory_address_length (addr, true);
25287 }
25288
25289 extract_insn_cached (insn);
25290 for (i = recog_data.n_operands - 1; i >= 0; --i)
25291 if (MEM_P (recog_data.operand[i]))
25292 {
25293 constrain_operands_cached (reload_completed);
25294 if (which_alternative != -1)
25295 {
25296 const char *constraints = recog_data.constraints[i];
25297 int alt = which_alternative;
25298
25299 while (*constraints == '=' || *constraints == '+')
25300 constraints++;
25301 while (alt-- > 0)
25302 while (*constraints++ != ',')
25303 ;
25304 /* Skip ignored operands. */
25305 if (*constraints == 'X')
25306 continue;
25307 }
25308 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25309 }
25310 return 0;
25311 }
25312
25313 /* Compute default value for "length_vex" attribute. It includes
25314 2 or 3 byte VEX prefix and 1 opcode byte. */
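/* For example, "vaddps %xmm2, %xmm1, %xmm0" fits in the 2-byte VEX prefix,
   while "vaddps (%r8), %xmm1, %xmm0" needs the 3-byte prefix because an
   extended base register requires the VEX.B bit.  */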
25315
25316 int
25317 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25318 bool has_vex_w)
25319 {
25320 int i;
25321
25322 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
25323 byte VEX prefix. */
25324 if (!has_0f_opcode || has_vex_w)
25325 return 3 + 1;
25326
25327 /* We can always use 2 byte VEX prefix in 32bit. */
25328 if (!TARGET_64BIT)
25329 return 2 + 1;
25330
25331 extract_insn_cached (insn);
25332
25333 for (i = recog_data.n_operands - 1; i >= 0; --i)
25334 if (REG_P (recog_data.operand[i]))
25335 {
25336 /* REX.W bit uses 3 byte VEX prefix. */
25337 if (GET_MODE (recog_data.operand[i]) == DImode
25338 && GENERAL_REG_P (recog_data.operand[i]))
25339 return 3 + 1;
25340 }
25341 else
25342 {
25343 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25344 if (MEM_P (recog_data.operand[i])
25345 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25346 return 3 + 1;
25347 }
25348
25349 return 2 + 1;
25350 }
25351 \f
25352 /* Return the maximum number of instructions a cpu can issue. */
25353
25354 static int
25355 ix86_issue_rate (void)
25356 {
25357 switch (ix86_tune)
25358 {
25359 case PROCESSOR_PENTIUM:
25360 case PROCESSOR_BONNELL:
25361 case PROCESSOR_SILVERMONT:
25362 case PROCESSOR_INTEL:
25363 case PROCESSOR_K6:
25364 case PROCESSOR_BTVER2:
25365 case PROCESSOR_PENTIUM4:
25366 case PROCESSOR_NOCONA:
25367 return 2;
25368
25369 case PROCESSOR_PENTIUMPRO:
25370 case PROCESSOR_ATHLON:
25371 case PROCESSOR_K8:
25372 case PROCESSOR_AMDFAM10:
25373 case PROCESSOR_GENERIC:
25374 case PROCESSOR_BTVER1:
25375 return 3;
25376
25377 case PROCESSOR_BDVER1:
25378 case PROCESSOR_BDVER2:
25379 case PROCESSOR_BDVER3:
25380 case PROCESSOR_BDVER4:
25381 case PROCESSOR_CORE2:
25382 case PROCESSOR_NEHALEM:
25383 case PROCESSOR_SANDYBRIDGE:
25384 case PROCESSOR_HASWELL:
25385 return 4;
25386
25387 default:
25388 return 1;
25389 }
25390 }
25391
25392 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags set
25393 by DEP_INSN and nothing else set by DEP_INSN. */
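/* For example, on Pentium a "cmp" paired with a "jne" that only consumes
   the flags can issue in the same cycle, so ix86_adjust_cost treats the
   dependence cost as zero.  */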
25394
25395 static bool
25396 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25397 {
25398 rtx set, set2;
25399
25400 /* Simplify the test for uninteresting insns. */
25401 if (insn_type != TYPE_SETCC
25402 && insn_type != TYPE_ICMOV
25403 && insn_type != TYPE_FCMOV
25404 && insn_type != TYPE_IBR)
25405 return false;
25406
25407 if ((set = single_set (dep_insn)) != 0)
25408 {
25409 set = SET_DEST (set);
25410 set2 = NULL_RTX;
25411 }
25412 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25413 && XVECLEN (PATTERN (dep_insn), 0) == 2
25414 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25415 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25416 {
25417 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25418 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25419 }
25420 else
25421 return false;
25422
25423 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25424 return false;
25425
25426 /* This test is true if the dependent insn reads the flags but
25427 not any other potentially set register. */
25428 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25429 return false;
25430
25431 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25432 return false;
25433
25434 return true;
25435 }
25436
25437 /* Return true iff USE_INSN has a memory address with operands set by
25438 SET_INSN. */
25439
25440 bool
25441 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25442 {
25443 int i;
25444 extract_insn_cached (use_insn);
25445 for (i = recog_data.n_operands - 1; i >= 0; --i)
25446 if (MEM_P (recog_data.operand[i]))
25447 {
25448 rtx addr = XEXP (recog_data.operand[i], 0);
25449 return modified_in_p (addr, set_insn) != 0;
25450 }
25451 return false;
25452 }
25453
25454 /* Helper function for exact_store_load_dependency.
25455 Return true if addr is found in insn. */
25456 static bool
25457 exact_dependency_1 (rtx addr, rtx insn)
25458 {
25459 enum rtx_code code;
25460 const char *format_ptr;
25461 int i, j;
25462
25463 code = GET_CODE (insn);
25464 switch (code)
25465 {
25466 case MEM:
25467 if (rtx_equal_p (addr, insn))
25468 return true;
25469 break;
25470 case REG:
25471 CASE_CONST_ANY:
25472 case SYMBOL_REF:
25473 case CODE_LABEL:
25474 case PC:
25475 case CC0:
25476 case EXPR_LIST:
25477 return false;
25478 default:
25479 break;
25480 }
25481
25482 format_ptr = GET_RTX_FORMAT (code);
25483 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25484 {
25485 switch (*format_ptr++)
25486 {
25487 case 'e':
25488 if (exact_dependency_1 (addr, XEXP (insn, i)))
25489 return true;
25490 break;
25491 case 'E':
25492 for (j = 0; j < XVECLEN (insn, i); j++)
25493 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25494 return true;
25495 break;
25496 }
25497 }
25498 return false;
25499 }
25500
25501 /* Return true if there exists exact dependency for store & load, i.e.
25502 the same memory address is used in them. */
25503 static bool
25504 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25505 {
25506 rtx set1, set2;
25507
25508 set1 = single_set (store);
25509 if (!set1)
25510 return false;
25511 if (!MEM_P (SET_DEST (set1)))
25512 return false;
25513 set2 = single_set (load);
25514 if (!set2)
25515 return false;
25516 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25517 return true;
25518 return false;
25519 }
25520
25521 static int
25522 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25523 {
25524 enum attr_type insn_type, dep_insn_type;
25525 enum attr_memory memory;
25526 rtx set, set2;
25527 int dep_insn_code_number;
25528
25529 /* Anti and output dependencies have zero cost on all CPUs. */
25530 if (REG_NOTE_KIND (link) != 0)
25531 return 0;
25532
25533 dep_insn_code_number = recog_memoized (dep_insn);
25534
25535 /* If we can't recognize the insns, we can't really do anything. */
25536 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25537 return cost;
25538
25539 insn_type = get_attr_type (insn);
25540 dep_insn_type = get_attr_type (dep_insn);
25541
25542 switch (ix86_tune)
25543 {
25544 case PROCESSOR_PENTIUM:
25545 /* Address Generation Interlock adds a cycle of latency. */
25546 if (insn_type == TYPE_LEA)
25547 {
25548 rtx addr = PATTERN (insn);
25549
25550 if (GET_CODE (addr) == PARALLEL)
25551 addr = XVECEXP (addr, 0, 0);
25552
25553 gcc_assert (GET_CODE (addr) == SET);
25554
25555 addr = SET_SRC (addr);
25556 if (modified_in_p (addr, dep_insn))
25557 cost += 1;
25558 }
25559 else if (ix86_agi_dependent (dep_insn, insn))
25560 cost += 1;
25561
25562 /* ??? Compares pair with jump/setcc. */
25563 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25564 cost = 0;
25565
25566 /* Floating point stores require value to be ready one cycle earlier. */
25567 if (insn_type == TYPE_FMOV
25568 && get_attr_memory (insn) == MEMORY_STORE
25569 && !ix86_agi_dependent (dep_insn, insn))
25570 cost += 1;
25571 break;
25572
25573 case PROCESSOR_PENTIUMPRO:
25574 /* INT->FP conversion is expensive. */
25575 if (get_attr_fp_int_src (dep_insn))
25576 cost += 5;
25577
25578 /* There is one cycle extra latency between an FP op and a store. */
25579 if (insn_type == TYPE_FMOV
25580 && (set = single_set (dep_insn)) != NULL_RTX
25581 && (set2 = single_set (insn)) != NULL_RTX
25582 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25583 && MEM_P (SET_DEST (set2)))
25584 cost += 1;
25585
25586 memory = get_attr_memory (insn);
25587
25588 /* Model the ability of the reorder buffer to hide the latency of a load by
25589 executing it in parallel with the previous instruction, provided the
25590 previous instruction is not needed to compute the address. */
25591 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25592 && !ix86_agi_dependent (dep_insn, insn))
25593 {
25594 /* Claim moves to take one cycle, as the core can issue one load
25595 at a time and the next load can start a cycle later. */
25596 if (dep_insn_type == TYPE_IMOV
25597 || dep_insn_type == TYPE_FMOV)
25598 cost = 1;
25599 else if (cost > 1)
25600 cost--;
25601 }
25602 break;
25603
25604 case PROCESSOR_K6:
25605 /* The esp dependency is resolved before
25606 the instruction is really finished. */
25607 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25608 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25609 return 1;
25610
25611 /* INT->FP conversion is expensive. */
25612 if (get_attr_fp_int_src (dep_insn))
25613 cost += 5;
25614
25615 memory = get_attr_memory (insn);
25616
25617 /* Model the ability of the reorder buffer to hide the latency of a load by
25618 executing it in parallel with the previous instruction, provided the
25619 previous instruction is not needed to compute the address. */
25620 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25621 && !ix86_agi_dependent (dep_insn, insn))
25622 {
25623 /* Claim moves to take one cycle, as the core can issue one load
25624 at a time and the next load can start a cycle later. */
25625 if (dep_insn_type == TYPE_IMOV
25626 || dep_insn_type == TYPE_FMOV)
25627 cost = 1;
25628 else if (cost > 2)
25629 cost -= 2;
25630 else
25631 cost = 1;
25632 }
25633 break;
25634
25635 case PROCESSOR_AMDFAM10:
25636 case PROCESSOR_BDVER1:
25637 case PROCESSOR_BDVER2:
25638 case PROCESSOR_BDVER3:
25639 case PROCESSOR_BDVER4:
25640 case PROCESSOR_BTVER1:
25641 case PROCESSOR_BTVER2:
25642 case PROCESSOR_GENERIC:
25643 /* The stack engine allows push and pop instructions to execute in parallel. */
25644 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25645 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25646 return 0;
25647 /* FALLTHRU */
25648
25649 case PROCESSOR_ATHLON:
25650 case PROCESSOR_K8:
25651 memory = get_attr_memory (insn);
25652
25653 /* Model the ability of the reorder buffer to hide the latency of a load by
25654 executing it in parallel with the previous instruction, provided the
25655 previous instruction is not needed to compute the address. */
25656 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25657 && !ix86_agi_dependent (dep_insn, insn))
25658 {
25659 enum attr_unit unit = get_attr_unit (insn);
25660 int loadcost = 3;
25661
25662 /* Because of the difference between the length of the integer and
25663 floating-point unit pipeline preparation stages, memory operands
25664 for floating point are cheaper.
25665
25666 ??? For Athlon the difference is most probably 2. */
25667 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25668 loadcost = 3;
25669 else
25670 loadcost = TARGET_ATHLON ? 2 : 0;
25671
25672 if (cost >= loadcost)
25673 cost -= loadcost;
25674 else
25675 cost = 0;
25676 }
25677 break;
25678
25679 case PROCESSOR_CORE2:
25680 case PROCESSOR_NEHALEM:
25681 case PROCESSOR_SANDYBRIDGE:
25682 case PROCESSOR_HASWELL:
25683 /* The stack engine allows push and pop instructions to execute in parallel. */
25684 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25685 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25686 return 0;
25687
25688 memory = get_attr_memory (insn);
25689
25690 /* Model the ability of the reorder buffer to hide the latency of a load by
25691 executing it in parallel with the previous instruction, provided the
25692 previous instruction is not needed to compute the address. */
25693 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25694 && !ix86_agi_dependent (dep_insn, insn))
25695 {
25696 if (cost >= 4)
25697 cost -= 4;
25698 else
25699 cost = 0;
25700 }
25701 break;
25702
25703 case PROCESSOR_SILVERMONT:
25704 case PROCESSOR_INTEL:
25705 if (!reload_completed)
25706 return cost;
25707
25708 /* Increase cost of integer loads. */
25709 memory = get_attr_memory (dep_insn);
25710 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25711 {
25712 enum attr_unit unit = get_attr_unit (dep_insn);
25713 if (unit == UNIT_INTEGER && cost == 1)
25714 {
25715 if (memory == MEMORY_LOAD)
25716 cost = 3;
25717 else
25718 {
25719 /* Increase cost of ld/st for short int types only
25720 because of store forwarding issue. */
25721 rtx set = single_set (dep_insn);
25722 if (set && (GET_MODE (SET_DEST (set)) == QImode
25723 || GET_MODE (SET_DEST (set)) == HImode))
25724 {
25725 /* Increase cost of store/load insn if exact
25726 dependence exists and it is load insn. */
25727 enum attr_memory insn_memory = get_attr_memory (insn);
25728 if (insn_memory == MEMORY_LOAD
25729 && exact_store_load_dependency (dep_insn, insn))
25730 cost = 3;
25731 }
25732 }
25733 }
25734 }
25735
25736 default:
25737 break;
25738 }
25739
25740 return cost;
25741 }
25742
25743 /* How many alternative schedules to try. This should be as wide as the
25744 scheduling freedom in the DFA, but no wider. Making this value too
25745 large results in extra work for the scheduler. */
25746
25747 static int
25748 ia32_multipass_dfa_lookahead (void)
25749 {
25750 switch (ix86_tune)
25751 {
25752 case PROCESSOR_PENTIUM:
25753 return 2;
25754
25755 case PROCESSOR_PENTIUMPRO:
25756 case PROCESSOR_K6:
25757 return 1;
25758
25759 case PROCESSOR_BDVER1:
25760 case PROCESSOR_BDVER2:
25761 case PROCESSOR_BDVER3:
25762 case PROCESSOR_BDVER4:
25763 /* We use a lookahead value of 4 for BD both before and after reload
25764 scheduling. The plan is to use a value of 8 for -O3. */
25765 return 4;
25766
25767 case PROCESSOR_CORE2:
25768 case PROCESSOR_NEHALEM:
25769 case PROCESSOR_SANDYBRIDGE:
25770 case PROCESSOR_HASWELL:
25771 case PROCESSOR_BONNELL:
25772 case PROCESSOR_SILVERMONT:
25773 case PROCESSOR_INTEL:
25774 /* Generally, we want haifa-sched:max_issue() to look ahead as far as the
25775 number of instructions that can be executed in one cycle, i.e.,
25776 issue_rate. It is unclear why the tuning for many CPUs does not do this. */
25777 if (reload_completed)
25778 return ix86_issue_rate ();
25779 /* Don't use lookahead for pre-reload schedule to save compile time. */
25780 return 0;
25781
25782 default:
25783 return 0;
25784 }
25785 }
25786
25787 /* Return true if target platform supports macro-fusion. */
25788
25789 static bool
25790 ix86_macro_fusion_p ()
25791 {
25792 return TARGET_FUSE_CMP_AND_BRANCH;
25793 }
25794
25795 /* Check whether the current microarchitecture supports macro fusion
25796 for the insn pair "CONDGEN + CONDJMP". Refer to the
25797 "Intel Architectures Optimization Reference Manual". */
25798
25799 static bool
25800 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25801 {
25802 rtx src, dest;
25803 enum rtx_code ccode;
25804 rtx compare_set = NULL_RTX, test_if, cond;
25805 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25806
25807 if (!any_condjump_p (condjmp))
25808 return false;
25809
25810 if (get_attr_type (condgen) != TYPE_TEST
25811 && get_attr_type (condgen) != TYPE_ICMP
25812 && get_attr_type (condgen) != TYPE_INCDEC
25813 && get_attr_type (condgen) != TYPE_ALU)
25814 return false;
25815
25816 compare_set = single_set (condgen);
25817 if (compare_set == NULL_RTX
25818 && !TARGET_FUSE_ALU_AND_BRANCH)
25819 return false;
25820
25821 if (compare_set == NULL_RTX)
25822 {
25823 int i;
25824 rtx pat = PATTERN (condgen);
25825 for (i = 0; i < XVECLEN (pat, 0); i++)
25826 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25827 {
25828 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25829 if (GET_CODE (set_src) == COMPARE)
25830 compare_set = XVECEXP (pat, 0, i);
25831 else
25832 alu_set = XVECEXP (pat, 0, i);
25833 }
25834 }
25835 if (compare_set == NULL_RTX)
25836 return false;
25837 src = SET_SRC (compare_set);
25838 if (GET_CODE (src) != COMPARE)
25839 return false;
25840
25841 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25842 supported. */
25843 if ((MEM_P (XEXP (src, 0))
25844 && CONST_INT_P (XEXP (src, 1)))
25845 || (MEM_P (XEXP (src, 1))
25846 && CONST_INT_P (XEXP (src, 0))))
25847 return false;
25848
25849 /* No fusion for RIP-relative address. */
25850 if (MEM_P (XEXP (src, 0)))
25851 addr = XEXP (XEXP (src, 0), 0);
25852 else if (MEM_P (XEXP (src, 1)))
25853 addr = XEXP (XEXP (src, 1), 0);
25854
25855 if (addr) {
25856 ix86_address parts;
25857 int ok = ix86_decompose_address (addr, &parts);
25858 gcc_assert (ok);
25859
25860 if (rip_relative_addr_p (&parts))
25861 return false;
25862 }
25863
25864 test_if = SET_SRC (pc_set (condjmp));
25865 cond = XEXP (test_if, 0);
25866 ccode = GET_CODE (cond);
25867 /* Check whether the conditional jump uses the Sign or Overflow flags. */
25868 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25869 && (ccode == GE
25870 || ccode == GT
25871 || ccode == LE
25872 || ccode == LT))
25873 return false;
25874
25875 /* Return true for TYPE_TEST and TYPE_ICMP. */
25876 if (get_attr_type (condgen) == TYPE_TEST
25877 || get_attr_type (condgen) == TYPE_ICMP)
25878 return true;
25879
25880 /* The following handles the case of macro-fusion for ALU + jmp. */
25881 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25882 return false;
25883
25884 /* No fusion for alu op with memory destination operand. */
25885 dest = SET_DEST (alu_set);
25886 if (MEM_P (dest))
25887 return false;
25888
25889 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25890 supported. */
25891 if (get_attr_type (condgen) == TYPE_INCDEC
25892 && (ccode == GEU
25893 || ccode == GTU
25894 || ccode == LEU
25895 || ccode == LTU))
25896 return false;
25897
25898 return true;
25899 }
25900
25901 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
25902 execution. It is applied if
25903 (1) an IMUL instruction is at the top of the list;
25904 (2) there is exactly one producer of an independent IMUL instruction in
25905 the ready list.
25906 Return the index of the IMUL producer if it was found and -1 otherwise. */
25907 static int
25908 do_reorder_for_imul (rtx_insn **ready, int n_ready)
25909 {
25910 rtx_insn *insn;
25911 rtx set, insn1, insn2;
25912 sd_iterator_def sd_it;
25913 dep_t dep;
25914 int index = -1;
25915 int i;
25916
25917 if (!TARGET_BONNELL)
25918 return index;
25919
25920 /* Check that IMUL instruction is on the top of ready list. */
25921 insn = ready[n_ready - 1];
25922 set = single_set (insn);
25923 if (!set)
25924 return index;
25925 if (!(GET_CODE (SET_SRC (set)) == MULT
25926 && GET_MODE (SET_SRC (set)) == SImode))
25927 return index;
25928
25929 /* Search for producer of independent IMUL instruction. */
25930 for (i = n_ready - 2; i >= 0; i--)
25931 {
25932 insn = ready[i];
25933 if (!NONDEBUG_INSN_P (insn))
25934 continue;
25935 /* Skip IMUL instruction. */
25936 insn2 = PATTERN (insn);
25937 if (GET_CODE (insn2) == PARALLEL)
25938 insn2 = XVECEXP (insn2, 0, 0);
25939 if (GET_CODE (insn2) == SET
25940 && GET_CODE (SET_SRC (insn2)) == MULT
25941 && GET_MODE (SET_SRC (insn2)) == SImode)
25942 continue;
25943
25944 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25945 {
25946 rtx con;
25947 con = DEP_CON (dep);
25948 if (!NONDEBUG_INSN_P (con))
25949 continue;
25950 insn1 = PATTERN (con);
25951 if (GET_CODE (insn1) == PARALLEL)
25952 insn1 = XVECEXP (insn1, 0, 0);
25953
25954 if (GET_CODE (insn1) == SET
25955 && GET_CODE (SET_SRC (insn1)) == MULT
25956 && GET_MODE (SET_SRC (insn1)) == SImode)
25957 {
25958 sd_iterator_def sd_it1;
25959 dep_t dep1;
25960 /* Check if there is no other dependee for IMUL. */
25961 index = i;
25962 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25963 {
25964 rtx pro;
25965 pro = DEP_PRO (dep1);
25966 if (!NONDEBUG_INSN_P (pro))
25967 continue;
25968 if (pro != insn)
25969 index = -1;
25970 }
25971 if (index >= 0)
25972 break;
25973 }
25974 }
25975 if (index >= 0)
25976 break;
25977 }
25978 return index;
25979 }
25980
25981 /* Try to find the best candidate for the top of the ready list if two insns
25982 have the same priority - a candidate is best if its dependees were
25983 scheduled earlier. Applied for Silvermont only.
25984 Return true if the top 2 insns must be interchanged. */
25985 static bool
25986 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
25987 {
25988 rtx_insn *top = ready[n_ready - 1];
25989 rtx_insn *next = ready[n_ready - 2];
25990 rtx set;
25991 sd_iterator_def sd_it;
25992 dep_t dep;
25993 int clock1 = -1;
25994 int clock2 = -1;
25995 #define INSN_TICK(INSN) (HID (INSN)->tick)
25996
25997 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25998 return false;
25999
26000 if (!NONDEBUG_INSN_P (top))
26001 return false;
26002 if (!NONJUMP_INSN_P (top))
26003 return false;
26004 if (!NONDEBUG_INSN_P (next))
26005 return false;
26006 if (!NONJUMP_INSN_P (next))
26007 return false;
26008 set = single_set (top);
26009 if (!set)
26010 return false;
26011 set = single_set (next);
26012 if (!set)
26013 return false;
26014
26015 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26016 {
26017 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26018 return false;
26019 /* Determine the winner more precisely. */
26020 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26021 {
26022 rtx pro;
26023 pro = DEP_PRO (dep);
26024 if (!NONDEBUG_INSN_P (pro))
26025 continue;
26026 if (INSN_TICK (pro) > clock1)
26027 clock1 = INSN_TICK (pro);
26028 }
26029 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26030 {
26031 rtx pro;
26032 pro = DEP_PRO (dep);
26033 if (!NONDEBUG_INSN_P (pro))
26034 continue;
26035 if (INSN_TICK (pro) > clock2)
26036 clock2 = INSN_TICK (pro);
26037 }
26038
26039 if (clock1 == clock2)
26040 {
26041 /* Determine the winner - a load must win. */
26042 enum attr_memory memory1, memory2;
26043 memory1 = get_attr_memory (top);
26044 memory2 = get_attr_memory (next);
26045 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26046 return true;
26047 }
26048 return (bool) (clock2 < clock1);
26049 }
26050 return false;
26051 #undef INSN_TICK
26052 }
26053
26054 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26055 Return issue rate. */
26056 static int
26057 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26058 int *pn_ready, int clock_var)
26059 {
26060 int issue_rate = -1;
26061 int n_ready = *pn_ready;
26062 int i;
26063 rtx_insn *insn;
26064 int index = -1;
26065
26066 /* Set up issue rate. */
26067 issue_rate = ix86_issue_rate ();
26068
26069 /* Do reordering for BONNELL/SILVERMONT only. */
26070 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26071 return issue_rate;
26072
26073 /* Nothing to do if ready list contains only 1 instruction. */
26074 if (n_ready <= 1)
26075 return issue_rate;
26076
26077 /* Do reordering for the post-reload scheduler only. */
26078 if (!reload_completed)
26079 return issue_rate;
26080
26081 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26082 {
26083 if (sched_verbose > 1)
26084 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26085 INSN_UID (ready[index]));
26086
26087 /* Put IMUL producer (ready[index]) at the top of ready list. */
26088 insn = ready[index];
26089 for (i = index; i < n_ready - 1; i++)
26090 ready[i] = ready[i + 1];
26091 ready[n_ready - 1] = insn;
26092 return issue_rate;
26093 }
26094 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26095 {
26096 if (sched_verbose > 1)
26097 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26098 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26099 /* Swap 2 top elements of ready list. */
26100 insn = ready[n_ready - 1];
26101 ready[n_ready - 1] = ready[n_ready - 2];
26102 ready[n_ready - 2] = insn;
26103 }
26104 return issue_rate;
26105 }
26106
26107 static bool
26108 ix86_class_likely_spilled_p (reg_class_t);
26109
26110 /* Return true if the lhs of INSN is a HW function argument register and set
26111 IS_SPILLED to true if it is a likely-spilled HW register. */
26112 static bool
26113 insn_is_function_arg (rtx insn, bool* is_spilled)
26114 {
26115 rtx dst;
26116
26117 if (!NONDEBUG_INSN_P (insn))
26118 return false;
26119 /* Call instructions are not movable; ignore them. */
26120 if (CALL_P (insn))
26121 return false;
26122 insn = PATTERN (insn);
26123 if (GET_CODE (insn) == PARALLEL)
26124 insn = XVECEXP (insn, 0, 0);
26125 if (GET_CODE (insn) != SET)
26126 return false;
26127 dst = SET_DEST (insn);
26128 if (REG_P (dst) && HARD_REGISTER_P (dst)
26129 && ix86_function_arg_regno_p (REGNO (dst)))
26130 {
26131 /* Is it likely spilled HW register? */
26132 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26133 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26134 *is_spilled = true;
26135 return true;
26136 }
26137 return false;
26138 }
26139
26140 /* Add output dependencies for a chain of adjacent function-argument moves,
26141 but only if there is a move to a likely-spilled HW register. Return the
26142 first argument if at least one dependence was added, or NULL otherwise. */
26143 static rtx_insn *
26144 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26145 {
26146 rtx_insn *insn;
26147 rtx_insn *last = call;
26148 rtx_insn *first_arg = NULL;
26149 bool is_spilled = false;
26150
26151 head = PREV_INSN (head);
26152
26153 /* Find the argument-passing instruction nearest to the call. */
26154 while (true)
26155 {
26156 last = PREV_INSN (last);
26157 if (last == head)
26158 return NULL;
26159 if (!NONDEBUG_INSN_P (last))
26160 continue;
26161 if (insn_is_function_arg (last, &is_spilled))
26162 break;
26163 return NULL;
26164 }
26165
26166 first_arg = last;
26167 while (true)
26168 {
26169 insn = PREV_INSN (last);
26170 if (!INSN_P (insn))
26171 break;
26172 if (insn == head)
26173 break;
26174 if (!NONDEBUG_INSN_P (insn))
26175 {
26176 last = insn;
26177 continue;
26178 }
26179 if (insn_is_function_arg (insn, &is_spilled))
26180 {
26181 /* Add an output dependence between two function arguments if the chain
26182 of output arguments contains likely-spilled HW registers. */
26183 if (is_spilled)
26184 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26185 first_arg = last = insn;
26186 }
26187 else
26188 break;
26189 }
26190 if (!is_spilled)
26191 return NULL;
26192 return first_arg;
26193 }
26194
26195 /* Add output or anti dependency from insn to first_arg to restrict its code
26196 motion. */
26197 static void
26198 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26199 {
26200 rtx set;
26201 rtx tmp;
26202
26203 set = single_set (insn);
26204 if (!set)
26205 return;
26206 tmp = SET_DEST (set);
26207 if (REG_P (tmp))
26208 {
26209 /* Add output dependency to the first function argument. */
26210 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26211 return;
26212 }
26213 /* Add anti dependency. */
26214 add_dependence (first_arg, insn, REG_DEP_ANTI);
26215 }
26216
26217 /* Avoid cross-block motion of a function argument by adding a dependency
26218 from the first non-jump instruction in bb. */
26219 static void
26220 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26221 {
26222 rtx_insn *insn = BB_END (bb);
26223
26224 while (insn)
26225 {
26226 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26227 {
26228 rtx set = single_set (insn);
26229 if (set)
26230 {
26231 avoid_func_arg_motion (arg, insn);
26232 return;
26233 }
26234 }
26235 if (insn == BB_HEAD (bb))
26236 return;
26237 insn = PREV_INSN (insn);
26238 }
26239 }
26240
26241 /* Hook for pre-reload schedule - avoid motion of function arguments
26242 passed in likely spilled HW registers. */
26243 static void
26244 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26245 {
26246 rtx_insn *insn;
26247 rtx_insn *first_arg = NULL;
26248 if (reload_completed)
26249 return;
26250 while (head != tail && DEBUG_INSN_P (head))
26251 head = NEXT_INSN (head);
26252 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26253 if (INSN_P (insn) && CALL_P (insn))
26254 {
26255 first_arg = add_parameter_dependencies (insn, head);
26256 if (first_arg)
26257 {
26258 /* Add a dependee for the first argument to predecessors, but only if the
26259 region contains more than one block. */
26260 basic_block bb = BLOCK_FOR_INSN (insn);
26261 int rgn = CONTAINING_RGN (bb->index);
26262 int nr_blks = RGN_NR_BLOCKS (rgn);
26263 /* Skip trivial regions and region head blocks that can have
26264 predecessors outside of region. */
26265 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26266 {
26267 edge e;
26268 edge_iterator ei;
26269
26270 /* Regions are SCCs with the exception of selective
26271 scheduling with pipelining of outer blocks enabled.
26272 So also check that immediate predecessors of a non-head
26273 block are in the same region. */
26274 FOR_EACH_EDGE (e, ei, bb->preds)
26275 {
26276 /* Avoid creating loop-carried dependencies by using the topological
26277 ordering in the region. */
26278 if (rgn == CONTAINING_RGN (e->src->index)
26279 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26280 add_dependee_for_func_arg (first_arg, e->src);
26281 }
26282 }
26283 insn = first_arg;
26284 if (insn == head)
26285 break;
26286 }
26287 }
26288 else if (first_arg)
26289 avoid_func_arg_motion (first_arg, insn);
26290 }
26291
26292 /* Hook for pre-reload schedule - set the priority of moves from likely
26293 spilled HW registers to the maximum, to schedule them as soon as possible.
26294 These are moves from function argument registers at the top of the function
26295 entry and moves from function return value registers after a call. */
26296 static int
26297 ix86_adjust_priority (rtx_insn *insn, int priority)
26298 {
26299 rtx set;
26300
26301 if (reload_completed)
26302 return priority;
26303
26304 if (!NONDEBUG_INSN_P (insn))
26305 return priority;
26306
26307 set = single_set (insn);
26308 if (set)
26309 {
26310 rtx tmp = SET_SRC (set);
26311 if (REG_P (tmp)
26312 && HARD_REGISTER_P (tmp)
26313 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26314 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26315 return current_sched_info->sched_max_insns_priority;
26316 }
26317
26318 return priority;
26319 }
26320
26321 /* Model decoder of Core 2/i7.
26322 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26323 track the instruction fetch block boundaries and make sure that long
26324 (9+ bytes) instructions are assigned to D0. */
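/* Core 2/i7 has one complex decoder that can handle any instruction and
   several simple decoders generally restricted to single-uop instructions;
   the model below additionally assumes the simple decoders only accept insns
   of at most core2i7_secondary_decoder_max_insn_size bytes.  */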
26325
26326 /* Maximum length of an insn that can be handled by
26327 a secondary decoder unit. '8' for Core 2/i7. */
26328 static int core2i7_secondary_decoder_max_insn_size;
26329
26330 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26331 '16' for Core 2/i7. */
26332 static int core2i7_ifetch_block_size;
26333
26334 /* Maximum number of instructions decoder can handle per cycle.
26335 '6' for Core 2/i7. */
26336 static int core2i7_ifetch_block_max_insns;
26337
26338 typedef struct ix86_first_cycle_multipass_data_ *
26339 ix86_first_cycle_multipass_data_t;
26340 typedef const struct ix86_first_cycle_multipass_data_ *
26341 const_ix86_first_cycle_multipass_data_t;
26342
26343 /* A variable to store target state across calls to max_issue within
26344 one cycle. */
26345 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26346 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26347
26348 /* Initialize DATA. */
26349 static void
26350 core2i7_first_cycle_multipass_init (void *_data)
26351 {
26352 ix86_first_cycle_multipass_data_t data
26353 = (ix86_first_cycle_multipass_data_t) _data;
26354
26355 data->ifetch_block_len = 0;
26356 data->ifetch_block_n_insns = 0;
26357 data->ready_try_change = NULL;
26358 data->ready_try_change_size = 0;
26359 }
26360
26361 /* Advancing the cycle; reset ifetch block counts. */
26362 static void
26363 core2i7_dfa_post_advance_cycle (void)
26364 {
26365 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26366
26367 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26368
26369 data->ifetch_block_len = 0;
26370 data->ifetch_block_n_insns = 0;
26371 }
26372
26373 static int min_insn_size (rtx_insn *);
26374
26375 /* Filter out insns from ready_try that the core will not be able to issue
26376 on current cycle due to decoder. */
26377 static void
26378 core2i7_first_cycle_multipass_filter_ready_try
26379 (const_ix86_first_cycle_multipass_data_t data,
26380 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26381 {
26382 while (n_ready--)
26383 {
26384 rtx_insn *insn;
26385 int insn_size;
26386
26387 if (ready_try[n_ready])
26388 continue;
26389
26390 insn = get_ready_element (n_ready);
26391 insn_size = min_insn_size (insn);
26392
26393 if (/* If this insn is too long for a secondary decoder ... */
26394 (!first_cycle_insn_p
26395 && insn_size > core2i7_secondary_decoder_max_insn_size)
26396 /* ... or it would not fit into the ifetch block ... */
26397 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26398 /* ... or the decoder is full already ... */
26399 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26400 /* ... mask the insn out. */
26401 {
26402 ready_try[n_ready] = 1;
26403
26404 if (data->ready_try_change)
26405 bitmap_set_bit (data->ready_try_change, n_ready);
26406 }
26407 }
26408 }
26409
26410 /* Prepare for a new round of multipass lookahead scheduling. */
26411 static void
26412 core2i7_first_cycle_multipass_begin (void *_data,
26413 signed char *ready_try, int n_ready,
26414 bool first_cycle_insn_p)
26415 {
26416 ix86_first_cycle_multipass_data_t data
26417 = (ix86_first_cycle_multipass_data_t) _data;
26418 const_ix86_first_cycle_multipass_data_t prev_data
26419 = ix86_first_cycle_multipass_data;
26420
26421 /* Restore the state from the end of the previous round. */
26422 data->ifetch_block_len = prev_data->ifetch_block_len;
26423 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26424
26425 /* Filter instructions that cannot be issued on current cycle due to
26426 decoder restrictions. */
26427 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26428 first_cycle_insn_p);
26429 }
26430
26431 /* INSN is being issued in current solution. Account for its impact on
26432 the decoder model. */
26433 static void
26434 core2i7_first_cycle_multipass_issue (void *_data,
26435 signed char *ready_try, int n_ready,
26436 rtx_insn *insn, const void *_prev_data)
26437 {
26438 ix86_first_cycle_multipass_data_t data
26439 = (ix86_first_cycle_multipass_data_t) _data;
26440 const_ix86_first_cycle_multipass_data_t prev_data
26441 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26442
26443 int insn_size = min_insn_size (insn);
26444
26445 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26446 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26447 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26448 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26449
26450 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26451 if (!data->ready_try_change)
26452 {
26453 data->ready_try_change = sbitmap_alloc (n_ready);
26454 data->ready_try_change_size = n_ready;
26455 }
26456 else if (data->ready_try_change_size < n_ready)
26457 {
26458 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26459 n_ready, 0);
26460 data->ready_try_change_size = n_ready;
26461 }
26462 bitmap_clear (data->ready_try_change);
26463
26464 /* Filter out insns from ready_try that the core will not be able to issue
26465 on current cycle due to decoder. */
26466 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26467 false);
26468 }
26469
26470 /* Revert the effect on ready_try. */
26471 static void
26472 core2i7_first_cycle_multipass_backtrack (const void *_data,
26473 signed char *ready_try,
26474 int n_ready ATTRIBUTE_UNUSED)
26475 {
26476 const_ix86_first_cycle_multipass_data_t data
26477 = (const_ix86_first_cycle_multipass_data_t) _data;
26478 unsigned int i = 0;
26479 sbitmap_iterator sbi;
26480
26481 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26482 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26483 {
26484 ready_try[i] = 0;
26485 }
26486 }
26487
26488 /* Save the result of multipass lookahead scheduling for the next round. */
26489 static void
26490 core2i7_first_cycle_multipass_end (const void *_data)
26491 {
26492 const_ix86_first_cycle_multipass_data_t data
26493 = (const_ix86_first_cycle_multipass_data_t) _data;
26494 ix86_first_cycle_multipass_data_t next_data
26495 = ix86_first_cycle_multipass_data;
26496
26497 if (data != NULL)
26498 {
26499 next_data->ifetch_block_len = data->ifetch_block_len;
26500 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26501 }
26502 }
26503
26504 /* Deallocate target data. */
26505 static void
26506 core2i7_first_cycle_multipass_fini (void *_data)
26507 {
26508 ix86_first_cycle_multipass_data_t data
26509 = (ix86_first_cycle_multipass_data_t) _data;
26510
26511 if (data->ready_try_change)
26512 {
26513 sbitmap_free (data->ready_try_change);
26514 data->ready_try_change = NULL;
26515 data->ready_try_change_size = 0;
26516 }
26517 }
26518
26519 /* Prepare for scheduling pass. */
26520 static void
26521 ix86_sched_init_global (FILE *, int, int)
26522 {
26523 /* Install scheduling hooks for current CPU. Some of these hooks are used
26524 in time-critical parts of the scheduler, so we only set them up when
26525 they are actually used. */
26526 switch (ix86_tune)
26527 {
26528 case PROCESSOR_CORE2:
26529 case PROCESSOR_NEHALEM:
26530 case PROCESSOR_SANDYBRIDGE:
26531 case PROCESSOR_HASWELL:
26532 /* Do not perform multipass scheduling for pre-reload schedule
26533 to save compile time. */
26534 if (reload_completed)
26535 {
26536 targetm.sched.dfa_post_advance_cycle
26537 = core2i7_dfa_post_advance_cycle;
26538 targetm.sched.first_cycle_multipass_init
26539 = core2i7_first_cycle_multipass_init;
26540 targetm.sched.first_cycle_multipass_begin
26541 = core2i7_first_cycle_multipass_begin;
26542 targetm.sched.first_cycle_multipass_issue
26543 = core2i7_first_cycle_multipass_issue;
26544 targetm.sched.first_cycle_multipass_backtrack
26545 = core2i7_first_cycle_multipass_backtrack;
26546 targetm.sched.first_cycle_multipass_end
26547 = core2i7_first_cycle_multipass_end;
26548 targetm.sched.first_cycle_multipass_fini
26549 = core2i7_first_cycle_multipass_fini;
26550
26551 /* Set decoder parameters. */
26552 core2i7_secondary_decoder_max_insn_size = 8;
26553 core2i7_ifetch_block_size = 16;
26554 core2i7_ifetch_block_max_insns = 6;
26555 break;
26556 }
26557 /* ... Fall through ... */
26558 default:
26559 targetm.sched.dfa_post_advance_cycle = NULL;
26560 targetm.sched.first_cycle_multipass_init = NULL;
26561 targetm.sched.first_cycle_multipass_begin = NULL;
26562 targetm.sched.first_cycle_multipass_issue = NULL;
26563 targetm.sched.first_cycle_multipass_backtrack = NULL;
26564 targetm.sched.first_cycle_multipass_end = NULL;
26565 targetm.sched.first_cycle_multipass_fini = NULL;
26566 break;
26567 }
26568 }
26569
26570 \f
26571 /* Compute the alignment given to a constant that is being placed in memory.
26572 EXP is the constant and ALIGN is the alignment that the object would
26573 ordinarily have.
26574 The value of this function is used instead of that alignment to align
26575 the object. */
26576
26577 int
26578 ix86_constant_alignment (tree exp, int align)
26579 {
26580 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26581 || TREE_CODE (exp) == INTEGER_CST)
26582 {
26583 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26584 return 64;
26585 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26586 return 128;
26587 }
26588 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26589 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26590 return BITS_PER_WORD;
26591
26592 return align;
26593 }
26594
26595 /* Compute the alignment for a static variable.
26596 TYPE is the data type, and ALIGN is the alignment that
26597 the object would ordinarily have. The value of this function is used
26598 instead of that alignment to align the object. */
26599
26600 int
26601 ix86_data_alignment (tree type, int align, bool opt)
26602 {
26603 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26604 for symbols from other compilation units or symbols that don't need
26605 to bind locally. In order to preserve some ABI compatibility with
26606 those compilers, ensure we don't decrease alignment from what we
26607 used to assume. */
26608
26609 int max_align_compat
26610 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26611
26612 /* A data structure equal to or greater than the size of a cache line
26613 (64 bytes on the Pentium 4 and other recent Intel processors, including
26614 processors based on the Intel Core microarchitecture) should be aligned
26615 so that its base address is a multiple of the cache line size. */
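/* For example, with the typical 64-byte prefetch block this allows a
   maximum alignment of 64 * 8 = 512 bits, capped by MAX_OFILE_ALIGNMENT.  */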
26616
26617 int max_align
26618 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26619
26620 if (max_align < BITS_PER_WORD)
26621 max_align = BITS_PER_WORD;
26622
26623 if (opt
26624 && AGGREGATE_TYPE_P (type)
26625 && TYPE_SIZE (type)
26626 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26627 {
26628 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26629 && align < max_align_compat)
26630 align = max_align_compat;
26631 if (wi::geu_p (TYPE_SIZE (type), max_align)
26632 && align < max_align)
26633 align = max_align;
26634 }
26635
26636 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26637 to a 16-byte boundary. */
26638 if (TARGET_64BIT)
26639 {
26640 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26641 && TYPE_SIZE (type)
26642 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26643 && wi::geu_p (TYPE_SIZE (type), 128)
26644 && align < 128)
26645 return 128;
26646 }
26647
26648 if (!opt)
26649 return align;
26650
26651 if (TREE_CODE (type) == ARRAY_TYPE)
26652 {
26653 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26654 return 64;
26655 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26656 return 128;
26657 }
26658 else if (TREE_CODE (type) == COMPLEX_TYPE)
26659 {
26660
26661 if (TYPE_MODE (type) == DCmode && align < 64)
26662 return 64;
26663 if ((TYPE_MODE (type) == XCmode
26664 || TYPE_MODE (type) == TCmode) && align < 128)
26665 return 128;
26666 }
26667 else if ((TREE_CODE (type) == RECORD_TYPE
26668 || TREE_CODE (type) == UNION_TYPE
26669 || TREE_CODE (type) == QUAL_UNION_TYPE)
26670 && TYPE_FIELDS (type))
26671 {
26672 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26673 return 64;
26674 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26675 return 128;
26676 }
26677 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26678 || TREE_CODE (type) == INTEGER_TYPE)
26679 {
26680 if (TYPE_MODE (type) == DFmode && align < 64)
26681 return 64;
26682 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26683 return 128;
26684 }
26685
26686 return align;
26687 }
26688
26689 /* Compute the alignment for a local variable or a stack slot. EXP is
26690 the data type or decl itself, MODE is the widest mode available and
26691 ALIGN is the alignment that the object would ordinarily have. The
26692 value of this macro is used instead of that alignment to align the
26693 object. */
26694
26695 unsigned int
26696 ix86_local_alignment (tree exp, enum machine_mode mode,
26697 unsigned int align)
26698 {
26699 tree type, decl;
26700
26701 if (exp && DECL_P (exp))
26702 {
26703 type = TREE_TYPE (exp);
26704 decl = exp;
26705 }
26706 else
26707 {
26708 type = exp;
26709 decl = NULL;
26710 }
26711
26712 /* Don't do dynamic stack realignment for long long objects with
26713 -mpreferred-stack-boundary=2. */
26714 if (!TARGET_64BIT
26715 && align == 64
26716 && ix86_preferred_stack_boundary < 64
26717 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26718 && (!type || !TYPE_USER_ALIGN (type))
26719 && (!decl || !DECL_USER_ALIGN (decl)))
26720 align = 32;
26721
26722 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26723 register in MODE. We will return the largest alignment of XF
26724 and DF. */
26725 if (!type)
26726 {
26727 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26728 align = GET_MODE_ALIGNMENT (DFmode);
26729 return align;
26730 }
26731
26732 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
26733 to a 16-byte boundary. The exact wording is:
26734
26735 An array uses the same alignment as its elements, except that a local or
26736 global array variable of length at least 16 bytes or
26737 a C99 variable-length array variable always has alignment of at least 16 bytes.
26738
26739 This was added to allow the use of aligned SSE instructions on arrays. The
26740 rule is meant for static storage (where the compiler cannot do the analysis
26741 by itself). We follow it for automatic variables only when convenient.
26742 We fully control everything in the function being compiled, and functions
26743 from other units cannot rely on the alignment.
26744
26745 Exclude the va_list type. It is the common case of a local array where
26746 we cannot benefit from the alignment.
26747
26748 TODO: Probably one should optimize for size only when the variable does not escape. */
26749 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26750 && TARGET_SSE)
26751 {
26752 if (AGGREGATE_TYPE_P (type)
26753 && (va_list_type_node == NULL_TREE
26754 || (TYPE_MAIN_VARIANT (type)
26755 != TYPE_MAIN_VARIANT (va_list_type_node)))
26756 && TYPE_SIZE (type)
26757 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26758 && wi::geu_p (TYPE_SIZE (type), 16)
26759 && align < 128)
26760 return 128;
26761 }
26762 if (TREE_CODE (type) == ARRAY_TYPE)
26763 {
26764 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26765 return 64;
26766 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26767 return 128;
26768 }
26769 else if (TREE_CODE (type) == COMPLEX_TYPE)
26770 {
26771 if (TYPE_MODE (type) == DCmode && align < 64)
26772 return 64;
26773 if ((TYPE_MODE (type) == XCmode
26774 || TYPE_MODE (type) == TCmode) && align < 128)
26775 return 128;
26776 }
26777 else if ((TREE_CODE (type) == RECORD_TYPE
26778 || TREE_CODE (type) == UNION_TYPE
26779 || TREE_CODE (type) == QUAL_UNION_TYPE)
26780 && TYPE_FIELDS (type))
26781 {
26782 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26783 return 64;
26784 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26785 return 128;
26786 }
26787 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26788 || TREE_CODE (type) == INTEGER_TYPE)
26789 {
26790
26791 if (TYPE_MODE (type) == DFmode && align < 64)
26792 return 64;
26793 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26794 return 128;
26795 }
26796 return align;
26797 }
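/* As a rough illustration of the rules above (assuming -m64 -msse and
   optimization for speed): a local

     char buf[32];

   is an aggregate of at least 16 bytes, so it is bumped to 128-bit
   alignment for the benefit of aligned SSE accesses, while a local
   va_list or a lone double keeps its ordinary alignment.  */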
26798
26799 /* Compute the minimum required alignment for dynamic stack realignment
26800 purposes for a local variable, parameter or a stack slot. EXP is
26801 the data type or decl itself, MODE is its mode and ALIGN is the
26802 alignment that the object would ordinarily have. */
26803
26804 unsigned int
26805 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26806 unsigned int align)
26807 {
26808 tree type, decl;
26809
26810 if (exp && DECL_P (exp))
26811 {
26812 type = TREE_TYPE (exp);
26813 decl = exp;
26814 }
26815 else
26816 {
26817 type = exp;
26818 decl = NULL;
26819 }
26820
26821 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26822 return align;
26823
26824 /* Don't do dynamic stack realignment for long long objects with
26825 -mpreferred-stack-boundary=2. */
26826 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26827 && (!type || !TYPE_USER_ALIGN (type))
26828 && (!decl || !DECL_USER_ALIGN (decl)))
26829 return 32;
26830
26831 return align;
26832 }
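/* For example, with -m32 -mpreferred-stack-boundary=2 a plain local

     long long x;

   only requires 32-bit alignment here, so it does not by itself force
   dynamic stack realignment; an explicit __attribute__ ((aligned (8)))
   on the type or the decl keeps the full 64-bit requirement.  */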
26833 \f
26834 /* Find a location for the static chain incoming to a nested function.
26835 This is a register, unless all free registers are used by arguments. */
26836
26837 static rtx
26838 ix86_static_chain (const_tree fndecl, bool incoming_p)
26839 {
26840 unsigned regno;
26841
26842 if (!DECL_STATIC_CHAIN (fndecl))
26843 return NULL;
26844
26845 if (TARGET_64BIT)
26846 {
26847 /* We always use R10 in 64-bit mode. */
26848 regno = R10_REG;
26849 }
26850 else
26851 {
26852 tree fntype;
26853 unsigned int ccvt;
26854
26855 /* By default in 32-bit mode we use ECX to pass the static chain. */
26856 regno = CX_REG;
26857
26858 fntype = TREE_TYPE (fndecl);
26859 ccvt = ix86_get_callcvt (fntype);
26860 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26861 {
26862 /* Fastcall functions use ecx/edx for arguments, which leaves
26863 us with EAX for the static chain.
26864 Thiscall functions use ecx for arguments, which also
26865 leaves us with EAX for the static chain. */
26866 regno = AX_REG;
26867 }
26868 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26869 {
26870 /* Thiscall functions use ecx for arguments, which leaves
26871 us with EAX and EDX for the static chain.
26872 For ABI compatibility we use EAX. */
26873 regno = AX_REG;
26874 }
26875 else if (ix86_function_regparm (fntype, fndecl) == 3)
26876 {
26877 /* For regparm 3, we have no free call-clobbered registers in
26878 which to store the static chain. In order to implement this,
26879 we have the trampoline push the static chain to the stack.
26880 However, we can't push a value below the return address when
26881 we call the nested function directly, so we have to use an
26882 alternate entry point. For this we use ESI, and have the
26883 alternate entry point push ESI, so that things appear the
26884 same once we're executing the nested function. */
26885 if (incoming_p)
26886 {
26887 if (fndecl == current_function_decl)
26888 ix86_static_chain_on_stack = true;
26889 return gen_frame_mem (SImode,
26890 plus_constant (Pmode,
26891 arg_pointer_rtx, -8));
26892 }
26893 regno = SI_REG;
26894 }
26895 }
26896
26897 return gen_rtx_REG (Pmode, regno);
26898 }
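/* So, for instance, a nested function compiled with -m32 and the default
   calling convention receives its static chain in %ecx; with fastcall or
   thiscall it arrives in %eax; and with -mregparm=3 there is no free
   call-clobbered register left, so the trampoline pushes the chain and
   the callee reads it back from the stack slot set up above.  */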
26899
26900 /* Emit RTL insns to initialize the variable parts of a trampoline.
26901 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26902 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26903 to be passed to the target function. */
26904
26905 static void
26906 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26907 {
26908 rtx mem, fnaddr;
26909 int opcode;
26910 int offset = 0;
26911
26912 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26913
26914 if (TARGET_64BIT)
26915 {
26916 int size;
26917
26918 /* Load the function address into r11. Try to load the address
26919 using the shorter movl instead of movabs. We may want to support
26920 movq for kernel mode, but the kernel does not use trampolines at
26921 the moment. FNADDR is a 32-bit address and may not be in
26922 DImode when ptr_mode == SImode. Always use movl in this
26923 case. */
26924 if (ptr_mode == SImode
26925 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26926 {
26927 fnaddr = copy_addr_to_reg (fnaddr);
26928
26929 mem = adjust_address (m_tramp, HImode, offset);
26930 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26931
26932 mem = adjust_address (m_tramp, SImode, offset + 2);
26933 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26934 offset += 6;
26935 }
26936 else
26937 {
26938 mem = adjust_address (m_tramp, HImode, offset);
26939 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26940
26941 mem = adjust_address (m_tramp, DImode, offset + 2);
26942 emit_move_insn (mem, fnaddr);
26943 offset += 10;
26944 }
26945
26946 /* Load the static chain into r10 using movabs. Use the shorter
26947 movl instead of movabs when ptr_mode == SImode. */
26948 if (ptr_mode == SImode)
26949 {
26950 opcode = 0xba41;
26951 size = 6;
26952 }
26953 else
26954 {
26955 opcode = 0xba49;
26956 size = 10;
26957 }
26958
26959 mem = adjust_address (m_tramp, HImode, offset);
26960 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26961
26962 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26963 emit_move_insn (mem, chain_value);
26964 offset += size;
26965
26966 /* Jump to r11; the last (unused) byte is a nop, only there to
26967 pad the write out to a single 32-bit store. */
26968 mem = adjust_address (m_tramp, SImode, offset);
26969 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26970 offset += 4;
26971 }
26972 else
26973 {
26974 rtx disp, chain;
26975
26976 /* Depending on the static chain location, either load a register
26977 with a constant, or push the constant to the stack. All of the
26978 instructions are the same size. */
26979 chain = ix86_static_chain (fndecl, true);
26980 if (REG_P (chain))
26981 {
26982 switch (REGNO (chain))
26983 {
26984 case AX_REG:
26985 opcode = 0xb8; break;
26986 case CX_REG:
26987 opcode = 0xb9; break;
26988 default:
26989 gcc_unreachable ();
26990 }
26991 }
26992 else
26993 opcode = 0x68;
26994
26995 mem = adjust_address (m_tramp, QImode, offset);
26996 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26997
26998 mem = adjust_address (m_tramp, SImode, offset + 1);
26999 emit_move_insn (mem, chain_value);
27000 offset += 5;
27001
27002 mem = adjust_address (m_tramp, QImode, offset);
27003 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27004
27005 mem = adjust_address (m_tramp, SImode, offset + 1);
27006
27007 /* Compute offset from the end of the jmp to the target function.
27008 In the case in which the trampoline stores the static chain on
27009 the stack, we need to skip the first insn which pushes the
27010 (call-saved) register static chain; this push is 1 byte. */
27011 offset += 5;
27012 disp = expand_binop (SImode, sub_optab, fnaddr,
27013 plus_constant (Pmode, XEXP (m_tramp, 0),
27014 offset - (MEM_P (chain) ? 1 : 0)),
27015 NULL_RTX, 1, OPTAB_DIRECT);
27016 emit_move_insn (mem, disp);
27017 }
27018
27019 gcc_assert (offset <= TRAMPOLINE_SIZE);
27020
27021 #ifdef HAVE_ENABLE_EXECUTE_STACK
27022 #ifdef CHECK_EXECUTE_STACK_ENABLED
27023 if (CHECK_EXECUTE_STACK_ENABLED)
27024 #endif
27025 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27026 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27027 #endif
27028 }
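/* For reference, the 64-bit trampoline emitted above (when ptr_mode is
   DImode and the address does not fit the movl form) is, byte for byte:

     49 bb <fnaddr, 8 bytes>    movabs $fnaddr, %r11
     49 ba <chain, 8 bytes>     movabs $chain,  %r10
     49 ff e3                   jmp    *%r11
     90                         nop (pads the final 32-bit store)

   which matches the 0xbb49 / 0xba49 / 0x90e3ff49 constants stored into
   M_TRAMP.  */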
27029 \f
27030 /* The following file contains several enumerations and data structures
27031 built from the definitions in i386-builtin-types.def. */
27032
27033 #include "i386-builtin-types.inc"
27034
27035 /* Table for the ix86 builtin non-function types. */
27036 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27037
27038 /* Retrieve an element from the above table, building some of
27039 the types lazily. */
27040
27041 static tree
27042 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27043 {
27044 unsigned int index;
27045 tree type, itype;
27046
27047 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27048
27049 type = ix86_builtin_type_tab[(int) tcode];
27050 if (type != NULL)
27051 return type;
27052
27053 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27054 if (tcode <= IX86_BT_LAST_VECT)
27055 {
27056 enum machine_mode mode;
27057
27058 index = tcode - IX86_BT_LAST_PRIM - 1;
27059 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27060 mode = ix86_builtin_type_vect_mode[index];
27061
27062 type = build_vector_type_for_mode (itype, mode);
27063 }
27064 else
27065 {
27066 int quals;
27067
27068 index = tcode - IX86_BT_LAST_VECT - 1;
27069 if (tcode <= IX86_BT_LAST_PTR)
27070 quals = TYPE_UNQUALIFIED;
27071 else
27072 quals = TYPE_QUAL_CONST;
27073
27074 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27075 if (quals != TYPE_UNQUALIFIED)
27076 itype = build_qualified_type (itype, quals);
27077
27078 type = build_pointer_type (itype);
27079 }
27080
27081 ix86_builtin_type_tab[(int) tcode] = type;
27082 return type;
27083 }
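/* So a vector code is built on demand from its base scalar type, e.g.
   (assuming the usual V4SF entry in i386-builtin-types.def) the first
   request produces build_vector_type_for_mode (float_type_node, V4SFmode)
   and later requests hit the cache; pointer codes are likewise derived
   from their pointed-to type, adding a const qualifier where the table
   says so.  */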
27084
27085 /* Table for the ix86 builtin function types. */
27086 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27087
27088 /* Retrieve an element from the above table, building some of
27089 the types lazily. */
27090
27091 static tree
27092 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27093 {
27094 tree type;
27095
27096 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27097
27098 type = ix86_builtin_func_type_tab[(int) tcode];
27099 if (type != NULL)
27100 return type;
27101
27102 if (tcode <= IX86_BT_LAST_FUNC)
27103 {
27104 unsigned start = ix86_builtin_func_start[(int) tcode];
27105 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27106 tree rtype, atype, args = void_list_node;
27107 unsigned i;
27108
27109 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27110 for (i = after - 1; i > start; --i)
27111 {
27112 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27113 args = tree_cons (NULL, atype, args);
27114 }
27115
27116 type = build_function_type (rtype, args);
27117 }
27118 else
27119 {
27120 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27121 enum ix86_builtin_func_type icode;
27122
27123 icode = ix86_builtin_func_alias_base[index];
27124 type = ix86_get_builtin_func_type (icode);
27125 }
27126
27127 ix86_builtin_func_type_tab[(int) tcode] = type;
27128 return type;
27129 }
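/* For a (hypothetical) table row laid out as { V4SF, V4SF, INT }, where the
   return type comes first and the arguments follow, this yields the type of
   a function taking a V4SF vector and an int and returning a V4SF vector.
   The loop walks the argument slots backwards so that tree_cons builds the
   TREE_LIST in source order, terminated by void_list_node; alias codes
   simply reuse the type of the function they alias.  */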
27130
27131
27132 /* Codes for all the SSE/MMX builtins. */
27133 enum ix86_builtins
27134 {
27135 IX86_BUILTIN_ADDPS,
27136 IX86_BUILTIN_ADDSS,
27137 IX86_BUILTIN_DIVPS,
27138 IX86_BUILTIN_DIVSS,
27139 IX86_BUILTIN_MULPS,
27140 IX86_BUILTIN_MULSS,
27141 IX86_BUILTIN_SUBPS,
27142 IX86_BUILTIN_SUBSS,
27143
27144 IX86_BUILTIN_CMPEQPS,
27145 IX86_BUILTIN_CMPLTPS,
27146 IX86_BUILTIN_CMPLEPS,
27147 IX86_BUILTIN_CMPGTPS,
27148 IX86_BUILTIN_CMPGEPS,
27149 IX86_BUILTIN_CMPNEQPS,
27150 IX86_BUILTIN_CMPNLTPS,
27151 IX86_BUILTIN_CMPNLEPS,
27152 IX86_BUILTIN_CMPNGTPS,
27153 IX86_BUILTIN_CMPNGEPS,
27154 IX86_BUILTIN_CMPORDPS,
27155 IX86_BUILTIN_CMPUNORDPS,
27156 IX86_BUILTIN_CMPEQSS,
27157 IX86_BUILTIN_CMPLTSS,
27158 IX86_BUILTIN_CMPLESS,
27159 IX86_BUILTIN_CMPNEQSS,
27160 IX86_BUILTIN_CMPNLTSS,
27161 IX86_BUILTIN_CMPNLESS,
27162 IX86_BUILTIN_CMPORDSS,
27163 IX86_BUILTIN_CMPUNORDSS,
27164
27165 IX86_BUILTIN_COMIEQSS,
27166 IX86_BUILTIN_COMILTSS,
27167 IX86_BUILTIN_COMILESS,
27168 IX86_BUILTIN_COMIGTSS,
27169 IX86_BUILTIN_COMIGESS,
27170 IX86_BUILTIN_COMINEQSS,
27171 IX86_BUILTIN_UCOMIEQSS,
27172 IX86_BUILTIN_UCOMILTSS,
27173 IX86_BUILTIN_UCOMILESS,
27174 IX86_BUILTIN_UCOMIGTSS,
27175 IX86_BUILTIN_UCOMIGESS,
27176 IX86_BUILTIN_UCOMINEQSS,
27177
27178 IX86_BUILTIN_CVTPI2PS,
27179 IX86_BUILTIN_CVTPS2PI,
27180 IX86_BUILTIN_CVTSI2SS,
27181 IX86_BUILTIN_CVTSI642SS,
27182 IX86_BUILTIN_CVTSS2SI,
27183 IX86_BUILTIN_CVTSS2SI64,
27184 IX86_BUILTIN_CVTTPS2PI,
27185 IX86_BUILTIN_CVTTSS2SI,
27186 IX86_BUILTIN_CVTTSS2SI64,
27187
27188 IX86_BUILTIN_MAXPS,
27189 IX86_BUILTIN_MAXSS,
27190 IX86_BUILTIN_MINPS,
27191 IX86_BUILTIN_MINSS,
27192
27193 IX86_BUILTIN_LOADUPS,
27194 IX86_BUILTIN_STOREUPS,
27195 IX86_BUILTIN_MOVSS,
27196
27197 IX86_BUILTIN_MOVHLPS,
27198 IX86_BUILTIN_MOVLHPS,
27199 IX86_BUILTIN_LOADHPS,
27200 IX86_BUILTIN_LOADLPS,
27201 IX86_BUILTIN_STOREHPS,
27202 IX86_BUILTIN_STORELPS,
27203
27204 IX86_BUILTIN_MASKMOVQ,
27205 IX86_BUILTIN_MOVMSKPS,
27206 IX86_BUILTIN_PMOVMSKB,
27207
27208 IX86_BUILTIN_MOVNTPS,
27209 IX86_BUILTIN_MOVNTQ,
27210
27211 IX86_BUILTIN_LOADDQU,
27212 IX86_BUILTIN_STOREDQU,
27213
27214 IX86_BUILTIN_PACKSSWB,
27215 IX86_BUILTIN_PACKSSDW,
27216 IX86_BUILTIN_PACKUSWB,
27217
27218 IX86_BUILTIN_PADDB,
27219 IX86_BUILTIN_PADDW,
27220 IX86_BUILTIN_PADDD,
27221 IX86_BUILTIN_PADDQ,
27222 IX86_BUILTIN_PADDSB,
27223 IX86_BUILTIN_PADDSW,
27224 IX86_BUILTIN_PADDUSB,
27225 IX86_BUILTIN_PADDUSW,
27226 IX86_BUILTIN_PSUBB,
27227 IX86_BUILTIN_PSUBW,
27228 IX86_BUILTIN_PSUBD,
27229 IX86_BUILTIN_PSUBQ,
27230 IX86_BUILTIN_PSUBSB,
27231 IX86_BUILTIN_PSUBSW,
27232 IX86_BUILTIN_PSUBUSB,
27233 IX86_BUILTIN_PSUBUSW,
27234
27235 IX86_BUILTIN_PAND,
27236 IX86_BUILTIN_PANDN,
27237 IX86_BUILTIN_POR,
27238 IX86_BUILTIN_PXOR,
27239
27240 IX86_BUILTIN_PAVGB,
27241 IX86_BUILTIN_PAVGW,
27242
27243 IX86_BUILTIN_PCMPEQB,
27244 IX86_BUILTIN_PCMPEQW,
27245 IX86_BUILTIN_PCMPEQD,
27246 IX86_BUILTIN_PCMPGTB,
27247 IX86_BUILTIN_PCMPGTW,
27248 IX86_BUILTIN_PCMPGTD,
27249
27250 IX86_BUILTIN_PMADDWD,
27251
27252 IX86_BUILTIN_PMAXSW,
27253 IX86_BUILTIN_PMAXUB,
27254 IX86_BUILTIN_PMINSW,
27255 IX86_BUILTIN_PMINUB,
27256
27257 IX86_BUILTIN_PMULHUW,
27258 IX86_BUILTIN_PMULHW,
27259 IX86_BUILTIN_PMULLW,
27260
27261 IX86_BUILTIN_PSADBW,
27262 IX86_BUILTIN_PSHUFW,
27263
27264 IX86_BUILTIN_PSLLW,
27265 IX86_BUILTIN_PSLLD,
27266 IX86_BUILTIN_PSLLQ,
27267 IX86_BUILTIN_PSRAW,
27268 IX86_BUILTIN_PSRAD,
27269 IX86_BUILTIN_PSRLW,
27270 IX86_BUILTIN_PSRLD,
27271 IX86_BUILTIN_PSRLQ,
27272 IX86_BUILTIN_PSLLWI,
27273 IX86_BUILTIN_PSLLDI,
27274 IX86_BUILTIN_PSLLQI,
27275 IX86_BUILTIN_PSRAWI,
27276 IX86_BUILTIN_PSRADI,
27277 IX86_BUILTIN_PSRLWI,
27278 IX86_BUILTIN_PSRLDI,
27279 IX86_BUILTIN_PSRLQI,
27280
27281 IX86_BUILTIN_PUNPCKHBW,
27282 IX86_BUILTIN_PUNPCKHWD,
27283 IX86_BUILTIN_PUNPCKHDQ,
27284 IX86_BUILTIN_PUNPCKLBW,
27285 IX86_BUILTIN_PUNPCKLWD,
27286 IX86_BUILTIN_PUNPCKLDQ,
27287
27288 IX86_BUILTIN_SHUFPS,
27289
27290 IX86_BUILTIN_RCPPS,
27291 IX86_BUILTIN_RCPSS,
27292 IX86_BUILTIN_RSQRTPS,
27293 IX86_BUILTIN_RSQRTPS_NR,
27294 IX86_BUILTIN_RSQRTSS,
27295 IX86_BUILTIN_RSQRTF,
27296 IX86_BUILTIN_SQRTPS,
27297 IX86_BUILTIN_SQRTPS_NR,
27298 IX86_BUILTIN_SQRTSS,
27299
27300 IX86_BUILTIN_UNPCKHPS,
27301 IX86_BUILTIN_UNPCKLPS,
27302
27303 IX86_BUILTIN_ANDPS,
27304 IX86_BUILTIN_ANDNPS,
27305 IX86_BUILTIN_ORPS,
27306 IX86_BUILTIN_XORPS,
27307
27308 IX86_BUILTIN_EMMS,
27309 IX86_BUILTIN_LDMXCSR,
27310 IX86_BUILTIN_STMXCSR,
27311 IX86_BUILTIN_SFENCE,
27312
27313 IX86_BUILTIN_FXSAVE,
27314 IX86_BUILTIN_FXRSTOR,
27315 IX86_BUILTIN_FXSAVE64,
27316 IX86_BUILTIN_FXRSTOR64,
27317
27318 IX86_BUILTIN_XSAVE,
27319 IX86_BUILTIN_XRSTOR,
27320 IX86_BUILTIN_XSAVE64,
27321 IX86_BUILTIN_XRSTOR64,
27322
27323 IX86_BUILTIN_XSAVEOPT,
27324 IX86_BUILTIN_XSAVEOPT64,
27325
27326 IX86_BUILTIN_XSAVEC,
27327 IX86_BUILTIN_XSAVEC64,
27328
27329 IX86_BUILTIN_XSAVES,
27330 IX86_BUILTIN_XRSTORS,
27331 IX86_BUILTIN_XSAVES64,
27332 IX86_BUILTIN_XRSTORS64,
27333
27334 /* 3DNow! Original */
27335 IX86_BUILTIN_FEMMS,
27336 IX86_BUILTIN_PAVGUSB,
27337 IX86_BUILTIN_PF2ID,
27338 IX86_BUILTIN_PFACC,
27339 IX86_BUILTIN_PFADD,
27340 IX86_BUILTIN_PFCMPEQ,
27341 IX86_BUILTIN_PFCMPGE,
27342 IX86_BUILTIN_PFCMPGT,
27343 IX86_BUILTIN_PFMAX,
27344 IX86_BUILTIN_PFMIN,
27345 IX86_BUILTIN_PFMUL,
27346 IX86_BUILTIN_PFRCP,
27347 IX86_BUILTIN_PFRCPIT1,
27348 IX86_BUILTIN_PFRCPIT2,
27349 IX86_BUILTIN_PFRSQIT1,
27350 IX86_BUILTIN_PFRSQRT,
27351 IX86_BUILTIN_PFSUB,
27352 IX86_BUILTIN_PFSUBR,
27353 IX86_BUILTIN_PI2FD,
27354 IX86_BUILTIN_PMULHRW,
27355
27356 /* 3DNow! Athlon Extensions */
27357 IX86_BUILTIN_PF2IW,
27358 IX86_BUILTIN_PFNACC,
27359 IX86_BUILTIN_PFPNACC,
27360 IX86_BUILTIN_PI2FW,
27361 IX86_BUILTIN_PSWAPDSI,
27362 IX86_BUILTIN_PSWAPDSF,
27363
27364 /* SSE2 */
27365 IX86_BUILTIN_ADDPD,
27366 IX86_BUILTIN_ADDSD,
27367 IX86_BUILTIN_DIVPD,
27368 IX86_BUILTIN_DIVSD,
27369 IX86_BUILTIN_MULPD,
27370 IX86_BUILTIN_MULSD,
27371 IX86_BUILTIN_SUBPD,
27372 IX86_BUILTIN_SUBSD,
27373
27374 IX86_BUILTIN_CMPEQPD,
27375 IX86_BUILTIN_CMPLTPD,
27376 IX86_BUILTIN_CMPLEPD,
27377 IX86_BUILTIN_CMPGTPD,
27378 IX86_BUILTIN_CMPGEPD,
27379 IX86_BUILTIN_CMPNEQPD,
27380 IX86_BUILTIN_CMPNLTPD,
27381 IX86_BUILTIN_CMPNLEPD,
27382 IX86_BUILTIN_CMPNGTPD,
27383 IX86_BUILTIN_CMPNGEPD,
27384 IX86_BUILTIN_CMPORDPD,
27385 IX86_BUILTIN_CMPUNORDPD,
27386 IX86_BUILTIN_CMPEQSD,
27387 IX86_BUILTIN_CMPLTSD,
27388 IX86_BUILTIN_CMPLESD,
27389 IX86_BUILTIN_CMPNEQSD,
27390 IX86_BUILTIN_CMPNLTSD,
27391 IX86_BUILTIN_CMPNLESD,
27392 IX86_BUILTIN_CMPORDSD,
27393 IX86_BUILTIN_CMPUNORDSD,
27394
27395 IX86_BUILTIN_COMIEQSD,
27396 IX86_BUILTIN_COMILTSD,
27397 IX86_BUILTIN_COMILESD,
27398 IX86_BUILTIN_COMIGTSD,
27399 IX86_BUILTIN_COMIGESD,
27400 IX86_BUILTIN_COMINEQSD,
27401 IX86_BUILTIN_UCOMIEQSD,
27402 IX86_BUILTIN_UCOMILTSD,
27403 IX86_BUILTIN_UCOMILESD,
27404 IX86_BUILTIN_UCOMIGTSD,
27405 IX86_BUILTIN_UCOMIGESD,
27406 IX86_BUILTIN_UCOMINEQSD,
27407
27408 IX86_BUILTIN_MAXPD,
27409 IX86_BUILTIN_MAXSD,
27410 IX86_BUILTIN_MINPD,
27411 IX86_BUILTIN_MINSD,
27412
27413 IX86_BUILTIN_ANDPD,
27414 IX86_BUILTIN_ANDNPD,
27415 IX86_BUILTIN_ORPD,
27416 IX86_BUILTIN_XORPD,
27417
27418 IX86_BUILTIN_SQRTPD,
27419 IX86_BUILTIN_SQRTSD,
27420
27421 IX86_BUILTIN_UNPCKHPD,
27422 IX86_BUILTIN_UNPCKLPD,
27423
27424 IX86_BUILTIN_SHUFPD,
27425
27426 IX86_BUILTIN_LOADUPD,
27427 IX86_BUILTIN_STOREUPD,
27428 IX86_BUILTIN_MOVSD,
27429
27430 IX86_BUILTIN_LOADHPD,
27431 IX86_BUILTIN_LOADLPD,
27432
27433 IX86_BUILTIN_CVTDQ2PD,
27434 IX86_BUILTIN_CVTDQ2PS,
27435
27436 IX86_BUILTIN_CVTPD2DQ,
27437 IX86_BUILTIN_CVTPD2PI,
27438 IX86_BUILTIN_CVTPD2PS,
27439 IX86_BUILTIN_CVTTPD2DQ,
27440 IX86_BUILTIN_CVTTPD2PI,
27441
27442 IX86_BUILTIN_CVTPI2PD,
27443 IX86_BUILTIN_CVTSI2SD,
27444 IX86_BUILTIN_CVTSI642SD,
27445
27446 IX86_BUILTIN_CVTSD2SI,
27447 IX86_BUILTIN_CVTSD2SI64,
27448 IX86_BUILTIN_CVTSD2SS,
27449 IX86_BUILTIN_CVTSS2SD,
27450 IX86_BUILTIN_CVTTSD2SI,
27451 IX86_BUILTIN_CVTTSD2SI64,
27452
27453 IX86_BUILTIN_CVTPS2DQ,
27454 IX86_BUILTIN_CVTPS2PD,
27455 IX86_BUILTIN_CVTTPS2DQ,
27456
27457 IX86_BUILTIN_MOVNTI,
27458 IX86_BUILTIN_MOVNTI64,
27459 IX86_BUILTIN_MOVNTPD,
27460 IX86_BUILTIN_MOVNTDQ,
27461
27462 IX86_BUILTIN_MOVQ128,
27463
27464 /* SSE2 MMX */
27465 IX86_BUILTIN_MASKMOVDQU,
27466 IX86_BUILTIN_MOVMSKPD,
27467 IX86_BUILTIN_PMOVMSKB128,
27468
27469 IX86_BUILTIN_PACKSSWB128,
27470 IX86_BUILTIN_PACKSSDW128,
27471 IX86_BUILTIN_PACKUSWB128,
27472
27473 IX86_BUILTIN_PADDB128,
27474 IX86_BUILTIN_PADDW128,
27475 IX86_BUILTIN_PADDD128,
27476 IX86_BUILTIN_PADDQ128,
27477 IX86_BUILTIN_PADDSB128,
27478 IX86_BUILTIN_PADDSW128,
27479 IX86_BUILTIN_PADDUSB128,
27480 IX86_BUILTIN_PADDUSW128,
27481 IX86_BUILTIN_PSUBB128,
27482 IX86_BUILTIN_PSUBW128,
27483 IX86_BUILTIN_PSUBD128,
27484 IX86_BUILTIN_PSUBQ128,
27485 IX86_BUILTIN_PSUBSB128,
27486 IX86_BUILTIN_PSUBSW128,
27487 IX86_BUILTIN_PSUBUSB128,
27488 IX86_BUILTIN_PSUBUSW128,
27489
27490 IX86_BUILTIN_PAND128,
27491 IX86_BUILTIN_PANDN128,
27492 IX86_BUILTIN_POR128,
27493 IX86_BUILTIN_PXOR128,
27494
27495 IX86_BUILTIN_PAVGB128,
27496 IX86_BUILTIN_PAVGW128,
27497
27498 IX86_BUILTIN_PCMPEQB128,
27499 IX86_BUILTIN_PCMPEQW128,
27500 IX86_BUILTIN_PCMPEQD128,
27501 IX86_BUILTIN_PCMPGTB128,
27502 IX86_BUILTIN_PCMPGTW128,
27503 IX86_BUILTIN_PCMPGTD128,
27504
27505 IX86_BUILTIN_PMADDWD128,
27506
27507 IX86_BUILTIN_PMAXSW128,
27508 IX86_BUILTIN_PMAXUB128,
27509 IX86_BUILTIN_PMINSW128,
27510 IX86_BUILTIN_PMINUB128,
27511
27512 IX86_BUILTIN_PMULUDQ,
27513 IX86_BUILTIN_PMULUDQ128,
27514 IX86_BUILTIN_PMULHUW128,
27515 IX86_BUILTIN_PMULHW128,
27516 IX86_BUILTIN_PMULLW128,
27517
27518 IX86_BUILTIN_PSADBW128,
27519 IX86_BUILTIN_PSHUFHW,
27520 IX86_BUILTIN_PSHUFLW,
27521 IX86_BUILTIN_PSHUFD,
27522
27523 IX86_BUILTIN_PSLLDQI128,
27524 IX86_BUILTIN_PSLLWI128,
27525 IX86_BUILTIN_PSLLDI128,
27526 IX86_BUILTIN_PSLLQI128,
27527 IX86_BUILTIN_PSRAWI128,
27528 IX86_BUILTIN_PSRADI128,
27529 IX86_BUILTIN_PSRLDQI128,
27530 IX86_BUILTIN_PSRLWI128,
27531 IX86_BUILTIN_PSRLDI128,
27532 IX86_BUILTIN_PSRLQI128,
27533
27534 IX86_BUILTIN_PSLLDQ128,
27535 IX86_BUILTIN_PSLLW128,
27536 IX86_BUILTIN_PSLLD128,
27537 IX86_BUILTIN_PSLLQ128,
27538 IX86_BUILTIN_PSRAW128,
27539 IX86_BUILTIN_PSRAD128,
27540 IX86_BUILTIN_PSRLW128,
27541 IX86_BUILTIN_PSRLD128,
27542 IX86_BUILTIN_PSRLQ128,
27543
27544 IX86_BUILTIN_PUNPCKHBW128,
27545 IX86_BUILTIN_PUNPCKHWD128,
27546 IX86_BUILTIN_PUNPCKHDQ128,
27547 IX86_BUILTIN_PUNPCKHQDQ128,
27548 IX86_BUILTIN_PUNPCKLBW128,
27549 IX86_BUILTIN_PUNPCKLWD128,
27550 IX86_BUILTIN_PUNPCKLDQ128,
27551 IX86_BUILTIN_PUNPCKLQDQ128,
27552
27553 IX86_BUILTIN_CLFLUSH,
27554 IX86_BUILTIN_MFENCE,
27555 IX86_BUILTIN_LFENCE,
27556 IX86_BUILTIN_PAUSE,
27557
27558 IX86_BUILTIN_FNSTENV,
27559 IX86_BUILTIN_FLDENV,
27560 IX86_BUILTIN_FNSTSW,
27561 IX86_BUILTIN_FNCLEX,
27562
27563 IX86_BUILTIN_BSRSI,
27564 IX86_BUILTIN_BSRDI,
27565 IX86_BUILTIN_RDPMC,
27566 IX86_BUILTIN_RDTSC,
27567 IX86_BUILTIN_RDTSCP,
27568 IX86_BUILTIN_ROLQI,
27569 IX86_BUILTIN_ROLHI,
27570 IX86_BUILTIN_RORQI,
27571 IX86_BUILTIN_RORHI,
27572
27573 /* SSE3. */
27574 IX86_BUILTIN_ADDSUBPS,
27575 IX86_BUILTIN_HADDPS,
27576 IX86_BUILTIN_HSUBPS,
27577 IX86_BUILTIN_MOVSHDUP,
27578 IX86_BUILTIN_MOVSLDUP,
27579 IX86_BUILTIN_ADDSUBPD,
27580 IX86_BUILTIN_HADDPD,
27581 IX86_BUILTIN_HSUBPD,
27582 IX86_BUILTIN_LDDQU,
27583
27584 IX86_BUILTIN_MONITOR,
27585 IX86_BUILTIN_MWAIT,
27586
27587 /* SSSE3. */
27588 IX86_BUILTIN_PHADDW,
27589 IX86_BUILTIN_PHADDD,
27590 IX86_BUILTIN_PHADDSW,
27591 IX86_BUILTIN_PHSUBW,
27592 IX86_BUILTIN_PHSUBD,
27593 IX86_BUILTIN_PHSUBSW,
27594 IX86_BUILTIN_PMADDUBSW,
27595 IX86_BUILTIN_PMULHRSW,
27596 IX86_BUILTIN_PSHUFB,
27597 IX86_BUILTIN_PSIGNB,
27598 IX86_BUILTIN_PSIGNW,
27599 IX86_BUILTIN_PSIGND,
27600 IX86_BUILTIN_PALIGNR,
27601 IX86_BUILTIN_PABSB,
27602 IX86_BUILTIN_PABSW,
27603 IX86_BUILTIN_PABSD,
27604
27605 IX86_BUILTIN_PHADDW128,
27606 IX86_BUILTIN_PHADDD128,
27607 IX86_BUILTIN_PHADDSW128,
27608 IX86_BUILTIN_PHSUBW128,
27609 IX86_BUILTIN_PHSUBD128,
27610 IX86_BUILTIN_PHSUBSW128,
27611 IX86_BUILTIN_PMADDUBSW128,
27612 IX86_BUILTIN_PMULHRSW128,
27613 IX86_BUILTIN_PSHUFB128,
27614 IX86_BUILTIN_PSIGNB128,
27615 IX86_BUILTIN_PSIGNW128,
27616 IX86_BUILTIN_PSIGND128,
27617 IX86_BUILTIN_PALIGNR128,
27618 IX86_BUILTIN_PABSB128,
27619 IX86_BUILTIN_PABSW128,
27620 IX86_BUILTIN_PABSD128,
27621
27622 /* AMDFAM10 - SSE4A New Instructions. */
27623 IX86_BUILTIN_MOVNTSD,
27624 IX86_BUILTIN_MOVNTSS,
27625 IX86_BUILTIN_EXTRQI,
27626 IX86_BUILTIN_EXTRQ,
27627 IX86_BUILTIN_INSERTQI,
27628 IX86_BUILTIN_INSERTQ,
27629
27630 /* SSE4.1. */
27631 IX86_BUILTIN_BLENDPD,
27632 IX86_BUILTIN_BLENDPS,
27633 IX86_BUILTIN_BLENDVPD,
27634 IX86_BUILTIN_BLENDVPS,
27635 IX86_BUILTIN_PBLENDVB128,
27636 IX86_BUILTIN_PBLENDW128,
27637
27638 IX86_BUILTIN_DPPD,
27639 IX86_BUILTIN_DPPS,
27640
27641 IX86_BUILTIN_INSERTPS128,
27642
27643 IX86_BUILTIN_MOVNTDQA,
27644 IX86_BUILTIN_MPSADBW128,
27645 IX86_BUILTIN_PACKUSDW128,
27646 IX86_BUILTIN_PCMPEQQ,
27647 IX86_BUILTIN_PHMINPOSUW128,
27648
27649 IX86_BUILTIN_PMAXSB128,
27650 IX86_BUILTIN_PMAXSD128,
27651 IX86_BUILTIN_PMAXUD128,
27652 IX86_BUILTIN_PMAXUW128,
27653
27654 IX86_BUILTIN_PMINSB128,
27655 IX86_BUILTIN_PMINSD128,
27656 IX86_BUILTIN_PMINUD128,
27657 IX86_BUILTIN_PMINUW128,
27658
27659 IX86_BUILTIN_PMOVSXBW128,
27660 IX86_BUILTIN_PMOVSXBD128,
27661 IX86_BUILTIN_PMOVSXBQ128,
27662 IX86_BUILTIN_PMOVSXWD128,
27663 IX86_BUILTIN_PMOVSXWQ128,
27664 IX86_BUILTIN_PMOVSXDQ128,
27665
27666 IX86_BUILTIN_PMOVZXBW128,
27667 IX86_BUILTIN_PMOVZXBD128,
27668 IX86_BUILTIN_PMOVZXBQ128,
27669 IX86_BUILTIN_PMOVZXWD128,
27670 IX86_BUILTIN_PMOVZXWQ128,
27671 IX86_BUILTIN_PMOVZXDQ128,
27672
27673 IX86_BUILTIN_PMULDQ128,
27674 IX86_BUILTIN_PMULLD128,
27675
27676 IX86_BUILTIN_ROUNDSD,
27677 IX86_BUILTIN_ROUNDSS,
27678
27679 IX86_BUILTIN_ROUNDPD,
27680 IX86_BUILTIN_ROUNDPS,
27681
27682 IX86_BUILTIN_FLOORPD,
27683 IX86_BUILTIN_CEILPD,
27684 IX86_BUILTIN_TRUNCPD,
27685 IX86_BUILTIN_RINTPD,
27686 IX86_BUILTIN_ROUNDPD_AZ,
27687
27688 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27689 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27690 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27691
27692 IX86_BUILTIN_FLOORPS,
27693 IX86_BUILTIN_CEILPS,
27694 IX86_BUILTIN_TRUNCPS,
27695 IX86_BUILTIN_RINTPS,
27696 IX86_BUILTIN_ROUNDPS_AZ,
27697
27698 IX86_BUILTIN_FLOORPS_SFIX,
27699 IX86_BUILTIN_CEILPS_SFIX,
27700 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27701
27702 IX86_BUILTIN_PTESTZ,
27703 IX86_BUILTIN_PTESTC,
27704 IX86_BUILTIN_PTESTNZC,
27705
27706 IX86_BUILTIN_VEC_INIT_V2SI,
27707 IX86_BUILTIN_VEC_INIT_V4HI,
27708 IX86_BUILTIN_VEC_INIT_V8QI,
27709 IX86_BUILTIN_VEC_EXT_V2DF,
27710 IX86_BUILTIN_VEC_EXT_V2DI,
27711 IX86_BUILTIN_VEC_EXT_V4SF,
27712 IX86_BUILTIN_VEC_EXT_V4SI,
27713 IX86_BUILTIN_VEC_EXT_V8HI,
27714 IX86_BUILTIN_VEC_EXT_V2SI,
27715 IX86_BUILTIN_VEC_EXT_V4HI,
27716 IX86_BUILTIN_VEC_EXT_V16QI,
27717 IX86_BUILTIN_VEC_SET_V2DI,
27718 IX86_BUILTIN_VEC_SET_V4SF,
27719 IX86_BUILTIN_VEC_SET_V4SI,
27720 IX86_BUILTIN_VEC_SET_V8HI,
27721 IX86_BUILTIN_VEC_SET_V4HI,
27722 IX86_BUILTIN_VEC_SET_V16QI,
27723
27724 IX86_BUILTIN_VEC_PACK_SFIX,
27725 IX86_BUILTIN_VEC_PACK_SFIX256,
27726
27727 /* SSE4.2. */
27728 IX86_BUILTIN_CRC32QI,
27729 IX86_BUILTIN_CRC32HI,
27730 IX86_BUILTIN_CRC32SI,
27731 IX86_BUILTIN_CRC32DI,
27732
27733 IX86_BUILTIN_PCMPESTRI128,
27734 IX86_BUILTIN_PCMPESTRM128,
27735 IX86_BUILTIN_PCMPESTRA128,
27736 IX86_BUILTIN_PCMPESTRC128,
27737 IX86_BUILTIN_PCMPESTRO128,
27738 IX86_BUILTIN_PCMPESTRS128,
27739 IX86_BUILTIN_PCMPESTRZ128,
27740 IX86_BUILTIN_PCMPISTRI128,
27741 IX86_BUILTIN_PCMPISTRM128,
27742 IX86_BUILTIN_PCMPISTRA128,
27743 IX86_BUILTIN_PCMPISTRC128,
27744 IX86_BUILTIN_PCMPISTRO128,
27745 IX86_BUILTIN_PCMPISTRS128,
27746 IX86_BUILTIN_PCMPISTRZ128,
27747
27748 IX86_BUILTIN_PCMPGTQ,
27749
27750 /* AES instructions */
27751 IX86_BUILTIN_AESENC128,
27752 IX86_BUILTIN_AESENCLAST128,
27753 IX86_BUILTIN_AESDEC128,
27754 IX86_BUILTIN_AESDECLAST128,
27755 IX86_BUILTIN_AESIMC128,
27756 IX86_BUILTIN_AESKEYGENASSIST128,
27757
27758 /* PCLMUL instruction */
27759 IX86_BUILTIN_PCLMULQDQ128,
27760
27761 /* AVX */
27762 IX86_BUILTIN_ADDPD256,
27763 IX86_BUILTIN_ADDPS256,
27764 IX86_BUILTIN_ADDSUBPD256,
27765 IX86_BUILTIN_ADDSUBPS256,
27766 IX86_BUILTIN_ANDPD256,
27767 IX86_BUILTIN_ANDPS256,
27768 IX86_BUILTIN_ANDNPD256,
27769 IX86_BUILTIN_ANDNPS256,
27770 IX86_BUILTIN_BLENDPD256,
27771 IX86_BUILTIN_BLENDPS256,
27772 IX86_BUILTIN_BLENDVPD256,
27773 IX86_BUILTIN_BLENDVPS256,
27774 IX86_BUILTIN_DIVPD256,
27775 IX86_BUILTIN_DIVPS256,
27776 IX86_BUILTIN_DPPS256,
27777 IX86_BUILTIN_HADDPD256,
27778 IX86_BUILTIN_HADDPS256,
27779 IX86_BUILTIN_HSUBPD256,
27780 IX86_BUILTIN_HSUBPS256,
27781 IX86_BUILTIN_MAXPD256,
27782 IX86_BUILTIN_MAXPS256,
27783 IX86_BUILTIN_MINPD256,
27784 IX86_BUILTIN_MINPS256,
27785 IX86_BUILTIN_MULPD256,
27786 IX86_BUILTIN_MULPS256,
27787 IX86_BUILTIN_ORPD256,
27788 IX86_BUILTIN_ORPS256,
27789 IX86_BUILTIN_SHUFPD256,
27790 IX86_BUILTIN_SHUFPS256,
27791 IX86_BUILTIN_SUBPD256,
27792 IX86_BUILTIN_SUBPS256,
27793 IX86_BUILTIN_XORPD256,
27794 IX86_BUILTIN_XORPS256,
27795 IX86_BUILTIN_CMPSD,
27796 IX86_BUILTIN_CMPSS,
27797 IX86_BUILTIN_CMPPD,
27798 IX86_BUILTIN_CMPPS,
27799 IX86_BUILTIN_CMPPD256,
27800 IX86_BUILTIN_CMPPS256,
27801 IX86_BUILTIN_CVTDQ2PD256,
27802 IX86_BUILTIN_CVTDQ2PS256,
27803 IX86_BUILTIN_CVTPD2PS256,
27804 IX86_BUILTIN_CVTPS2DQ256,
27805 IX86_BUILTIN_CVTPS2PD256,
27806 IX86_BUILTIN_CVTTPD2DQ256,
27807 IX86_BUILTIN_CVTPD2DQ256,
27808 IX86_BUILTIN_CVTTPS2DQ256,
27809 IX86_BUILTIN_EXTRACTF128PD256,
27810 IX86_BUILTIN_EXTRACTF128PS256,
27811 IX86_BUILTIN_EXTRACTF128SI256,
27812 IX86_BUILTIN_VZEROALL,
27813 IX86_BUILTIN_VZEROUPPER,
27814 IX86_BUILTIN_VPERMILVARPD,
27815 IX86_BUILTIN_VPERMILVARPS,
27816 IX86_BUILTIN_VPERMILVARPD256,
27817 IX86_BUILTIN_VPERMILVARPS256,
27818 IX86_BUILTIN_VPERMILPD,
27819 IX86_BUILTIN_VPERMILPS,
27820 IX86_BUILTIN_VPERMILPD256,
27821 IX86_BUILTIN_VPERMILPS256,
27822 IX86_BUILTIN_VPERMIL2PD,
27823 IX86_BUILTIN_VPERMIL2PS,
27824 IX86_BUILTIN_VPERMIL2PD256,
27825 IX86_BUILTIN_VPERMIL2PS256,
27826 IX86_BUILTIN_VPERM2F128PD256,
27827 IX86_BUILTIN_VPERM2F128PS256,
27828 IX86_BUILTIN_VPERM2F128SI256,
27829 IX86_BUILTIN_VBROADCASTSS,
27830 IX86_BUILTIN_VBROADCASTSD256,
27831 IX86_BUILTIN_VBROADCASTSS256,
27832 IX86_BUILTIN_VBROADCASTPD256,
27833 IX86_BUILTIN_VBROADCASTPS256,
27834 IX86_BUILTIN_VINSERTF128PD256,
27835 IX86_BUILTIN_VINSERTF128PS256,
27836 IX86_BUILTIN_VINSERTF128SI256,
27837 IX86_BUILTIN_LOADUPD256,
27838 IX86_BUILTIN_LOADUPS256,
27839 IX86_BUILTIN_STOREUPD256,
27840 IX86_BUILTIN_STOREUPS256,
27841 IX86_BUILTIN_LDDQU256,
27842 IX86_BUILTIN_MOVNTDQ256,
27843 IX86_BUILTIN_MOVNTPD256,
27844 IX86_BUILTIN_MOVNTPS256,
27845 IX86_BUILTIN_LOADDQU256,
27846 IX86_BUILTIN_STOREDQU256,
27847 IX86_BUILTIN_MASKLOADPD,
27848 IX86_BUILTIN_MASKLOADPS,
27849 IX86_BUILTIN_MASKSTOREPD,
27850 IX86_BUILTIN_MASKSTOREPS,
27851 IX86_BUILTIN_MASKLOADPD256,
27852 IX86_BUILTIN_MASKLOADPS256,
27853 IX86_BUILTIN_MASKSTOREPD256,
27854 IX86_BUILTIN_MASKSTOREPS256,
27855 IX86_BUILTIN_MOVSHDUP256,
27856 IX86_BUILTIN_MOVSLDUP256,
27857 IX86_BUILTIN_MOVDDUP256,
27858
27859 IX86_BUILTIN_SQRTPD256,
27860 IX86_BUILTIN_SQRTPS256,
27861 IX86_BUILTIN_SQRTPS_NR256,
27862 IX86_BUILTIN_RSQRTPS256,
27863 IX86_BUILTIN_RSQRTPS_NR256,
27864
27865 IX86_BUILTIN_RCPPS256,
27866
27867 IX86_BUILTIN_ROUNDPD256,
27868 IX86_BUILTIN_ROUNDPS256,
27869
27870 IX86_BUILTIN_FLOORPD256,
27871 IX86_BUILTIN_CEILPD256,
27872 IX86_BUILTIN_TRUNCPD256,
27873 IX86_BUILTIN_RINTPD256,
27874 IX86_BUILTIN_ROUNDPD_AZ256,
27875
27876 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27877 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27878 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27879
27880 IX86_BUILTIN_FLOORPS256,
27881 IX86_BUILTIN_CEILPS256,
27882 IX86_BUILTIN_TRUNCPS256,
27883 IX86_BUILTIN_RINTPS256,
27884 IX86_BUILTIN_ROUNDPS_AZ256,
27885
27886 IX86_BUILTIN_FLOORPS_SFIX256,
27887 IX86_BUILTIN_CEILPS_SFIX256,
27888 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27889
27890 IX86_BUILTIN_UNPCKHPD256,
27891 IX86_BUILTIN_UNPCKLPD256,
27892 IX86_BUILTIN_UNPCKHPS256,
27893 IX86_BUILTIN_UNPCKLPS256,
27894
27895 IX86_BUILTIN_SI256_SI,
27896 IX86_BUILTIN_PS256_PS,
27897 IX86_BUILTIN_PD256_PD,
27898 IX86_BUILTIN_SI_SI256,
27899 IX86_BUILTIN_PS_PS256,
27900 IX86_BUILTIN_PD_PD256,
27901
27902 IX86_BUILTIN_VTESTZPD,
27903 IX86_BUILTIN_VTESTCPD,
27904 IX86_BUILTIN_VTESTNZCPD,
27905 IX86_BUILTIN_VTESTZPS,
27906 IX86_BUILTIN_VTESTCPS,
27907 IX86_BUILTIN_VTESTNZCPS,
27908 IX86_BUILTIN_VTESTZPD256,
27909 IX86_BUILTIN_VTESTCPD256,
27910 IX86_BUILTIN_VTESTNZCPD256,
27911 IX86_BUILTIN_VTESTZPS256,
27912 IX86_BUILTIN_VTESTCPS256,
27913 IX86_BUILTIN_VTESTNZCPS256,
27914 IX86_BUILTIN_PTESTZ256,
27915 IX86_BUILTIN_PTESTC256,
27916 IX86_BUILTIN_PTESTNZC256,
27917
27918 IX86_BUILTIN_MOVMSKPD256,
27919 IX86_BUILTIN_MOVMSKPS256,
27920
27921 /* AVX2 */
27922 IX86_BUILTIN_MPSADBW256,
27923 IX86_BUILTIN_PABSB256,
27924 IX86_BUILTIN_PABSW256,
27925 IX86_BUILTIN_PABSD256,
27926 IX86_BUILTIN_PACKSSDW256,
27927 IX86_BUILTIN_PACKSSWB256,
27928 IX86_BUILTIN_PACKUSDW256,
27929 IX86_BUILTIN_PACKUSWB256,
27930 IX86_BUILTIN_PADDB256,
27931 IX86_BUILTIN_PADDW256,
27932 IX86_BUILTIN_PADDD256,
27933 IX86_BUILTIN_PADDQ256,
27934 IX86_BUILTIN_PADDSB256,
27935 IX86_BUILTIN_PADDSW256,
27936 IX86_BUILTIN_PADDUSB256,
27937 IX86_BUILTIN_PADDUSW256,
27938 IX86_BUILTIN_PALIGNR256,
27939 IX86_BUILTIN_AND256I,
27940 IX86_BUILTIN_ANDNOT256I,
27941 IX86_BUILTIN_PAVGB256,
27942 IX86_BUILTIN_PAVGW256,
27943 IX86_BUILTIN_PBLENDVB256,
27944 IX86_BUILTIN_PBLENDVW256,
27945 IX86_BUILTIN_PCMPEQB256,
27946 IX86_BUILTIN_PCMPEQW256,
27947 IX86_BUILTIN_PCMPEQD256,
27948 IX86_BUILTIN_PCMPEQQ256,
27949 IX86_BUILTIN_PCMPGTB256,
27950 IX86_BUILTIN_PCMPGTW256,
27951 IX86_BUILTIN_PCMPGTD256,
27952 IX86_BUILTIN_PCMPGTQ256,
27953 IX86_BUILTIN_PHADDW256,
27954 IX86_BUILTIN_PHADDD256,
27955 IX86_BUILTIN_PHADDSW256,
27956 IX86_BUILTIN_PHSUBW256,
27957 IX86_BUILTIN_PHSUBD256,
27958 IX86_BUILTIN_PHSUBSW256,
27959 IX86_BUILTIN_PMADDUBSW256,
27960 IX86_BUILTIN_PMADDWD256,
27961 IX86_BUILTIN_PMAXSB256,
27962 IX86_BUILTIN_PMAXSW256,
27963 IX86_BUILTIN_PMAXSD256,
27964 IX86_BUILTIN_PMAXUB256,
27965 IX86_BUILTIN_PMAXUW256,
27966 IX86_BUILTIN_PMAXUD256,
27967 IX86_BUILTIN_PMINSB256,
27968 IX86_BUILTIN_PMINSW256,
27969 IX86_BUILTIN_PMINSD256,
27970 IX86_BUILTIN_PMINUB256,
27971 IX86_BUILTIN_PMINUW256,
27972 IX86_BUILTIN_PMINUD256,
27973 IX86_BUILTIN_PMOVMSKB256,
27974 IX86_BUILTIN_PMOVSXBW256,
27975 IX86_BUILTIN_PMOVSXBD256,
27976 IX86_BUILTIN_PMOVSXBQ256,
27977 IX86_BUILTIN_PMOVSXWD256,
27978 IX86_BUILTIN_PMOVSXWQ256,
27979 IX86_BUILTIN_PMOVSXDQ256,
27980 IX86_BUILTIN_PMOVZXBW256,
27981 IX86_BUILTIN_PMOVZXBD256,
27982 IX86_BUILTIN_PMOVZXBQ256,
27983 IX86_BUILTIN_PMOVZXWD256,
27984 IX86_BUILTIN_PMOVZXWQ256,
27985 IX86_BUILTIN_PMOVZXDQ256,
27986 IX86_BUILTIN_PMULDQ256,
27987 IX86_BUILTIN_PMULHRSW256,
27988 IX86_BUILTIN_PMULHUW256,
27989 IX86_BUILTIN_PMULHW256,
27990 IX86_BUILTIN_PMULLW256,
27991 IX86_BUILTIN_PMULLD256,
27992 IX86_BUILTIN_PMULUDQ256,
27993 IX86_BUILTIN_POR256,
27994 IX86_BUILTIN_PSADBW256,
27995 IX86_BUILTIN_PSHUFB256,
27996 IX86_BUILTIN_PSHUFD256,
27997 IX86_BUILTIN_PSHUFHW256,
27998 IX86_BUILTIN_PSHUFLW256,
27999 IX86_BUILTIN_PSIGNB256,
28000 IX86_BUILTIN_PSIGNW256,
28001 IX86_BUILTIN_PSIGND256,
28002 IX86_BUILTIN_PSLLDQI256,
28003 IX86_BUILTIN_PSLLWI256,
28004 IX86_BUILTIN_PSLLW256,
28005 IX86_BUILTIN_PSLLDI256,
28006 IX86_BUILTIN_PSLLD256,
28007 IX86_BUILTIN_PSLLQI256,
28008 IX86_BUILTIN_PSLLQ256,
28009 IX86_BUILTIN_PSRAWI256,
28010 IX86_BUILTIN_PSRAW256,
28011 IX86_BUILTIN_PSRADI256,
28012 IX86_BUILTIN_PSRAD256,
28013 IX86_BUILTIN_PSRLDQI256,
28014 IX86_BUILTIN_PSRLWI256,
28015 IX86_BUILTIN_PSRLW256,
28016 IX86_BUILTIN_PSRLDI256,
28017 IX86_BUILTIN_PSRLD256,
28018 IX86_BUILTIN_PSRLQI256,
28019 IX86_BUILTIN_PSRLQ256,
28020 IX86_BUILTIN_PSUBB256,
28021 IX86_BUILTIN_PSUBW256,
28022 IX86_BUILTIN_PSUBD256,
28023 IX86_BUILTIN_PSUBQ256,
28024 IX86_BUILTIN_PSUBSB256,
28025 IX86_BUILTIN_PSUBSW256,
28026 IX86_BUILTIN_PSUBUSB256,
28027 IX86_BUILTIN_PSUBUSW256,
28028 IX86_BUILTIN_PUNPCKHBW256,
28029 IX86_BUILTIN_PUNPCKHWD256,
28030 IX86_BUILTIN_PUNPCKHDQ256,
28031 IX86_BUILTIN_PUNPCKHQDQ256,
28032 IX86_BUILTIN_PUNPCKLBW256,
28033 IX86_BUILTIN_PUNPCKLWD256,
28034 IX86_BUILTIN_PUNPCKLDQ256,
28035 IX86_BUILTIN_PUNPCKLQDQ256,
28036 IX86_BUILTIN_PXOR256,
28037 IX86_BUILTIN_MOVNTDQA256,
28038 IX86_BUILTIN_VBROADCASTSS_PS,
28039 IX86_BUILTIN_VBROADCASTSS_PS256,
28040 IX86_BUILTIN_VBROADCASTSD_PD256,
28041 IX86_BUILTIN_VBROADCASTSI256,
28042 IX86_BUILTIN_PBLENDD256,
28043 IX86_BUILTIN_PBLENDD128,
28044 IX86_BUILTIN_PBROADCASTB256,
28045 IX86_BUILTIN_PBROADCASTW256,
28046 IX86_BUILTIN_PBROADCASTD256,
28047 IX86_BUILTIN_PBROADCASTQ256,
28048 IX86_BUILTIN_PBROADCASTB128,
28049 IX86_BUILTIN_PBROADCASTW128,
28050 IX86_BUILTIN_PBROADCASTD128,
28051 IX86_BUILTIN_PBROADCASTQ128,
28052 IX86_BUILTIN_VPERMVARSI256,
28053 IX86_BUILTIN_VPERMDF256,
28054 IX86_BUILTIN_VPERMVARSF256,
28055 IX86_BUILTIN_VPERMDI256,
28056 IX86_BUILTIN_VPERMTI256,
28057 IX86_BUILTIN_VEXTRACT128I256,
28058 IX86_BUILTIN_VINSERT128I256,
28059 IX86_BUILTIN_MASKLOADD,
28060 IX86_BUILTIN_MASKLOADQ,
28061 IX86_BUILTIN_MASKLOADD256,
28062 IX86_BUILTIN_MASKLOADQ256,
28063 IX86_BUILTIN_MASKSTORED,
28064 IX86_BUILTIN_MASKSTOREQ,
28065 IX86_BUILTIN_MASKSTORED256,
28066 IX86_BUILTIN_MASKSTOREQ256,
28067 IX86_BUILTIN_PSLLVV4DI,
28068 IX86_BUILTIN_PSLLVV2DI,
28069 IX86_BUILTIN_PSLLVV8SI,
28070 IX86_BUILTIN_PSLLVV4SI,
28071 IX86_BUILTIN_PSRAVV8SI,
28072 IX86_BUILTIN_PSRAVV4SI,
28073 IX86_BUILTIN_PSRLVV4DI,
28074 IX86_BUILTIN_PSRLVV2DI,
28075 IX86_BUILTIN_PSRLVV8SI,
28076 IX86_BUILTIN_PSRLVV4SI,
28077
28078 IX86_BUILTIN_GATHERSIV2DF,
28079 IX86_BUILTIN_GATHERSIV4DF,
28080 IX86_BUILTIN_GATHERDIV2DF,
28081 IX86_BUILTIN_GATHERDIV4DF,
28082 IX86_BUILTIN_GATHERSIV4SF,
28083 IX86_BUILTIN_GATHERSIV8SF,
28084 IX86_BUILTIN_GATHERDIV4SF,
28085 IX86_BUILTIN_GATHERDIV8SF,
28086 IX86_BUILTIN_GATHERSIV2DI,
28087 IX86_BUILTIN_GATHERSIV4DI,
28088 IX86_BUILTIN_GATHERDIV2DI,
28089 IX86_BUILTIN_GATHERDIV4DI,
28090 IX86_BUILTIN_GATHERSIV4SI,
28091 IX86_BUILTIN_GATHERSIV8SI,
28092 IX86_BUILTIN_GATHERDIV4SI,
28093 IX86_BUILTIN_GATHERDIV8SI,
28094
28095 /* AVX512F */
28096 IX86_BUILTIN_SI512_SI256,
28097 IX86_BUILTIN_PD512_PD256,
28098 IX86_BUILTIN_PS512_PS256,
28099 IX86_BUILTIN_SI512_SI,
28100 IX86_BUILTIN_PD512_PD,
28101 IX86_BUILTIN_PS512_PS,
28102 IX86_BUILTIN_ADDPD512,
28103 IX86_BUILTIN_ADDPS512,
28104 IX86_BUILTIN_ADDSD_ROUND,
28105 IX86_BUILTIN_ADDSS_ROUND,
28106 IX86_BUILTIN_ALIGND512,
28107 IX86_BUILTIN_ALIGNQ512,
28108 IX86_BUILTIN_BLENDMD512,
28109 IX86_BUILTIN_BLENDMPD512,
28110 IX86_BUILTIN_BLENDMPS512,
28111 IX86_BUILTIN_BLENDMQ512,
28112 IX86_BUILTIN_BROADCASTF32X4_512,
28113 IX86_BUILTIN_BROADCASTF64X4_512,
28114 IX86_BUILTIN_BROADCASTI32X4_512,
28115 IX86_BUILTIN_BROADCASTI64X4_512,
28116 IX86_BUILTIN_BROADCASTSD512,
28117 IX86_BUILTIN_BROADCASTSS512,
28118 IX86_BUILTIN_CMPD512,
28119 IX86_BUILTIN_CMPPD512,
28120 IX86_BUILTIN_CMPPS512,
28121 IX86_BUILTIN_CMPQ512,
28122 IX86_BUILTIN_CMPSD_MASK,
28123 IX86_BUILTIN_CMPSS_MASK,
28124 IX86_BUILTIN_COMIDF,
28125 IX86_BUILTIN_COMISF,
28126 IX86_BUILTIN_COMPRESSPD512,
28127 IX86_BUILTIN_COMPRESSPDSTORE512,
28128 IX86_BUILTIN_COMPRESSPS512,
28129 IX86_BUILTIN_COMPRESSPSSTORE512,
28130 IX86_BUILTIN_CVTDQ2PD512,
28131 IX86_BUILTIN_CVTDQ2PS512,
28132 IX86_BUILTIN_CVTPD2DQ512,
28133 IX86_BUILTIN_CVTPD2PS512,
28134 IX86_BUILTIN_CVTPD2UDQ512,
28135 IX86_BUILTIN_CVTPH2PS512,
28136 IX86_BUILTIN_CVTPS2DQ512,
28137 IX86_BUILTIN_CVTPS2PD512,
28138 IX86_BUILTIN_CVTPS2PH512,
28139 IX86_BUILTIN_CVTPS2UDQ512,
28140 IX86_BUILTIN_CVTSD2SS_ROUND,
28141 IX86_BUILTIN_CVTSI2SD64,
28142 IX86_BUILTIN_CVTSI2SS32,
28143 IX86_BUILTIN_CVTSI2SS64,
28144 IX86_BUILTIN_CVTSS2SD_ROUND,
28145 IX86_BUILTIN_CVTTPD2DQ512,
28146 IX86_BUILTIN_CVTTPD2UDQ512,
28147 IX86_BUILTIN_CVTTPS2DQ512,
28148 IX86_BUILTIN_CVTTPS2UDQ512,
28149 IX86_BUILTIN_CVTUDQ2PD512,
28150 IX86_BUILTIN_CVTUDQ2PS512,
28151 IX86_BUILTIN_CVTUSI2SD32,
28152 IX86_BUILTIN_CVTUSI2SD64,
28153 IX86_BUILTIN_CVTUSI2SS32,
28154 IX86_BUILTIN_CVTUSI2SS64,
28155 IX86_BUILTIN_DIVPD512,
28156 IX86_BUILTIN_DIVPS512,
28157 IX86_BUILTIN_DIVSD_ROUND,
28158 IX86_BUILTIN_DIVSS_ROUND,
28159 IX86_BUILTIN_EXPANDPD512,
28160 IX86_BUILTIN_EXPANDPD512Z,
28161 IX86_BUILTIN_EXPANDPDLOAD512,
28162 IX86_BUILTIN_EXPANDPDLOAD512Z,
28163 IX86_BUILTIN_EXPANDPS512,
28164 IX86_BUILTIN_EXPANDPS512Z,
28165 IX86_BUILTIN_EXPANDPSLOAD512,
28166 IX86_BUILTIN_EXPANDPSLOAD512Z,
28167 IX86_BUILTIN_EXTRACTF32X4,
28168 IX86_BUILTIN_EXTRACTF64X4,
28169 IX86_BUILTIN_EXTRACTI32X4,
28170 IX86_BUILTIN_EXTRACTI64X4,
28171 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28172 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28173 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28174 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28175 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28176 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28177 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28178 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28179 IX86_BUILTIN_GETEXPPD512,
28180 IX86_BUILTIN_GETEXPPS512,
28181 IX86_BUILTIN_GETEXPSD128,
28182 IX86_BUILTIN_GETEXPSS128,
28183 IX86_BUILTIN_GETMANTPD512,
28184 IX86_BUILTIN_GETMANTPS512,
28185 IX86_BUILTIN_GETMANTSD128,
28186 IX86_BUILTIN_GETMANTSS128,
28187 IX86_BUILTIN_INSERTF32X4,
28188 IX86_BUILTIN_INSERTF64X4,
28189 IX86_BUILTIN_INSERTI32X4,
28190 IX86_BUILTIN_INSERTI64X4,
28191 IX86_BUILTIN_LOADAPD512,
28192 IX86_BUILTIN_LOADAPS512,
28193 IX86_BUILTIN_LOADDQUDI512,
28194 IX86_BUILTIN_LOADDQUSI512,
28195 IX86_BUILTIN_LOADUPD512,
28196 IX86_BUILTIN_LOADUPS512,
28197 IX86_BUILTIN_MAXPD512,
28198 IX86_BUILTIN_MAXPS512,
28199 IX86_BUILTIN_MAXSD_ROUND,
28200 IX86_BUILTIN_MAXSS_ROUND,
28201 IX86_BUILTIN_MINPD512,
28202 IX86_BUILTIN_MINPS512,
28203 IX86_BUILTIN_MINSD_ROUND,
28204 IX86_BUILTIN_MINSS_ROUND,
28205 IX86_BUILTIN_MOVAPD512,
28206 IX86_BUILTIN_MOVAPS512,
28207 IX86_BUILTIN_MOVDDUP512,
28208 IX86_BUILTIN_MOVDQA32LOAD512,
28209 IX86_BUILTIN_MOVDQA32STORE512,
28210 IX86_BUILTIN_MOVDQA32_512,
28211 IX86_BUILTIN_MOVDQA64LOAD512,
28212 IX86_BUILTIN_MOVDQA64STORE512,
28213 IX86_BUILTIN_MOVDQA64_512,
28214 IX86_BUILTIN_MOVNTDQ512,
28215 IX86_BUILTIN_MOVNTDQA512,
28216 IX86_BUILTIN_MOVNTPD512,
28217 IX86_BUILTIN_MOVNTPS512,
28218 IX86_BUILTIN_MOVSHDUP512,
28219 IX86_BUILTIN_MOVSLDUP512,
28220 IX86_BUILTIN_MULPD512,
28221 IX86_BUILTIN_MULPS512,
28222 IX86_BUILTIN_MULSD_ROUND,
28223 IX86_BUILTIN_MULSS_ROUND,
28224 IX86_BUILTIN_PABSD512,
28225 IX86_BUILTIN_PABSQ512,
28226 IX86_BUILTIN_PADDD512,
28227 IX86_BUILTIN_PADDQ512,
28228 IX86_BUILTIN_PANDD512,
28229 IX86_BUILTIN_PANDND512,
28230 IX86_BUILTIN_PANDNQ512,
28231 IX86_BUILTIN_PANDQ512,
28232 IX86_BUILTIN_PBROADCASTD512,
28233 IX86_BUILTIN_PBROADCASTD512_GPR,
28234 IX86_BUILTIN_PBROADCASTMB512,
28235 IX86_BUILTIN_PBROADCASTMW512,
28236 IX86_BUILTIN_PBROADCASTQ512,
28237 IX86_BUILTIN_PBROADCASTQ512_GPR,
28238 IX86_BUILTIN_PBROADCASTQ512_MEM,
28239 IX86_BUILTIN_PCMPEQD512_MASK,
28240 IX86_BUILTIN_PCMPEQQ512_MASK,
28241 IX86_BUILTIN_PCMPGTD512_MASK,
28242 IX86_BUILTIN_PCMPGTQ512_MASK,
28243 IX86_BUILTIN_PCOMPRESSD512,
28244 IX86_BUILTIN_PCOMPRESSDSTORE512,
28245 IX86_BUILTIN_PCOMPRESSQ512,
28246 IX86_BUILTIN_PCOMPRESSQSTORE512,
28247 IX86_BUILTIN_PEXPANDD512,
28248 IX86_BUILTIN_PEXPANDD512Z,
28249 IX86_BUILTIN_PEXPANDDLOAD512,
28250 IX86_BUILTIN_PEXPANDDLOAD512Z,
28251 IX86_BUILTIN_PEXPANDQ512,
28252 IX86_BUILTIN_PEXPANDQ512Z,
28253 IX86_BUILTIN_PEXPANDQLOAD512,
28254 IX86_BUILTIN_PEXPANDQLOAD512Z,
28255 IX86_BUILTIN_PMAXSD512,
28256 IX86_BUILTIN_PMAXSQ512,
28257 IX86_BUILTIN_PMAXUD512,
28258 IX86_BUILTIN_PMAXUQ512,
28259 IX86_BUILTIN_PMINSD512,
28260 IX86_BUILTIN_PMINSQ512,
28261 IX86_BUILTIN_PMINUD512,
28262 IX86_BUILTIN_PMINUQ512,
28263 IX86_BUILTIN_PMOVDB512,
28264 IX86_BUILTIN_PMOVDB512_MEM,
28265 IX86_BUILTIN_PMOVDW512,
28266 IX86_BUILTIN_PMOVDW512_MEM,
28267 IX86_BUILTIN_PMOVQB512,
28268 IX86_BUILTIN_PMOVQB512_MEM,
28269 IX86_BUILTIN_PMOVQD512,
28270 IX86_BUILTIN_PMOVQD512_MEM,
28271 IX86_BUILTIN_PMOVQW512,
28272 IX86_BUILTIN_PMOVQW512_MEM,
28273 IX86_BUILTIN_PMOVSDB512,
28274 IX86_BUILTIN_PMOVSDB512_MEM,
28275 IX86_BUILTIN_PMOVSDW512,
28276 IX86_BUILTIN_PMOVSDW512_MEM,
28277 IX86_BUILTIN_PMOVSQB512,
28278 IX86_BUILTIN_PMOVSQB512_MEM,
28279 IX86_BUILTIN_PMOVSQD512,
28280 IX86_BUILTIN_PMOVSQD512_MEM,
28281 IX86_BUILTIN_PMOVSQW512,
28282 IX86_BUILTIN_PMOVSQW512_MEM,
28283 IX86_BUILTIN_PMOVSXBD512,
28284 IX86_BUILTIN_PMOVSXBQ512,
28285 IX86_BUILTIN_PMOVSXDQ512,
28286 IX86_BUILTIN_PMOVSXWD512,
28287 IX86_BUILTIN_PMOVSXWQ512,
28288 IX86_BUILTIN_PMOVUSDB512,
28289 IX86_BUILTIN_PMOVUSDB512_MEM,
28290 IX86_BUILTIN_PMOVUSDW512,
28291 IX86_BUILTIN_PMOVUSDW512_MEM,
28292 IX86_BUILTIN_PMOVUSQB512,
28293 IX86_BUILTIN_PMOVUSQB512_MEM,
28294 IX86_BUILTIN_PMOVUSQD512,
28295 IX86_BUILTIN_PMOVUSQD512_MEM,
28296 IX86_BUILTIN_PMOVUSQW512,
28297 IX86_BUILTIN_PMOVUSQW512_MEM,
28298 IX86_BUILTIN_PMOVZXBD512,
28299 IX86_BUILTIN_PMOVZXBQ512,
28300 IX86_BUILTIN_PMOVZXDQ512,
28301 IX86_BUILTIN_PMOVZXWD512,
28302 IX86_BUILTIN_PMOVZXWQ512,
28303 IX86_BUILTIN_PMULDQ512,
28304 IX86_BUILTIN_PMULLD512,
28305 IX86_BUILTIN_PMULUDQ512,
28306 IX86_BUILTIN_PORD512,
28307 IX86_BUILTIN_PORQ512,
28308 IX86_BUILTIN_PROLD512,
28309 IX86_BUILTIN_PROLQ512,
28310 IX86_BUILTIN_PROLVD512,
28311 IX86_BUILTIN_PROLVQ512,
28312 IX86_BUILTIN_PRORD512,
28313 IX86_BUILTIN_PRORQ512,
28314 IX86_BUILTIN_PRORVD512,
28315 IX86_BUILTIN_PRORVQ512,
28316 IX86_BUILTIN_PSHUFD512,
28317 IX86_BUILTIN_PSLLD512,
28318 IX86_BUILTIN_PSLLDI512,
28319 IX86_BUILTIN_PSLLQ512,
28320 IX86_BUILTIN_PSLLQI512,
28321 IX86_BUILTIN_PSLLVV16SI,
28322 IX86_BUILTIN_PSLLVV8DI,
28323 IX86_BUILTIN_PSRAD512,
28324 IX86_BUILTIN_PSRADI512,
28325 IX86_BUILTIN_PSRAQ512,
28326 IX86_BUILTIN_PSRAQI512,
28327 IX86_BUILTIN_PSRAVV16SI,
28328 IX86_BUILTIN_PSRAVV8DI,
28329 IX86_BUILTIN_PSRLD512,
28330 IX86_BUILTIN_PSRLDI512,
28331 IX86_BUILTIN_PSRLQ512,
28332 IX86_BUILTIN_PSRLQI512,
28333 IX86_BUILTIN_PSRLVV16SI,
28334 IX86_BUILTIN_PSRLVV8DI,
28335 IX86_BUILTIN_PSUBD512,
28336 IX86_BUILTIN_PSUBQ512,
28337 IX86_BUILTIN_PTESTMD512,
28338 IX86_BUILTIN_PTESTMQ512,
28339 IX86_BUILTIN_PTESTNMD512,
28340 IX86_BUILTIN_PTESTNMQ512,
28341 IX86_BUILTIN_PUNPCKHDQ512,
28342 IX86_BUILTIN_PUNPCKHQDQ512,
28343 IX86_BUILTIN_PUNPCKLDQ512,
28344 IX86_BUILTIN_PUNPCKLQDQ512,
28345 IX86_BUILTIN_PXORD512,
28346 IX86_BUILTIN_PXORQ512,
28347 IX86_BUILTIN_RCP14PD512,
28348 IX86_BUILTIN_RCP14PS512,
28349 IX86_BUILTIN_RCP14SD,
28350 IX86_BUILTIN_RCP14SS,
28351 IX86_BUILTIN_RNDSCALEPD,
28352 IX86_BUILTIN_RNDSCALEPS,
28353 IX86_BUILTIN_RNDSCALESD,
28354 IX86_BUILTIN_RNDSCALESS,
28355 IX86_BUILTIN_RSQRT14PD512,
28356 IX86_BUILTIN_RSQRT14PS512,
28357 IX86_BUILTIN_RSQRT14SD,
28358 IX86_BUILTIN_RSQRT14SS,
28359 IX86_BUILTIN_SCALEFPD512,
28360 IX86_BUILTIN_SCALEFPS512,
28361 IX86_BUILTIN_SCALEFSD,
28362 IX86_BUILTIN_SCALEFSS,
28363 IX86_BUILTIN_SHUFPD512,
28364 IX86_BUILTIN_SHUFPS512,
28365 IX86_BUILTIN_SHUF_F32x4,
28366 IX86_BUILTIN_SHUF_F64x2,
28367 IX86_BUILTIN_SHUF_I32x4,
28368 IX86_BUILTIN_SHUF_I64x2,
28369 IX86_BUILTIN_SQRTPD512,
28370 IX86_BUILTIN_SQRTPD512_MASK,
28371 IX86_BUILTIN_SQRTPS512_MASK,
28372 IX86_BUILTIN_SQRTPS_NR512,
28373 IX86_BUILTIN_SQRTSD_ROUND,
28374 IX86_BUILTIN_SQRTSS_ROUND,
28375 IX86_BUILTIN_STOREAPD512,
28376 IX86_BUILTIN_STOREAPS512,
28377 IX86_BUILTIN_STOREDQUDI512,
28378 IX86_BUILTIN_STOREDQUSI512,
28379 IX86_BUILTIN_STOREUPD512,
28380 IX86_BUILTIN_STOREUPS512,
28381 IX86_BUILTIN_SUBPD512,
28382 IX86_BUILTIN_SUBPS512,
28383 IX86_BUILTIN_SUBSD_ROUND,
28384 IX86_BUILTIN_SUBSS_ROUND,
28385 IX86_BUILTIN_UCMPD512,
28386 IX86_BUILTIN_UCMPQ512,
28387 IX86_BUILTIN_UNPCKHPD512,
28388 IX86_BUILTIN_UNPCKHPS512,
28389 IX86_BUILTIN_UNPCKLPD512,
28390 IX86_BUILTIN_UNPCKLPS512,
28391 IX86_BUILTIN_VCVTSD2SI32,
28392 IX86_BUILTIN_VCVTSD2SI64,
28393 IX86_BUILTIN_VCVTSD2USI32,
28394 IX86_BUILTIN_VCVTSD2USI64,
28395 IX86_BUILTIN_VCVTSS2SI32,
28396 IX86_BUILTIN_VCVTSS2SI64,
28397 IX86_BUILTIN_VCVTSS2USI32,
28398 IX86_BUILTIN_VCVTSS2USI64,
28399 IX86_BUILTIN_VCVTTSD2SI32,
28400 IX86_BUILTIN_VCVTTSD2SI64,
28401 IX86_BUILTIN_VCVTTSD2USI32,
28402 IX86_BUILTIN_VCVTTSD2USI64,
28403 IX86_BUILTIN_VCVTTSS2SI32,
28404 IX86_BUILTIN_VCVTTSS2SI64,
28405 IX86_BUILTIN_VCVTTSS2USI32,
28406 IX86_BUILTIN_VCVTTSS2USI64,
28407 IX86_BUILTIN_VFMADDPD512_MASK,
28408 IX86_BUILTIN_VFMADDPD512_MASK3,
28409 IX86_BUILTIN_VFMADDPD512_MASKZ,
28410 IX86_BUILTIN_VFMADDPS512_MASK,
28411 IX86_BUILTIN_VFMADDPS512_MASK3,
28412 IX86_BUILTIN_VFMADDPS512_MASKZ,
28413 IX86_BUILTIN_VFMADDSD3_ROUND,
28414 IX86_BUILTIN_VFMADDSS3_ROUND,
28415 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28416 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28417 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28418 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28419 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28420 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28421 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28422 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28423 IX86_BUILTIN_VFMSUBPD512_MASK3,
28424 IX86_BUILTIN_VFMSUBPS512_MASK3,
28425 IX86_BUILTIN_VFMSUBSD3_MASK3,
28426 IX86_BUILTIN_VFMSUBSS3_MASK3,
28427 IX86_BUILTIN_VFNMADDPD512_MASK,
28428 IX86_BUILTIN_VFNMADDPS512_MASK,
28429 IX86_BUILTIN_VFNMSUBPD512_MASK,
28430 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28431 IX86_BUILTIN_VFNMSUBPS512_MASK,
28432 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28433 IX86_BUILTIN_VPCLZCNTD512,
28434 IX86_BUILTIN_VPCLZCNTQ512,
28435 IX86_BUILTIN_VPCONFLICTD512,
28436 IX86_BUILTIN_VPCONFLICTQ512,
28437 IX86_BUILTIN_VPERMDF512,
28438 IX86_BUILTIN_VPERMDI512,
28439 IX86_BUILTIN_VPERMI2VARD512,
28440 IX86_BUILTIN_VPERMI2VARPD512,
28441 IX86_BUILTIN_VPERMI2VARPS512,
28442 IX86_BUILTIN_VPERMI2VARQ512,
28443 IX86_BUILTIN_VPERMILPD512,
28444 IX86_BUILTIN_VPERMILPS512,
28445 IX86_BUILTIN_VPERMILVARPD512,
28446 IX86_BUILTIN_VPERMILVARPS512,
28447 IX86_BUILTIN_VPERMT2VARD512,
28448 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28449 IX86_BUILTIN_VPERMT2VARPD512,
28450 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28451 IX86_BUILTIN_VPERMT2VARPS512,
28452 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28453 IX86_BUILTIN_VPERMT2VARQ512,
28454 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28455 IX86_BUILTIN_VPERMVARDF512,
28456 IX86_BUILTIN_VPERMVARDI512,
28457 IX86_BUILTIN_VPERMVARSF512,
28458 IX86_BUILTIN_VPERMVARSI512,
28459 IX86_BUILTIN_VTERNLOGD512_MASK,
28460 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28461 IX86_BUILTIN_VTERNLOGQ512_MASK,
28462 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28463
28464 /* Mask arithmetic operations */
28465 IX86_BUILTIN_KAND16,
28466 IX86_BUILTIN_KANDN16,
28467 IX86_BUILTIN_KNOT16,
28468 IX86_BUILTIN_KOR16,
28469 IX86_BUILTIN_KORTESTC16,
28470 IX86_BUILTIN_KORTESTZ16,
28471 IX86_BUILTIN_KUNPCKBW,
28472 IX86_BUILTIN_KXNOR16,
28473 IX86_BUILTIN_KXOR16,
28474 IX86_BUILTIN_KMOV16,
28475
28476 /* Alternate 4 and 8 element gather/scatter for the vectorizer
28477 where all operands are 32-byte or 64-byte wide respectively. */
28478 IX86_BUILTIN_GATHERALTSIV4DF,
28479 IX86_BUILTIN_GATHERALTDIV8SF,
28480 IX86_BUILTIN_GATHERALTSIV4DI,
28481 IX86_BUILTIN_GATHERALTDIV8SI,
28482 IX86_BUILTIN_GATHER3ALTDIV16SF,
28483 IX86_BUILTIN_GATHER3ALTDIV16SI,
28484 IX86_BUILTIN_GATHER3ALTSIV8DF,
28485 IX86_BUILTIN_GATHER3ALTSIV8DI,
28486 IX86_BUILTIN_GATHER3DIV16SF,
28487 IX86_BUILTIN_GATHER3DIV16SI,
28488 IX86_BUILTIN_GATHER3DIV8DF,
28489 IX86_BUILTIN_GATHER3DIV8DI,
28490 IX86_BUILTIN_GATHER3SIV16SF,
28491 IX86_BUILTIN_GATHER3SIV16SI,
28492 IX86_BUILTIN_GATHER3SIV8DF,
28493 IX86_BUILTIN_GATHER3SIV8DI,
28494 IX86_BUILTIN_SCATTERDIV16SF,
28495 IX86_BUILTIN_SCATTERDIV16SI,
28496 IX86_BUILTIN_SCATTERDIV8DF,
28497 IX86_BUILTIN_SCATTERDIV8DI,
28498 IX86_BUILTIN_SCATTERSIV16SF,
28499 IX86_BUILTIN_SCATTERSIV16SI,
28500 IX86_BUILTIN_SCATTERSIV8DF,
28501 IX86_BUILTIN_SCATTERSIV8DI,
28502
28503 /* AVX512PF */
28504 IX86_BUILTIN_GATHERPFQPD,
28505 IX86_BUILTIN_GATHERPFDPS,
28506 IX86_BUILTIN_GATHERPFDPD,
28507 IX86_BUILTIN_GATHERPFQPS,
28508 IX86_BUILTIN_SCATTERPFDPD,
28509 IX86_BUILTIN_SCATTERPFDPS,
28510 IX86_BUILTIN_SCATTERPFQPD,
28511 IX86_BUILTIN_SCATTERPFQPS,
28512
28513 /* AVX512ER */
28514 IX86_BUILTIN_EXP2PD_MASK,
28515 IX86_BUILTIN_EXP2PS_MASK,
28516 IX86_BUILTIN_EXP2PS,
28517 IX86_BUILTIN_RCP28PD,
28518 IX86_BUILTIN_RCP28PS,
28519 IX86_BUILTIN_RCP28SD,
28520 IX86_BUILTIN_RCP28SS,
28521 IX86_BUILTIN_RSQRT28PD,
28522 IX86_BUILTIN_RSQRT28PS,
28523 IX86_BUILTIN_RSQRT28SD,
28524 IX86_BUILTIN_RSQRT28SS,
28525
28526 /* SHA builtins. */
28527 IX86_BUILTIN_SHA1MSG1,
28528 IX86_BUILTIN_SHA1MSG2,
28529 IX86_BUILTIN_SHA1NEXTE,
28530 IX86_BUILTIN_SHA1RNDS4,
28531 IX86_BUILTIN_SHA256MSG1,
28532 IX86_BUILTIN_SHA256MSG2,
28533 IX86_BUILTIN_SHA256RNDS2,
28534
28535 /* CLFLUSHOPT instructions. */
28536 IX86_BUILTIN_CLFLUSHOPT,
28537
28538 /* TFmode support builtins. */
28539 IX86_BUILTIN_INFQ,
28540 IX86_BUILTIN_HUGE_VALQ,
28541 IX86_BUILTIN_FABSQ,
28542 IX86_BUILTIN_COPYSIGNQ,
28543
28544 /* Vectorizer support builtins. */
28545 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28546 IX86_BUILTIN_CPYSGNPS,
28547 IX86_BUILTIN_CPYSGNPD,
28548 IX86_BUILTIN_CPYSGNPS256,
28549 IX86_BUILTIN_CPYSGNPS512,
28550 IX86_BUILTIN_CPYSGNPD256,
28551 IX86_BUILTIN_CPYSGNPD512,
28552 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28553 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28554
28555
28556 /* FMA4 instructions. */
28557 IX86_BUILTIN_VFMADDSS,
28558 IX86_BUILTIN_VFMADDSD,
28559 IX86_BUILTIN_VFMADDPS,
28560 IX86_BUILTIN_VFMADDPD,
28561 IX86_BUILTIN_VFMADDPS256,
28562 IX86_BUILTIN_VFMADDPD256,
28563 IX86_BUILTIN_VFMADDSUBPS,
28564 IX86_BUILTIN_VFMADDSUBPD,
28565 IX86_BUILTIN_VFMADDSUBPS256,
28566 IX86_BUILTIN_VFMADDSUBPD256,
28567
28568 /* FMA3 instructions. */
28569 IX86_BUILTIN_VFMADDSS3,
28570 IX86_BUILTIN_VFMADDSD3,
28571
28572 /* XOP instructions. */
28573 IX86_BUILTIN_VPCMOV,
28574 IX86_BUILTIN_VPCMOV_V2DI,
28575 IX86_BUILTIN_VPCMOV_V4SI,
28576 IX86_BUILTIN_VPCMOV_V8HI,
28577 IX86_BUILTIN_VPCMOV_V16QI,
28578 IX86_BUILTIN_VPCMOV_V4SF,
28579 IX86_BUILTIN_VPCMOV_V2DF,
28580 IX86_BUILTIN_VPCMOV256,
28581 IX86_BUILTIN_VPCMOV_V4DI256,
28582 IX86_BUILTIN_VPCMOV_V8SI256,
28583 IX86_BUILTIN_VPCMOV_V16HI256,
28584 IX86_BUILTIN_VPCMOV_V32QI256,
28585 IX86_BUILTIN_VPCMOV_V8SF256,
28586 IX86_BUILTIN_VPCMOV_V4DF256,
28587
28588 IX86_BUILTIN_VPPERM,
28589
28590 IX86_BUILTIN_VPMACSSWW,
28591 IX86_BUILTIN_VPMACSWW,
28592 IX86_BUILTIN_VPMACSSWD,
28593 IX86_BUILTIN_VPMACSWD,
28594 IX86_BUILTIN_VPMACSSDD,
28595 IX86_BUILTIN_VPMACSDD,
28596 IX86_BUILTIN_VPMACSSDQL,
28597 IX86_BUILTIN_VPMACSSDQH,
28598 IX86_BUILTIN_VPMACSDQL,
28599 IX86_BUILTIN_VPMACSDQH,
28600 IX86_BUILTIN_VPMADCSSWD,
28601 IX86_BUILTIN_VPMADCSWD,
28602
28603 IX86_BUILTIN_VPHADDBW,
28604 IX86_BUILTIN_VPHADDBD,
28605 IX86_BUILTIN_VPHADDBQ,
28606 IX86_BUILTIN_VPHADDWD,
28607 IX86_BUILTIN_VPHADDWQ,
28608 IX86_BUILTIN_VPHADDDQ,
28609 IX86_BUILTIN_VPHADDUBW,
28610 IX86_BUILTIN_VPHADDUBD,
28611 IX86_BUILTIN_VPHADDUBQ,
28612 IX86_BUILTIN_VPHADDUWD,
28613 IX86_BUILTIN_VPHADDUWQ,
28614 IX86_BUILTIN_VPHADDUDQ,
28615 IX86_BUILTIN_VPHSUBBW,
28616 IX86_BUILTIN_VPHSUBWD,
28617 IX86_BUILTIN_VPHSUBDQ,
28618
28619 IX86_BUILTIN_VPROTB,
28620 IX86_BUILTIN_VPROTW,
28621 IX86_BUILTIN_VPROTD,
28622 IX86_BUILTIN_VPROTQ,
28623 IX86_BUILTIN_VPROTB_IMM,
28624 IX86_BUILTIN_VPROTW_IMM,
28625 IX86_BUILTIN_VPROTD_IMM,
28626 IX86_BUILTIN_VPROTQ_IMM,
28627
28628 IX86_BUILTIN_VPSHLB,
28629 IX86_BUILTIN_VPSHLW,
28630 IX86_BUILTIN_VPSHLD,
28631 IX86_BUILTIN_VPSHLQ,
28632 IX86_BUILTIN_VPSHAB,
28633 IX86_BUILTIN_VPSHAW,
28634 IX86_BUILTIN_VPSHAD,
28635 IX86_BUILTIN_VPSHAQ,
28636
28637 IX86_BUILTIN_VFRCZSS,
28638 IX86_BUILTIN_VFRCZSD,
28639 IX86_BUILTIN_VFRCZPS,
28640 IX86_BUILTIN_VFRCZPD,
28641 IX86_BUILTIN_VFRCZPS256,
28642 IX86_BUILTIN_VFRCZPD256,
28643
28644 IX86_BUILTIN_VPCOMEQUB,
28645 IX86_BUILTIN_VPCOMNEUB,
28646 IX86_BUILTIN_VPCOMLTUB,
28647 IX86_BUILTIN_VPCOMLEUB,
28648 IX86_BUILTIN_VPCOMGTUB,
28649 IX86_BUILTIN_VPCOMGEUB,
28650 IX86_BUILTIN_VPCOMFALSEUB,
28651 IX86_BUILTIN_VPCOMTRUEUB,
28652
28653 IX86_BUILTIN_VPCOMEQUW,
28654 IX86_BUILTIN_VPCOMNEUW,
28655 IX86_BUILTIN_VPCOMLTUW,
28656 IX86_BUILTIN_VPCOMLEUW,
28657 IX86_BUILTIN_VPCOMGTUW,
28658 IX86_BUILTIN_VPCOMGEUW,
28659 IX86_BUILTIN_VPCOMFALSEUW,
28660 IX86_BUILTIN_VPCOMTRUEUW,
28661
28662 IX86_BUILTIN_VPCOMEQUD,
28663 IX86_BUILTIN_VPCOMNEUD,
28664 IX86_BUILTIN_VPCOMLTUD,
28665 IX86_BUILTIN_VPCOMLEUD,
28666 IX86_BUILTIN_VPCOMGTUD,
28667 IX86_BUILTIN_VPCOMGEUD,
28668 IX86_BUILTIN_VPCOMFALSEUD,
28669 IX86_BUILTIN_VPCOMTRUEUD,
28670
28671 IX86_BUILTIN_VPCOMEQUQ,
28672 IX86_BUILTIN_VPCOMNEUQ,
28673 IX86_BUILTIN_VPCOMLTUQ,
28674 IX86_BUILTIN_VPCOMLEUQ,
28675 IX86_BUILTIN_VPCOMGTUQ,
28676 IX86_BUILTIN_VPCOMGEUQ,
28677 IX86_BUILTIN_VPCOMFALSEUQ,
28678 IX86_BUILTIN_VPCOMTRUEUQ,
28679
28680 IX86_BUILTIN_VPCOMEQB,
28681 IX86_BUILTIN_VPCOMNEB,
28682 IX86_BUILTIN_VPCOMLTB,
28683 IX86_BUILTIN_VPCOMLEB,
28684 IX86_BUILTIN_VPCOMGTB,
28685 IX86_BUILTIN_VPCOMGEB,
28686 IX86_BUILTIN_VPCOMFALSEB,
28687 IX86_BUILTIN_VPCOMTRUEB,
28688
28689 IX86_BUILTIN_VPCOMEQW,
28690 IX86_BUILTIN_VPCOMNEW,
28691 IX86_BUILTIN_VPCOMLTW,
28692 IX86_BUILTIN_VPCOMLEW,
28693 IX86_BUILTIN_VPCOMGTW,
28694 IX86_BUILTIN_VPCOMGEW,
28695 IX86_BUILTIN_VPCOMFALSEW,
28696 IX86_BUILTIN_VPCOMTRUEW,
28697
28698 IX86_BUILTIN_VPCOMEQD,
28699 IX86_BUILTIN_VPCOMNED,
28700 IX86_BUILTIN_VPCOMLTD,
28701 IX86_BUILTIN_VPCOMLED,
28702 IX86_BUILTIN_VPCOMGTD,
28703 IX86_BUILTIN_VPCOMGED,
28704 IX86_BUILTIN_VPCOMFALSED,
28705 IX86_BUILTIN_VPCOMTRUED,
28706
28707 IX86_BUILTIN_VPCOMEQQ,
28708 IX86_BUILTIN_VPCOMNEQ,
28709 IX86_BUILTIN_VPCOMLTQ,
28710 IX86_BUILTIN_VPCOMLEQ,
28711 IX86_BUILTIN_VPCOMGTQ,
28712 IX86_BUILTIN_VPCOMGEQ,
28713 IX86_BUILTIN_VPCOMFALSEQ,
28714 IX86_BUILTIN_VPCOMTRUEQ,
28715
28716 /* LWP instructions. */
28717 IX86_BUILTIN_LLWPCB,
28718 IX86_BUILTIN_SLWPCB,
28719 IX86_BUILTIN_LWPVAL32,
28720 IX86_BUILTIN_LWPVAL64,
28721 IX86_BUILTIN_LWPINS32,
28722 IX86_BUILTIN_LWPINS64,
28723
28724 IX86_BUILTIN_CLZS,
28725
28726 /* RTM */
28727 IX86_BUILTIN_XBEGIN,
28728 IX86_BUILTIN_XEND,
28729 IX86_BUILTIN_XABORT,
28730 IX86_BUILTIN_XTEST,
28731
28732 /* BMI instructions. */
28733 IX86_BUILTIN_BEXTR32,
28734 IX86_BUILTIN_BEXTR64,
28735 IX86_BUILTIN_CTZS,
28736
28737 /* TBM instructions. */
28738 IX86_BUILTIN_BEXTRI32,
28739 IX86_BUILTIN_BEXTRI64,
28740
28741 /* BMI2 instructions. */
28742 IX86_BUILTIN_BZHI32,
28743 IX86_BUILTIN_BZHI64,
28744 IX86_BUILTIN_PDEP32,
28745 IX86_BUILTIN_PDEP64,
28746 IX86_BUILTIN_PEXT32,
28747 IX86_BUILTIN_PEXT64,
28748
28749 /* ADX instructions. */
28750 IX86_BUILTIN_ADDCARRYX32,
28751 IX86_BUILTIN_ADDCARRYX64,
28752
28753 /* SBB instructions. */
28754 IX86_BUILTIN_SBB32,
28755 IX86_BUILTIN_SBB64,
28756
28757 /* FSGSBASE instructions. */
28758 IX86_BUILTIN_RDFSBASE32,
28759 IX86_BUILTIN_RDFSBASE64,
28760 IX86_BUILTIN_RDGSBASE32,
28761 IX86_BUILTIN_RDGSBASE64,
28762 IX86_BUILTIN_WRFSBASE32,
28763 IX86_BUILTIN_WRFSBASE64,
28764 IX86_BUILTIN_WRGSBASE32,
28765 IX86_BUILTIN_WRGSBASE64,
28766
28767 /* RDRND instructions. */
28768 IX86_BUILTIN_RDRAND16_STEP,
28769 IX86_BUILTIN_RDRAND32_STEP,
28770 IX86_BUILTIN_RDRAND64_STEP,
28771
28772 /* RDSEED instructions. */
28773 IX86_BUILTIN_RDSEED16_STEP,
28774 IX86_BUILTIN_RDSEED32_STEP,
28775 IX86_BUILTIN_RDSEED64_STEP,
28776
28777 /* F16C instructions. */
28778 IX86_BUILTIN_CVTPH2PS,
28779 IX86_BUILTIN_CVTPH2PS256,
28780 IX86_BUILTIN_CVTPS2PH,
28781 IX86_BUILTIN_CVTPS2PH256,
28782
28783 /* CFString built-in for darwin */
28784 IX86_BUILTIN_CFSTRING,
28785
28786 /* Builtins to get CPU type and supported features. */
28787 IX86_BUILTIN_CPU_INIT,
28788 IX86_BUILTIN_CPU_IS,
28789 IX86_BUILTIN_CPU_SUPPORTS,
28790
28791 /* Read/write FLAGS register built-ins. */
28792 IX86_BUILTIN_READ_FLAGS,
28793 IX86_BUILTIN_WRITE_FLAGS,
28794
28795 IX86_BUILTIN_MAX
28796 };
28797
28798 /* Table for the ix86 builtin decls. */
28799 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28800
28801 /* Table of all of the builtin functions that are possible with different ISAs
28802 but are waiting to be built until a function is declared to use that
28803 ISA. */
28804 struct builtin_isa {
28805 const char *name; /* function name */
28806 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28807 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28808 bool const_p; /* true if the declaration is constant */
28809 bool set_and_not_built_p; /* true if the builtin was recorded but its decl is not built yet */
28810 };
28811
28812 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28813
28814
28815 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
28816 of isa_flags to use in the ix86_builtins_isa array. Store the
28817 function decl in the ix86_builtins array. Return the function decl, or
28818 NULL_TREE if the builtin was not added.
28819
28820 If the front end has a special hook for builtin functions, delay adding
28821 builtin functions that aren't in the current ISA until the ISA is changed
28822 with function-specific optimization. Doing so can save about 300K for the
28823 default compiler. When the builtin is expanded, check at that time whether
28824 it is valid.
28825
28826 If the front end doesn't have a special hook, record all builtins, even
28827 those whose instruction set isn't in the current ISA, in case the user uses
28828 function-specific options for a different ISA, so that we don't get scope
28829 errors if a builtin is added in the middle of a function scope. */
28830
28831 static inline tree
28832 def_builtin (HOST_WIDE_INT mask, const char *name,
28833 enum ix86_builtin_func_type tcode,
28834 enum ix86_builtins code)
28835 {
28836 tree decl = NULL_TREE;
28837
28838 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28839 {
28840 ix86_builtins_isa[(int) code].isa = mask;
28841
28842 mask &= ~OPTION_MASK_ISA_64BIT;
28843 if (mask == 0
28844 || (mask & ix86_isa_flags) != 0
28845 || (lang_hooks.builtin_function
28846 == lang_hooks.builtin_function_ext_scope))
28847
28848 {
28849 tree type = ix86_get_builtin_func_type (tcode);
28850 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28851 NULL, NULL_TREE);
28852 ix86_builtins[(int) code] = decl;
28853 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28854 }
28855 else
28856 {
28857 ix86_builtins[(int) code] = NULL_TREE;
28858 ix86_builtins_isa[(int) code].tcode = tcode;
28859 ix86_builtins_isa[(int) code].name = name;
28860 ix86_builtins_isa[(int) code].const_p = false;
28861 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28862 }
28863 }
28864
28865 return decl;
28866 }
28867
28868 /* Like def_builtin, but also marks the function decl "const". */
28869
28870 static inline tree
28871 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28872 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28873 {
28874 tree decl = def_builtin (mask, name, tcode, code);
28875 if (decl)
28876 TREE_READONLY (decl) = 1;
28877 else
28878 ix86_builtins_isa[(int) code].const_p = true;
28879
28880 return decl;
28881 }
28882
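/* Editor's sketch (an addition, not from the original sources): how the two
   helpers above are typically used.  The registrations performed later in
   this file by the ix86_init_*_builtins routines boil down to calls like the
   one below; the builtin name and enum value here are hypothetical
   placeholders.  */
#if 0
static void
example_register_builtins (void)
{
  /* Register a const builtin under the SSE2 mask.  If SSE2 is not enabled
     in ix86_isa_flags, the decl may only be recorded here and built lazily
     by ix86_add_new_builtins once the ISA becomes available (see the comment
     above def_builtin for the exact condition).  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example_addpd",
		     V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);
}
#endif
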
28883 /* Add any new builtin functions for a given ISA that may not have been
28884 declared. This saves a bit of space compared to adding all of the
28885 declarations to the tree up front, whether or not they are ever used. */
28886
28887 static void
28888 ix86_add_new_builtins (HOST_WIDE_INT isa)
28889 {
28890 int i;
28891
28892 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28893 {
28894 if ((ix86_builtins_isa[i].isa & isa) != 0
28895 && ix86_builtins_isa[i].set_and_not_built_p)
28896 {
28897 tree decl, type;
28898
28899 /* Don't define the builtin again. */
28900 ix86_builtins_isa[i].set_and_not_built_p = false;
28901
28902 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28903 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28904 type, i, BUILT_IN_MD, NULL,
28905 NULL_TREE);
28906
28907 ix86_builtins[i] = decl;
28908 if (ix86_builtins_isa[i].const_p)
28909 TREE_READONLY (decl) = 1;
28910 }
28911 }
28912 }
28913
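/* Editor's sketch: the user-visible effect of the lazy registration above.
   A builtin whose ISA was not enabled on the command line becomes usable
   inside a function that opts into that ISA with the target attribute, at
   which point ix86_add_new_builtins declares it.  Hypothetical user code,
   not part of this file; only the builtin name is real.  */
#if 0
__attribute__ ((target ("avx")))
static __m256d
sum256 (__m256d a, __m256d b)
{
  /* __builtin_ia32_addpd256 is declared on demand once "avx" is enabled
     for this function.  */
  return __builtin_ia32_addpd256 (a, b);
}
#endif
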
28914 /* Bits for builtin_description.flag. */
28915
28916 /* Set when we don't support the comparison natively, and should
28917 swap the comparison operands in order to support it. */
28918 #define BUILTIN_DESC_SWAP_OPERANDS 1
28919
28920 struct builtin_description
28921 {
28922 const HOST_WIDE_INT mask;
28923 const enum insn_code icode;
28924 const char *const name;
28925 const enum ix86_builtins code;
28926 const enum rtx_code comparison;
28927 const int flag;
28928 };
28929
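/* Editor's note (sketch): each entry in the descriptor tables below binds one
   __builtin_ia32_* name to an insn pattern, an IX86_BUILTIN_* code and, for
   the comparison tables, the rtx comparison code to emit.  The annotated copy
   of the first bdesc_comi row illustrates the field layout.  */
#if 0
  /* mask       = OPTION_MASK_ISA_SSE      -- ISA the builtin requires
     icode      = CODE_FOR_sse_comi        -- insn pattern used to expand it
     name       = "__builtin_ia32_comieq"  -- source-level builtin name
     code       = IX86_BUILTIN_COMIEQSS    -- index into ix86_builtins[]
     comparison = UNEQ                     -- rtx code for the comparison
     flag       = 0                        -- BUILTIN_DESC_* bits  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
    IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
#endif
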
28930 static const struct builtin_description bdesc_comi[] =
28931 {
28932 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28933 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28934 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28935 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28936 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28937 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28938 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28939 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28940 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28942 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28943 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28945 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28946 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28947 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28948 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28949 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28951 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28952 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28953 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28956 };
28957
28958 static const struct builtin_description bdesc_pcmpestr[] =
28959 {
28960 /* SSE4.2 */
28961 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28962 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28963 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28964 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28965 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28966 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28967 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28968 };
28969
28970 static const struct builtin_description bdesc_pcmpistr[] =
28971 {
28972 /* SSE4.2 */
28973 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28974 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28975 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28976 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28977 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28978 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28979 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28980 };
28981
28982 /* Special builtins with a variable number of arguments. */
28983 static const struct builtin_description bdesc_special_args[] =
28984 {
28985 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28986 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28987 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28988
28989 /* 80387 (used internally for atomic compound assignment). */
28990 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28991 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28992 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
28993 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28994
28995 /* MMX */
28996 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28997
28998 /* 3DNow! */
28999 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29000
29001 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29002 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29003 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29004 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29005 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29006 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29007 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29008 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29009 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29010
29011 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29012 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29013 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29014 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29015 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29016 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29017 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29018 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29019
29020 /* SSE */
29021 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29022 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29023 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29024
29025 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29026 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29027 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29028 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29029
29030 /* SSE or 3DNow!A */
29031 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29032 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29033
29034 /* SSE2 */
29035 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29036 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29037 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29038 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29039 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29040 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29041 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29042 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29043 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29044 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29045
29046 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29047 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29048
29049 /* SSE3 */
29050 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29051
29052 /* SSE4.1 */
29053 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29054
29055 /* SSE4A */
29056 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29057 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29058
29059 /* AVX */
29060 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29061 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29062
29063 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29064 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29065 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29066 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29067 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29068
29069 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29070 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29071 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29074 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29075 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29076
29077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29078 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29080
29081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29084 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29085 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29086 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29087 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29088 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29089
29090 /* AVX2 */
29091 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29092 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29093 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29094 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29095 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29096 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29097 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29098 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29099 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29100
29101 /* AVX512F */
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29149
29150 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29151 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29152 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29153 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29154 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29155 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29156
29157 /* FSGSBASE */
29158 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29159 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29160 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29161 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29162 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29163 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29164 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29165 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29166
29167 /* RTM */
29168 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29169 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29170 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29171 };
29172
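/* Editor's sketch: the special builtins above are what the installed
   intrinsic headers expand to.  For instance, xmmintrin.h implements
   _mm_storeu_ps roughly as follows (simplified from the real header).  */
#if 0
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_storeu_ps (float *__P, __m128 __A)
{
  __builtin_ia32_storeups (__P, (__v4sf)__A);
}
#endif
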
29173 /* Builtins with a variable number of arguments. */
29174 static const struct builtin_description bdesc_args[] =
29175 {
29176 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29177 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29178 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29179 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29180 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29181 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29182 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29183
29184 /* MMX */
29185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29191
29192 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29200
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29203
29204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29208
29209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29215
29216 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29218 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29219 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29220 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29221 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29222
29223 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29224 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29225 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29226
29227 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29228
29229 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29230 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29231 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29232 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29233 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29234 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29235
29236 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29237 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29238 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29239 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29240 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29241 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29242
29243 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29244 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29245 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29247
29248 /* 3DNow! */
29249 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29250 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29251 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29252 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29253
29254 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29255 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29256 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29257 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29258 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29259 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29260 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29261 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29262 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29263 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29264 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29265 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29266 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29267 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29268 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29269
29270 /* 3DNow!A */
29271 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29272 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29273 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29274 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29275 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29276 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29277
29278 /* SSE */
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29280 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29285 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29287 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29290 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29291
29292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29293
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29302
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29308 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29311 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29312 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29313 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29314 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29315 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29316 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29317 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29318 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29319 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29320 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29321 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29322 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29323
29324 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29325 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29326 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29327 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29328
29329 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29330 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29331 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29332 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29333
29334 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29335
29336 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29337 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29338 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29339 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29341
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29344 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29345
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29347
29348 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29351
29352 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29354
29355 /* SSE MMX or 3DNow!A */
29356 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29357 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29358 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29359
29360 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29361 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29362 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29363 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29364
29365 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29366 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29367
29368 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29369
29370 /* SSE2 */
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29372
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29378
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29382 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29384
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29386
29387 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29389 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29390 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29391
29392 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29394 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29395
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29398 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29404
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
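  /* Note on the comparison column of the CMP* entries above: the rtx code
     (EQ, LT, LE, UNGE, UNGT, ORDERED, UNORDERED, ...) is what the expander
     feeds to the mask-compare pattern, and the *_SWAP ftypes additionally
     exchange the two operands.  That is how the "greater" forms are built
     from the "less" ones; for example

	__v2df m = __builtin_ia32_cmpgtpd (a, b);

     is emitted as the LT comparison with a and b swapped, i.e. as
     cmpltpd.  */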
29425
29426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29430
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29435
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29437
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29441
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29443
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29450 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29452
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29461
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29464
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29469
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29472
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29479
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29484
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29493
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29497
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29500
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29502 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
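  /* The two entries above are the widening unsigned multiplies: only the
     even 32-bit elements of each operand are multiplied, giving 64-bit
     products.  For instance _mm_mul_epu32 reduces to

	__v2di p = __builtin_ia32_pmuludq128 (a, b);

     with, roughly, p[0] = (u64) a[0] * b[0] and p[1] = (u64) a[2] * b[2].  */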
29503
29504 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29505
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29507 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29510
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29513 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29518
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29522 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29525 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29526
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29530 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
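  /* A short key to the shift ftypes above, for readers of this table: the
     *_SI_COUNT forms take the shift count as an ordinary integer, the
     *_V*_COUNT forms take it in the low quadword of a vector operand, and
     the two _INT_CONVERT entries (pslldqi128/psrldqi128) are the
     whole-register byte shifts whose operands are reinterpreted as a single
     128-bit value.  The intrinsic wrappers pick the matching builtin, e.g.
     _mm_slli_epi16 (x, 3) becomes __builtin_ia32_psllwi128 (x, 3).  */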
29531
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29533 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29535
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29537
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29539
29540 /* SSE2 MMX */
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29542 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29543
29544 /* SSE3 */
29545 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29546 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29547
29548 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29549 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29550 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29551 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29552 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29553 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29554
29555 /* SSSE3 */
29556 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29557 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29558 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29559 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29560 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29561 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29562
29563 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29564 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29565 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29566 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29567 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29568 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29569 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29570 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29571 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29572 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29573 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29574 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29575 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29576 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29577 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29578 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29579 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29580 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29581 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29582 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29583 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29584 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29585 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29586 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29587
29588 /* SSSE3 palignr */
29589 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29590 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29591
29592 /* SSE4.1 */
29593 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29594 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29595 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29596 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29597 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29598 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29599 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29600 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29601 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29602 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29603
29604 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29605 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29606 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29607 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29608 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29609 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29610 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29611 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29612 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29613 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29614 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29615 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29616 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29617
29618 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29619 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29620 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29621 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29622 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29623 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29624 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29625 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29626 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29627 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29628 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29629 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29630
29631 /* SSE4.1 round and ptest */
29632 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29633 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29634 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29635 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29636
29637 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29638 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29639 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29640 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
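  /* In the floor/ceil/trunc/rint entries above and below, the comparison
     column is reused to carry a ROUND_* rounding-mode constant (hence the
     cast to enum rtx_code); the expander passes that value through as the
     immediate operand of the underlying roundpd/roundps pattern instead of
     treating it as a comparison.  */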
29641
29642 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29643 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29644
29645 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29646 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29647
29648 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29649 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29650 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29651 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29652
29653 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29654 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29655
29656 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29657 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29658
29659 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29660 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29661 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
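  /* For the ptest entries above, the comparison code selects which flag of
     the PTEST instruction the builtin reports: EQ tests ZF (ptestz), LTU
     tests CF (ptestc) and GTU tests "neither flag set" (ptestnzc).  For
     example _mm_testz_si128 (a, b) is simply __builtin_ia32_ptestz128 (a, b).  */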
29662
29663 /* SSE4.2 */
29664 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29665 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29666 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29667 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29668 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29669
29670 /* SSE4A */
29671 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29672 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29673 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29674 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29675
29676 /* AES */
29677 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29678 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29679
29680 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29681 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29682 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29683 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29684
29685 /* PCLMUL */
29686 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
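  /* The AES and PCLMUL entries above have a null name: these builtins are
     declared elsewhere in this file under their own ISA option, and only
     the expansion data recorded here is consulted when a call to them is
     expanded.  */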
29687
29688 /* AVX */
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29712 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
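  /* The 256-bit arithmetic builtins above are what the AVX intrinsics in
     avxintrin.h of this era reduce to; for example

	__m256d r = _mm256_add_pd (x, y);

     is essentially __builtin_ia32_addpd256 (x, y), expanded through the
     addv4df3 pattern recorded here.  */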
29715
29716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29720
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29724 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29727 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29732 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29735 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29738 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29743 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29746 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29749 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29755
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29759
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29765
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29767
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29770
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29775
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29778
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29781
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29786
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29789
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29792
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29797
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29804
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29816 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29820
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29823
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29826
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29828
29829 /* AVX2 */
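/* As elsewhere in this table, each entry lists the ISA option mask,
   the insn code, the builtin name, its IX86_BUILTIN_* code, an rtx
   comparison code (UNKNOWN when unused) and the prototype index.
   Many of the 256-bit integer operations below simply reuse the
   generic <op><mode>3 patterns (addv32qi3, smaxv16hi3, ...) rather
   than AVX2-specific expanders.  */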
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
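/* The vector shifts below come in two flavors sharing one shift
   pattern: the ..._SI_COUNT / ..._INT_COUNT prototypes take the count
   as a scalar, e.g.
     __builtin_ia32_psllwi256 (x, 3);
   while the ..._V8HI_COUNT / ..._V4SI_COUNT / ..._V2DI_COUNT
   prototypes take it in the low part of an XMM operand, e.g.
     __builtin_ia32_psllw256 (x, count);
   the expander adjusts the count operand accordingly.  */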
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29976
29977 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29978
29979 /* BMI */
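/* The control operand of the bextr builtins packs the bit-field
   description: bits [7:0] give the start position and bits [15:8] the
   length, so e.g. __builtin_ia32_bextr_u32 (x, 4 | (8 << 8)) extracts
   an 8-bit field starting at bit 4.  */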
29980 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29981 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29982 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29983
29984 /* TBM */
29985 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29986 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29987
29988 /* F16C */
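/* The INT operand of the vcvtps2ph builtins is the immediate rounding
   control passed through to the instruction; the half-to-single
   conversions take no such operand.  */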
29989 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29990 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29991 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29992 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29993
29994 /* BMI2 */
29995 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29996 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29997 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29998 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29999 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30000 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30001
30002 /* AVX512F */
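/* Most AVX-512 entries use the "_mask" form of a pattern: the
   prototype gains a trailing merge (pass-through) vector and a write
   mask, HI (__mmask16) for 16-element and QI (__mmask8) for 8-element
   vectors.  A call is roughly
     __builtin_ia32_paddd512_mask (a, b, src, m);
   where elements whose bit in M is clear are taken from SRC.  The
   "_maskz" variants zero those elements instead of merging.  */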
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30009 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30010 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30012 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30013 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30058 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30059 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30061 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30122 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30170 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30171 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30172 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30173 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30200
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30205 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30209
30210 /* Mask arithmetic operations */
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30221
30222 /* SHA */
30223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30230 };
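/* Illustrative sketch (added commentary, not part of the table): each entry
   above simply binds an expander pattern to a public builtin name.  The
   mask-arithmetic entries, for instance, are what the AVX-512 mask
   intrinsics are built on, roughly:

     #include <immintrin.h>

     __mmask16
     and_masks (__mmask16 a, __mmask16 b)
     {
       return _mm512_kand (a, b);   // wraps __builtin_ia32_kandhi
     }

   The wrapper follows the usual avx512fintrin.h convention; treat the exact
   plumbing as an assumption rather than a contract.  */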
30231
30232 /* Builtins with rounding support. */
30233 static const struct builtin_description bdesc_round_args[] =
30234 {
30235 /* AVX512F */
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30255 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30257 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30264 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30266 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30271 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30318 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30320 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30322 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30324 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30325 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30326 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30327 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30328 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30330 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30355
30356 /* AVX512ER */
30357 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30358 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30359 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30360 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30361 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30362 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30363 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30364 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30365 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30366 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30367 };
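/* Illustrative sketch: every entry above carries a trailing INT operand
   holding the embedded rounding / SAE control.  Assuming the usual
   <immintrin.h> wrappers, a round-toward-zero add looks roughly like:

     #include <immintrin.h>

     __m512d
     add_rz (__m512d a, __m512d b)
     {
       return _mm512_add_round_pd (a, b,
                                   _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
     }

   which bottoms out in the "__builtin_ia32_addpd512_mask" entry with an
   all-ones mask; the wrapper details are an assumption made for
   illustration, not part of this table.  */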
30368
30369 /* FMA4 and XOP. */
30370 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30371 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30372 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30373 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30374 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30375 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30376 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30377 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30378 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30379 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30380 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30381 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30382 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30383 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30384 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30385 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30386 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30387 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30388 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30389 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30390 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30391 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30392 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30393 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30394 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30395 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30396 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30397 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30398 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30399 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30400 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30401 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30402 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30403 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30404 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30405 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30406 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30407 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30408 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30409 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30410 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30411 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30412 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30413 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30414 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30415 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30416 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30417 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30418 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30419 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30420 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30421 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
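/* Reading aid (added commentary): the MULTI_ARG_* names encode the operand
   count and element kind, with a "2" suffix for the 256-bit variants and
   _IMM/_CMP/_TF suffixes for an immediate, a comparison code or a fixed
   true/false code.  E.g. MULTI_ARG_3_DF2 is V4DF_FTYPE_V4DF_V4DF_V4DF:
   three 256-bit double vectors in, one out.  */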
30422
30423 static const struct builtin_description bdesc_multi_arg[] =
30424 {
30425 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30426 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30427 UNKNOWN, (int)MULTI_ARG_3_SF },
30428 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30429 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30430 UNKNOWN, (int)MULTI_ARG_3_DF },
30431
30432 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30433 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30434 UNKNOWN, (int)MULTI_ARG_3_SF },
30435 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30436 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30437 UNKNOWN, (int)MULTI_ARG_3_DF },
30438
30439 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30440 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30441 UNKNOWN, (int)MULTI_ARG_3_SF },
30442 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30443 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30444 UNKNOWN, (int)MULTI_ARG_3_DF },
30445 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30446 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30447 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30448 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30449 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30450 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30451
30452 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30453 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30454 UNKNOWN, (int)MULTI_ARG_3_SF },
30455 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30456 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30457 UNKNOWN, (int)MULTI_ARG_3_DF },
30458 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30459 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30460 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30461 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30462 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30463 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30464
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30472
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30480
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30482
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30495
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30512
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30519
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30535
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30543
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30551
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30559
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30567
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30573 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30575
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30580 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30583
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30591
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30596 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30599
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30604 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30608
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30612 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30617
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30620 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30622
30623 };
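/* Illustrative sketch: the VPCOM* entries above share one expander per
   element width and differ only in the rtx comparison code.  With -mxop
   the signed byte compare can be reached directly, e.g.:

     typedef char v16qi __attribute__ ((vector_size (16)));

     v16qi
     lt_bytes (v16qi a, v16qi b)
     {
       return (v16qi) __builtin_ia32_vpcomltb (a, b);
     }

   The vector typedef and cast mirror the usual xopintrin.h pattern and are
   assumptions here, not part of the table.  */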
30624 \f
30625 /* TM vector builtins. */
30626
30627 /* Reuse the existing x86-specific `struct builtin_description' because it
30628 is convenient; the casts below make the generic TM builtin codes fit.  */
30629 static const struct builtin_description bdesc_tm[] =
30630 {
30631 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30632 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30633 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30634 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30635 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30636 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30637 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30638
30639 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30640 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30641 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30642 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30643 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30644 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30645 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30646
30647 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30648 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30649 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30650 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30651 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30652 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30653 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30654
30655 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30656 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30657 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30658 };
30659
30660 /* TM callbacks. */
30661
30662 /* Return the builtin decl needed to load a vector of TYPE. */
30663
30664 static tree
30665 ix86_builtin_tm_load (tree type)
30666 {
30667 if (TREE_CODE (type) == VECTOR_TYPE)
30668 {
30669 switch (tree_to_uhwi (TYPE_SIZE (type)))
30670 {
30671 case 64:
30672 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30673 case 128:
30674 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30675 case 256:
30676 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30677 }
30678 }
30679 return NULL_TREE;
30680 }
30681
30682 /* Return the builtin decl needed to store a vector of TYPE. */
30683
30684 static tree
30685 ix86_builtin_tm_store (tree type)
30686 {
30687 if (TREE_CODE (type) == VECTOR_TYPE)
30688 {
30689 switch (tree_to_uhwi (TYPE_SIZE (type)))
30690 {
30691 case 64:
30692 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30693 case 128:
30694 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30695 case 256:
30696 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30697 }
30698 }
30699 return NULL_TREE;
30700 }
30701 \f
30702 /* Initialize the transactional memory vector load/store builtins. */
30703
30704 static void
30705 ix86_init_tm_builtins (void)
30706 {
30707 enum ix86_builtin_func_type ftype;
30708 const struct builtin_description *d;
30709 size_t i;
30710 tree decl;
30711 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30712 tree attrs_log, attrs_type_log;
30713
30714 if (!flag_tm)
30715 return;
30716
30717 /* If there are no builtins defined, we must be compiling in a
30718 language without trans-mem support. */
30719 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30720 return;
30721
30722 /* Use whatever attributes a normal TM load has. */
30723 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30724 attrs_load = DECL_ATTRIBUTES (decl);
30725 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30726 /* Use whatever attributes a normal TM store has. */
30727 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30728 attrs_store = DECL_ATTRIBUTES (decl);
30729 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30730 /* Use whatever attributes a normal TM log has. */
30731 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30732 attrs_log = DECL_ATTRIBUTES (decl);
30733 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30734
30735 for (i = 0, d = bdesc_tm;
30736 i < ARRAY_SIZE (bdesc_tm);
30737 i++, d++)
30738 {
30739 if ((d->mask & ix86_isa_flags) != 0
30740 || (lang_hooks.builtin_function
30741 == lang_hooks.builtin_function_ext_scope))
30742 {
30743 tree type, attrs, attrs_type;
30744 enum built_in_function code = (enum built_in_function) d->code;
30745
30746 ftype = (enum ix86_builtin_func_type) d->flag;
30747 type = ix86_get_builtin_func_type (ftype);
30748
30749 if (BUILTIN_TM_LOAD_P (code))
30750 {
30751 attrs = attrs_load;
30752 attrs_type = attrs_type_load;
30753 }
30754 else if (BUILTIN_TM_STORE_P (code))
30755 {
30756 attrs = attrs_store;
30757 attrs_type = attrs_type_store;
30758 }
30759 else
30760 {
30761 attrs = attrs_log;
30762 attrs_type = attrs_type_log;
30763 }
30764 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30765 /* The builtin without the prefix for
30766 calling it directly. */
30767 d->name + strlen ("__builtin_"),
30768 attrs);
30769 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30770 set the TYPE_ATTRIBUTES. */
30771 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30772
30773 set_builtin_decl (code, decl, false);
30774 }
30775 }
30776 }
30777
30778 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30779 in the current target ISA, to allow the user to compile particular modules
30780 with target-specific options that differ from the command-line
30781 options. */
30782 static void
30783 ix86_init_mmx_sse_builtins (void)
30784 {
30785 const struct builtin_description * d;
30786 enum ix86_builtin_func_type ftype;
30787 size_t i;
30788
30789 /* Add all special builtins with variable number of operands. */
30790 for (i = 0, d = bdesc_special_args;
30791 i < ARRAY_SIZE (bdesc_special_args);
30792 i++, d++)
30793 {
30794 if (d->name == 0)
30795 continue;
30796
30797 ftype = (enum ix86_builtin_func_type) d->flag;
30798 def_builtin (d->mask, d->name, ftype, d->code);
30799 }
30800
30801 /* Add all builtins with variable number of operands. */
30802 for (i = 0, d = bdesc_args;
30803 i < ARRAY_SIZE (bdesc_args);
30804 i++, d++)
30805 {
30806 if (d->name == 0)
30807 continue;
30808
30809 ftype = (enum ix86_builtin_func_type) d->flag;
30810 def_builtin_const (d->mask, d->name, ftype, d->code);
30811 }
30812
30813 /* Add all builtins with rounding. */
30814 for (i = 0, d = bdesc_round_args;
30815 i < ARRAY_SIZE (bdesc_round_args);
30816 i++, d++)
30817 {
30818 if (d->name == 0)
30819 continue;
30820
30821 ftype = (enum ix86_builtin_func_type) d->flag;
30822 def_builtin_const (d->mask, d->name, ftype, d->code);
30823 }
30824
30825 /* pcmpestr[im] insns. */
30826 for (i = 0, d = bdesc_pcmpestr;
30827 i < ARRAY_SIZE (bdesc_pcmpestr);
30828 i++, d++)
30829 {
30830 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30831 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30832 else
30833 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30834 def_builtin_const (d->mask, d->name, ftype, d->code);
30835 }
30836
30837 /* pcmpistr[im] insns. */
30838 for (i = 0, d = bdesc_pcmpistr;
30839 i < ARRAY_SIZE (bdesc_pcmpistr);
30840 i++, d++)
30841 {
30842 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30843 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30844 else
30845 ftype = INT_FTYPE_V16QI_V16QI_INT;
30846 def_builtin_const (d->mask, d->name, ftype, d->code);
30847 }
30848
30849 /* comi/ucomi insns. */
30850 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30851 {
30852 if (d->mask == OPTION_MASK_ISA_SSE2)
30853 ftype = INT_FTYPE_V2DF_V2DF;
30854 else
30855 ftype = INT_FTYPE_V4SF_V4SF;
30856 def_builtin_const (d->mask, d->name, ftype, d->code);
30857 }
30858
30859 /* SSE */
30860 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30861 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30862 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30863 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30864
30865 /* SSE or 3DNow!A */
30866 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30867 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30868 IX86_BUILTIN_MASKMOVQ);
30869
30870 /* SSE2 */
30871 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30872 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30873
30874 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30875 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30876 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30877 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30878
30879 /* SSE3. */
30880 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30881 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30882 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30883 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30884
30885 /* AES */
30886 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30887 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30888 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30889 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30890 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30891 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30892 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30893 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30894 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30895 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30896 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30897 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30898
30899 /* PCLMUL */
30900 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30901 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30902
30903 /* RDRND */
30904 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30905 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30906 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30907 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30908 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30909 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30910 IX86_BUILTIN_RDRAND64_STEP);
30911
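/* Editor's note (illustrative, not part of the original source): the
   *_step builtins above expose the RDRAND carry-flag protocol, e.g.

     unsigned int val;
     if (__builtin_ia32_rdrand32_step (&val))
       consume (val);          -- a random value was stored in val
     else
       retry_or_fallback ();   -- the hardware had no entropy ready

   consume() and retry_or_fallback() are hypothetical placeholders.  */
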
30912 /* AVX2 */
30913 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30914 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30915 IX86_BUILTIN_GATHERSIV2DF);
30916
30917 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30918 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30919 IX86_BUILTIN_GATHERSIV4DF);
30920
30921 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30922 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30923 IX86_BUILTIN_GATHERDIV2DF);
30924
30925 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30926 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30927 IX86_BUILTIN_GATHERDIV4DF);
30928
30929 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30930 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30931 IX86_BUILTIN_GATHERSIV4SF);
30932
30933 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30934 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30935 IX86_BUILTIN_GATHERSIV8SF);
30936
30937 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30938 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30939 IX86_BUILTIN_GATHERDIV4SF);
30940
30941 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30942 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30943 IX86_BUILTIN_GATHERDIV8SF);
30944
30945 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30946 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30947 IX86_BUILTIN_GATHERSIV2DI);
30948
30949 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30950 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30951 IX86_BUILTIN_GATHERSIV4DI);
30952
30953 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30954 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30955 IX86_BUILTIN_GATHERDIV2DI);
30956
30957 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30958 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30959 IX86_BUILTIN_GATHERDIV4DI);
30960
30961 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30962 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30963 IX86_BUILTIN_GATHERSIV4SI);
30964
30965 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30966 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30967 IX86_BUILTIN_GATHERSIV8SI);
30968
30969 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30970 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30971 IX86_BUILTIN_GATHERDIV4SI);
30972
30973 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30974 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30975 IX86_BUILTIN_GATHERDIV8SI);
30976
30977 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30978 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30979 IX86_BUILTIN_GATHERALTSIV4DF);
30980
30981 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30982 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30983 IX86_BUILTIN_GATHERALTDIV8SF);
30984
30985 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30986 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30987 IX86_BUILTIN_GATHERALTSIV4DI);
30988
30989 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30990 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30991 IX86_BUILTIN_GATHERALTDIV8SI);
30992
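/* Editor's note (illustrative summary, not from the original source):
   each gather builtin registered above follows the operand order
   (src, base pointer, index vector, mask, scale), matching its FTYPE,
   so a call looks roughly like

     res = __builtin_ia32_gathersiv4sf (src, base, idx, mask, 4);

   with the scale immediate restricted to 1, 2, 4 or 8.  */
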
30993 /* AVX512F */
30994 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30995 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30996 IX86_BUILTIN_GATHER3SIV16SF);
30997
30998 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30999 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31000 IX86_BUILTIN_GATHER3SIV8DF);
31001
31002 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31003 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31004 IX86_BUILTIN_GATHER3DIV16SF);
31005
31006 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31007 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31008 IX86_BUILTIN_GATHER3DIV8DF);
31009
31010 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31011 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31012 IX86_BUILTIN_GATHER3SIV16SI);
31013
31014 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31015 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31016 IX86_BUILTIN_GATHER3SIV8DI);
31017
31018 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31019 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31020 IX86_BUILTIN_GATHER3DIV16SI);
31021
31022 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31023 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31024 IX86_BUILTIN_GATHER3DIV8DI);
31025
31026 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
31027 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31028 IX86_BUILTIN_GATHER3ALTSIV8DF);
31029
31030 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
31031 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31032 IX86_BUILTIN_GATHER3ALTDIV16SF);
31033
31034 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
31035 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31036 IX86_BUILTIN_GATHER3ALTSIV8DI);
31037
31038 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
31039 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31040 IX86_BUILTIN_GATHER3ALTDIV16SI);
31041
31042 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31043 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31044 IX86_BUILTIN_SCATTERSIV16SF);
31045
31046 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31047 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31048 IX86_BUILTIN_SCATTERSIV8DF);
31049
31050 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31051 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31052 IX86_BUILTIN_SCATTERDIV16SF);
31053
31054 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31055 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31056 IX86_BUILTIN_SCATTERDIV8DF);
31057
31058 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31059 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31060 IX86_BUILTIN_SCATTERSIV16SI);
31061
31062 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31063 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31064 IX86_BUILTIN_SCATTERSIV8DI);
31065
31066 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31067 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31068 IX86_BUILTIN_SCATTERDIV16SI);
31069
31070 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31071 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31072 IX86_BUILTIN_SCATTERDIV8DI);
31073
31074 /* AVX512PF */
31075 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31076 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31077 IX86_BUILTIN_GATHERPFDPD);
31078 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31079 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31080 IX86_BUILTIN_GATHERPFDPS);
31081 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31082 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31083 IX86_BUILTIN_GATHERPFQPD);
31084 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31085 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31086 IX86_BUILTIN_GATHERPFQPS);
31087 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31088 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31089 IX86_BUILTIN_SCATTERPFDPD);
31090 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31091 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31092 IX86_BUILTIN_SCATTERPFDPS);
31093 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31094 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31095 IX86_BUILTIN_SCATTERPFQPD);
31096 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31097 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31098 IX86_BUILTIN_SCATTERPFQPS);
31099
31100 /* SHA */
31101 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31102 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31103 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31104 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31105 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31106 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31107 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31108 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31109 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31110 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31111 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31112 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31113 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31114 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31115
31116 /* RTM. */
31117 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31118 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31119
31120 /* MMX access to the vec_init patterns. */
31121 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31122 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31123
31124 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31125 V4HI_FTYPE_HI_HI_HI_HI,
31126 IX86_BUILTIN_VEC_INIT_V4HI);
31127
31128 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31129 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31130 IX86_BUILTIN_VEC_INIT_V8QI);
31131
31132 /* Access to the vec_extract patterns. */
31133 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31134 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31135 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31136 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31137 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31138 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31139 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31140 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31141 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31142 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31143
31144 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31145 "__builtin_ia32_vec_ext_v4hi",
31146 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31147
31148 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31149 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31150
31151 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31152 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31153
31154 /* Access to the vec_set patterns. */
31155 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31156 "__builtin_ia32_vec_set_v2di",
31157 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31158
31159 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31160 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31161
31162 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31163 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31164
31165 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31166 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31167
31168 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31169 "__builtin_ia32_vec_set_v4hi",
31170 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31171
31172 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31173 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31174
31175 /* RDSEED */
31176 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31177 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31178 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31179 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31180 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31181 "__builtin_ia32_rdseed_di_step",
31182 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31183
31184 /* ADCX */
31185 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31186 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31187 def_builtin (OPTION_MASK_ISA_64BIT,
31188 "__builtin_ia32_addcarryx_u64",
31189 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31190 IX86_BUILTIN_ADDCARRYX64);
31191
31192 /* SBB */
31193 def_builtin (0, "__builtin_ia32_sbb_u32",
31194 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31195 def_builtin (OPTION_MASK_ISA_64BIT,
31196 "__builtin_ia32_sbb_u64",
31197 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31198 IX86_BUILTIN_SBB64);
31199
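/* Editor's note (illustrative, not from the original source): the
   add-with-carry and subtract-with-borrow builtins above thread the
   carry/borrow byte through multi-word arithmetic, e.g. a 64-bit add
   built from two 32-bit limbs:

     unsigned int lo, hi;
     unsigned char c;
     c = __builtin_ia32_addcarryx_u32 (0, a_lo, b_lo, &lo);
     c = __builtin_ia32_addcarryx_u32 (c, a_hi, b_hi, &hi);

   a_lo, a_hi, b_lo and b_hi are hypothetical operand limbs.  */
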
31200 /* Read/write FLAGS. */
31201 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31202 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31203 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31204 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31205 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31206 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31207 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31208 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31209
31210 /* CLFLUSHOPT. */
31211 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31212 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31213
31214 /* Add FMA4 multi-arg instructions.  */
31215 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31216 {
31217 if (d->name == 0)
31218 continue;
31219
31220 ftype = (enum ix86_builtin_func_type) d->flag;
31221 def_builtin_const (d->mask, d->name, ftype, d->code);
31222 }
31223 }
31224
31225 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31226 to return a pointer to VERSION_DECL if the outcome of the expression
31227 formed by PREDICATE_CHAIN is true. This function will be called during
31228 version dispatch to decide which function version to execute. It returns
31229 the basic block at the end, to which more conditions can be added. */
31230
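/* Editor's sketch of the emitted shape (illustrative, names are
   hypothetical): for a version foo_avx2 guarded by a single predicate,
   the code appended to NEW_BB is roughly

     cond = predicate_fn (predicate_arg);
     if (cond > 0)
       return (void *) &foo_avx2;
     ... control falls through to the returned block, where the next
         version's test is added ...

   When several predicates are chained they are folded with MIN_EXPR,
   so the guard is taken only if every predicate returned non-zero.  */
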
31231 static basic_block
31232 add_condition_to_bb (tree function_decl, tree version_decl,
31233 tree predicate_chain, basic_block new_bb)
31234 {
31235 gimple return_stmt;
31236 tree convert_expr, result_var;
31237 gimple convert_stmt;
31238 gimple call_cond_stmt;
31239 gimple if_else_stmt;
31240
31241 basic_block bb1, bb2, bb3;
31242 edge e12, e23;
31243
31244 tree cond_var, and_expr_var = NULL_TREE;
31245 gimple_seq gseq;
31246
31247 tree predicate_decl, predicate_arg;
31248
31249 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31250
31251 gcc_assert (new_bb != NULL);
31252 gseq = bb_seq (new_bb);
31253
31254
31255 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31256 build_fold_addr_expr (version_decl));
31257 result_var = create_tmp_var (ptr_type_node, NULL);
31258 convert_stmt = gimple_build_assign (result_var, convert_expr);
31259 return_stmt = gimple_build_return (result_var);
31260
31261 if (predicate_chain == NULL_TREE)
31262 {
31263 gimple_seq_add_stmt (&gseq, convert_stmt);
31264 gimple_seq_add_stmt (&gseq, return_stmt);
31265 set_bb_seq (new_bb, gseq);
31266 gimple_set_bb (convert_stmt, new_bb);
31267 gimple_set_bb (return_stmt, new_bb);
31268 pop_cfun ();
31269 return new_bb;
31270 }
31271
31272 while (predicate_chain != NULL)
31273 {
31274 cond_var = create_tmp_var (integer_type_node, NULL);
31275 predicate_decl = TREE_PURPOSE (predicate_chain);
31276 predicate_arg = TREE_VALUE (predicate_chain);
31277 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31278 gimple_call_set_lhs (call_cond_stmt, cond_var);
31279
31280 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31281 gimple_set_bb (call_cond_stmt, new_bb);
31282 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31283
31284 predicate_chain = TREE_CHAIN (predicate_chain);
31285
31286 if (and_expr_var == NULL)
31287 and_expr_var = cond_var;
31288 else
31289 {
31290 gimple assign_stmt;
31291 /* Use MIN_EXPR to check whether any integer is zero:
31292 and_expr_var = min_expr <cond_var, and_expr_var> */
31293 assign_stmt = gimple_build_assign (and_expr_var,
31294 build2 (MIN_EXPR, integer_type_node,
31295 cond_var, and_expr_var));
31296
31297 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31298 gimple_set_bb (assign_stmt, new_bb);
31299 gimple_seq_add_stmt (&gseq, assign_stmt);
31300 }
31301 }
31302
31303 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31304 integer_zero_node,
31305 NULL_TREE, NULL_TREE);
31306 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31307 gimple_set_bb (if_else_stmt, new_bb);
31308 gimple_seq_add_stmt (&gseq, if_else_stmt);
31309
31310 gimple_seq_add_stmt (&gseq, convert_stmt);
31311 gimple_seq_add_stmt (&gseq, return_stmt);
31312 set_bb_seq (new_bb, gseq);
31313
31314 bb1 = new_bb;
31315 e12 = split_block (bb1, if_else_stmt);
31316 bb2 = e12->dest;
31317 e12->flags &= ~EDGE_FALLTHRU;
31318 e12->flags |= EDGE_TRUE_VALUE;
31319
31320 e23 = split_block (bb2, return_stmt);
31321
31322 gimple_set_bb (convert_stmt, bb2);
31323 gimple_set_bb (return_stmt, bb2);
31324
31325 bb3 = e23->dest;
31326 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31327
31328 remove_edge (e23);
31329 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31330
31331 pop_cfun ();
31332
31333 return bb3;
31334 }
31335
31336 /* This parses the attribute arguments to target in DECL and determines
31337 the right builtin to use to match the platform specification.
31338 It returns the priority value for this version decl. If PREDICATE_LIST
31339 is not NULL, it stores the list of cpu features that need to be checked
31340 before dispatching this function. */
31341
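/* Editor's example (illustrative): a declaration such as

     __attribute__ ((target ("arch=core2,sse4.2"))) void foo (void);

   produces a predicate list roughly equivalent to

     __builtin_cpu_is ("core2") && __builtin_cpu_supports ("sse4.2")

   and the returned priority is the highest one found among the arch
   priority and the listed feature priorities.  */
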
31342 static unsigned int
31343 get_builtin_code_for_version (tree decl, tree *predicate_list)
31344 {
31345 tree attrs;
31346 struct cl_target_option cur_target;
31347 tree target_node;
31348 struct cl_target_option *new_target;
31349 const char *arg_str = NULL;
31350 const char *attrs_str = NULL;
31351 char *tok_str = NULL;
31352 char *token;
31353
31354 /* Priority of i386 features, greater value is higher priority. This is
31355 used to decide the order in which function dispatch must happen. For
31356 instance, a version specialized for SSE4.2 should be checked for dispatch
31357 before a version for SSE3, as SSE4.2 implies SSE3. */
31358 enum feature_priority
31359 {
31360 P_ZERO = 0,
31361 P_MMX,
31362 P_SSE,
31363 P_SSE2,
31364 P_SSE3,
31365 P_SSSE3,
31366 P_PROC_SSSE3,
31367 P_SSE4_A,
31368 P_PROC_SSE4_A,
31369 P_SSE4_1,
31370 P_SSE4_2,
31371 P_PROC_SSE4_2,
31372 P_POPCNT,
31373 P_AVX,
31374 P_PROC_AVX,
31375 P_FMA4,
31376 P_XOP,
31377 P_PROC_XOP,
31378 P_FMA,
31379 P_PROC_FMA,
31380 P_AVX2,
31381 P_PROC_AVX2
31382 };
31383
31384 enum feature_priority priority = P_ZERO;
31385
31386 /* These are the target attribute strings for which a dispatcher is
31387 available, from fold_builtin_cpu. */
31388
31389 static struct _feature_list
31390 {
31391 const char *const name;
31392 const enum feature_priority priority;
31393 }
31394 const feature_list[] =
31395 {
31396 {"mmx", P_MMX},
31397 {"sse", P_SSE},
31398 {"sse2", P_SSE2},
31399 {"sse3", P_SSE3},
31400 {"sse4a", P_SSE4_A},
31401 {"ssse3", P_SSSE3},
31402 {"sse4.1", P_SSE4_1},
31403 {"sse4.2", P_SSE4_2},
31404 {"popcnt", P_POPCNT},
31405 {"avx", P_AVX},
31406 {"fma4", P_FMA4},
31407 {"xop", P_XOP},
31408 {"fma", P_FMA},
31409 {"avx2", P_AVX2}
31410 };
31411
31412
31413 static unsigned int NUM_FEATURES
31414 = sizeof (feature_list) / sizeof (struct _feature_list);
31415
31416 unsigned int i;
31417
31418 tree predicate_chain = NULL_TREE;
31419 tree predicate_decl, predicate_arg;
31420
31421 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31422 gcc_assert (attrs != NULL);
31423
31424 attrs = TREE_VALUE (TREE_VALUE (attrs));
31425
31426 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31427 attrs_str = TREE_STRING_POINTER (attrs);
31428
31429 /* Return priority zero for default function. */
31430 if (strcmp (attrs_str, "default") == 0)
31431 return 0;
31432
31433 /* Handle arch= if specified. For priority, set it to be 1 more than
31434 the best instruction set the processor can handle. For instance, if
31435 there is a version for atom and a version for ssse3 (the highest ISA
31436 priority for atom), the atom version must be checked for dispatch
31437 before the ssse3 version. */
31438 if (strstr (attrs_str, "arch=") != NULL)
31439 {
31440 cl_target_option_save (&cur_target, &global_options);
31441 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31442 &global_options_set);
31443
31444 gcc_assert (target_node);
31445 new_target = TREE_TARGET_OPTION (target_node);
31446 gcc_assert (new_target);
31447
31448 if (new_target->arch_specified && new_target->arch > 0)
31449 {
31450 switch (new_target->arch)
31451 {
31452 case PROCESSOR_CORE2:
31453 arg_str = "core2";
31454 priority = P_PROC_SSSE3;
31455 break;
31456 case PROCESSOR_NEHALEM:
31457 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31458 arg_str = "westmere";
31459 else
31460 /* We translate "arch=corei7" and "arch=nehalem" to
31461 "corei7" so that it will be mapped to M_INTEL_COREI7
31462 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31463 arg_str = "corei7";
31464 priority = P_PROC_SSE4_2;
31465 break;
31466 case PROCESSOR_SANDYBRIDGE:
31467 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31468 arg_str = "ivybridge";
31469 else
31470 arg_str = "sandybridge";
31471 priority = P_PROC_AVX;
31472 break;
31473 case PROCESSOR_HASWELL:
31474 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31475 arg_str = "broadwell";
31476 else
31477 arg_str = "haswell";
31478 priority = P_PROC_AVX2;
31479 break;
31480 case PROCESSOR_BONNELL:
31481 arg_str = "bonnell";
31482 priority = P_PROC_SSSE3;
31483 break;
31484 case PROCESSOR_SILVERMONT:
31485 arg_str = "silvermont";
31486 priority = P_PROC_SSE4_2;
31487 break;
31488 case PROCESSOR_AMDFAM10:
31489 arg_str = "amdfam10h";
31490 priority = P_PROC_SSE4_A;
31491 break;
31492 case PROCESSOR_BTVER1:
31493 arg_str = "btver1";
31494 priority = P_PROC_SSE4_A;
31495 break;
31496 case PROCESSOR_BTVER2:
31497 arg_str = "btver2";
31498 priority = P_PROC_AVX;
31499 break;
31500 case PROCESSOR_BDVER1:
31501 arg_str = "bdver1";
31502 priority = P_PROC_XOP;
31503 break;
31504 case PROCESSOR_BDVER2:
31505 arg_str = "bdver2";
31506 priority = P_PROC_FMA;
31507 break;
31508 case PROCESSOR_BDVER3:
31509 arg_str = "bdver3";
31510 priority = P_PROC_FMA;
31511 break;
31512 case PROCESSOR_BDVER4:
31513 arg_str = "bdver4";
31514 priority = P_PROC_AVX2;
31515 break;
31516 }
31517 }
31518
31519 cl_target_option_restore (&global_options, &cur_target);
31520
31521 if (predicate_list && arg_str == NULL)
31522 {
31523 error_at (DECL_SOURCE_LOCATION (decl),
31524 "No dispatcher found for the versioning attributes");
31525 return 0;
31526 }
31527
31528 if (predicate_list)
31529 {
31530 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31531 /* For a C string literal the length includes the trailing NULL. */
31532 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31533 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31534 predicate_chain);
31535 }
31536 }
31537
31538 /* Process feature name. */
31539 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31540 strcpy (tok_str, attrs_str);
31541 token = strtok (tok_str, ",");
31542 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31543
31544 while (token != NULL)
31545 {
31546 /* Do not process "arch=" */
31547 if (strncmp (token, "arch=", 5) == 0)
31548 {
31549 token = strtok (NULL, ",");
31550 continue;
31551 }
31552 for (i = 0; i < NUM_FEATURES; ++i)
31553 {
31554 if (strcmp (token, feature_list[i].name) == 0)
31555 {
31556 if (predicate_list)
31557 {
31558 predicate_arg = build_string_literal (
31559 strlen (feature_list[i].name) + 1,
31560 feature_list[i].name);
31561 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31562 predicate_chain);
31563 }
31564 /* Find the maximum priority feature. */
31565 if (feature_list[i].priority > priority)
31566 priority = feature_list[i].priority;
31567
31568 break;
31569 }
31570 }
31571 if (predicate_list && i == NUM_FEATURES)
31572 {
31573 error_at (DECL_SOURCE_LOCATION (decl),
31574 "No dispatcher found for %s", token);
31575 return 0;
31576 }
31577 token = strtok (NULL, ",");
31578 }
31579 free (tok_str);
31580
31581 if (predicate_list && predicate_chain == NULL_TREE)
31582 {
31583 error_at (DECL_SOURCE_LOCATION (decl),
31584 "No dispatcher found for the versioning attributes : %s",
31585 attrs_str);
31586 return 0;
31587 }
31588 else if (predicate_list)
31589 {
31590 predicate_chain = nreverse (predicate_chain);
31591 *predicate_list = predicate_chain;
31592 }
31593
31594 return priority;
31595 }
31596
31597 /* This compares the priority of target features in function DECL1
31598 and DECL2. It returns positive value if DECL1 is higher priority,
31599 negative value if DECL2 is higher priority and 0 if they are the
31600 same. */
31601
31602 static int
31603 ix86_compare_version_priority (tree decl1, tree decl2)
31604 {
31605 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31606 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31607
31608 return (int)priority1 - (int)priority2;
31609 }
31610
31611 /* V1 and V2 point to function versions with different priorities
31612 based on the target ISA. This function compares their priorities. */
31613
31614 static int
31615 feature_compare (const void *v1, const void *v2)
31616 {
31617 typedef struct _function_version_info
31618 {
31619 tree version_decl;
31620 tree predicate_chain;
31621 unsigned int dispatch_priority;
31622 } function_version_info;
31623
31624 const function_version_info c1 = *(const function_version_info *)v1;
31625 const function_version_info c2 = *(const function_version_info *)v2;
31626 return (c2.dispatch_priority - c1.dispatch_priority);
31627 }
31628
31629 /* This function generates the dispatch function for
31630 multi-versioned functions. DISPATCH_DECL is the function which will
31631 contain the dispatch logic. FNDECLS are the function choices for
31632 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31633 in DISPATCH_DECL in which the dispatch code is generated. */
31634
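/* Editor's sketch (illustrative, names hypothetical): after this
   routine runs, the body of DISPATCH_DECL looks roughly like

     __builtin_cpu_init ();
     if (predicates of the avx2 version hold)    return &foo_avx2;
     if (predicates of the sse4.2 version hold)  return &foo_sse42;
     return &foo_default;   -- the default version comes last

   with versions tried in decreasing dispatch priority.  */
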
31635 static int
31636 dispatch_function_versions (tree dispatch_decl,
31637 void *fndecls_p,
31638 basic_block *empty_bb)
31639 {
31640 tree default_decl;
31641 gimple ifunc_cpu_init_stmt;
31642 gimple_seq gseq;
31643 int ix;
31644 tree ele;
31645 vec<tree> *fndecls;
31646 unsigned int num_versions = 0;
31647 unsigned int actual_versions = 0;
31648 unsigned int i;
31649
31650 struct _function_version_info
31651 {
31652 tree version_decl;
31653 tree predicate_chain;
31654 unsigned int dispatch_priority;
31655 }*function_version_info;
31656
31657 gcc_assert (dispatch_decl != NULL
31658 && fndecls_p != NULL
31659 && empty_bb != NULL);
31660
31661 /* fndecls_p is actually a vector.  */
31662 fndecls = static_cast<vec<tree> *> (fndecls_p);
31663
31664 /* At least one more version other than the default. */
31665 num_versions = fndecls->length ();
31666 gcc_assert (num_versions >= 2);
31667
31668 function_version_info = (struct _function_version_info *)
31669 XNEWVEC (struct _function_version_info, (num_versions - 1));
31670
31671 /* The first version in the vector is the default decl. */
31672 default_decl = (*fndecls)[0];
31673
31674 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31675
31676 gseq = bb_seq (*empty_bb);
31677 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31678 constructors, so explicitly call __builtin_cpu_init here. */
31679 ifunc_cpu_init_stmt = gimple_build_call_vec (
31680 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31681 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31682 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31683 set_bb_seq (*empty_bb, gseq);
31684
31685 pop_cfun ();
31686
31687
31688 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31689 {
31690 tree version_decl = ele;
31691 tree predicate_chain = NULL_TREE;
31692 unsigned int priority;
31693 /* Get attribute string, parse it and find the right predicate decl.
31694 The predicate function could be a lengthy combination of many
31695 features, like arch-type and various isa-variants. */
31696 priority = get_builtin_code_for_version (version_decl,
31697 &predicate_chain);
31698
31699 if (predicate_chain == NULL_TREE)
31700 continue;
31701
31702 function_version_info [actual_versions].version_decl = version_decl;
31703 function_version_info [actual_versions].predicate_chain
31704 = predicate_chain;
31705 function_version_info [actual_versions].dispatch_priority = priority;
31706 actual_versions++;
31707 }
31708
31709 /* Sort the versions according to descending order of dispatch priority. The
31710 priority is based on the ISA. This is not a perfect solution. There
31711 could still be ambiguity. If more than one function version is suitable
31712 to execute, which one should be dispatched? In future, allow the user
31713 to specify a dispatch priority next to the version. */
31714 qsort (function_version_info, actual_versions,
31715 sizeof (struct _function_version_info), feature_compare);
31716
31717 for (i = 0; i < actual_versions; ++i)
31718 *empty_bb = add_condition_to_bb (dispatch_decl,
31719 function_version_info[i].version_decl,
31720 function_version_info[i].predicate_chain,
31721 *empty_bb);
31722
31723 /* Dispatch the default version at the end.  */
31724 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31725 NULL, *empty_bb);
31726
31727 free (function_version_info);
31728 return 0;
31729 }
31730
31731 /* Comparator function to be used in qsort routine to sort attribute
31732 specification strings to "target". */
31733
31734 static int
31735 attr_strcmp (const void *v1, const void *v2)
31736 {
31737 const char *c1 = *(char *const*)v1;
31738 const char *c2 = *(char *const*)v2;
31739 return strcmp (c1, c2);
31740 }
31741
31742 /* ARGLIST is the argument to target attribute. This function tokenizes
31743 the comma separated arguments, sorts them and returns a string which
31744 is a unique identifier for the comma separated arguments. It also
31745 replaces non-identifier characters "=,-" with "_". */
31746
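/* Editor's example (illustrative): for target ("sse4.2,arch=core2")
   the '=' is rewritten to '_', the comma-separated tokens are sorted,
   and the returned string is "arch_core2_sse4.2".  */
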
31747 static char *
31748 sorted_attr_string (tree arglist)
31749 {
31750 tree arg;
31751 size_t str_len_sum = 0;
31752 char **args = NULL;
31753 char *attr_str, *ret_str;
31754 char *attr = NULL;
31755 unsigned int argnum = 1;
31756 unsigned int i;
31757
31758 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31759 {
31760 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31761 size_t len = strlen (str);
31762 str_len_sum += len + 1;
31763 if (arg != arglist)
31764 argnum++;
31765 for (i = 0; i < strlen (str); i++)
31766 if (str[i] == ',')
31767 argnum++;
31768 }
31769
31770 attr_str = XNEWVEC (char, str_len_sum);
31771 str_len_sum = 0;
31772 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31773 {
31774 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31775 size_t len = strlen (str);
31776 memcpy (attr_str + str_len_sum, str, len);
31777 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31778 str_len_sum += len + 1;
31779 }
31780
31781 /* Replace "=,-" with "_". */
31782 for (i = 0; i < strlen (attr_str); i++)
31783 if (attr_str[i] == '=' || attr_str[i]== '-')
31784 attr_str[i] = '_';
31785
31786 if (argnum == 1)
31787 return attr_str;
31788
31789 args = XNEWVEC (char *, argnum);
31790
31791 i = 0;
31792 attr = strtok (attr_str, ",");
31793 while (attr != NULL)
31794 {
31795 args[i] = attr;
31796 i++;
31797 attr = strtok (NULL, ",");
31798 }
31799
31800 qsort (args, argnum, sizeof (char *), attr_strcmp);
31801
31802 ret_str = XNEWVEC (char, str_len_sum);
31803 str_len_sum = 0;
31804 for (i = 0; i < argnum; i++)
31805 {
31806 size_t len = strlen (args[i]);
31807 memcpy (ret_str + str_len_sum, args[i], len);
31808 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31809 str_len_sum += len + 1;
31810 }
31811
31812 XDELETEVEC (args);
31813 XDELETEVEC (attr_str);
31814 return ret_str;
31815 }
31816
31817 /* This function changes the assembler name for functions that are
31818 versions. If DECL is a function version and has a "target"
31819 attribute, it appends the attribute string to its assembler name. */
31820
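/* Editor's example (illustrative): a version of foo declared with
   target ("sse4.2,arch=core2") is renamed to "foo.arch_core2_sse4.2",
   while the "default" version keeps its original assembler name.  */
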
31821 static tree
31822 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31823 {
31824 tree version_attr;
31825 const char *orig_name, *version_string;
31826 char *attr_str, *assembler_name;
31827
31828 if (DECL_DECLARED_INLINE_P (decl)
31829 && lookup_attribute ("gnu_inline",
31830 DECL_ATTRIBUTES (decl)))
31831 error_at (DECL_SOURCE_LOCATION (decl),
31832 "Function versions cannot be marked as gnu_inline,"
31833 " bodies have to be generated");
31834
31835 if (DECL_VIRTUAL_P (decl)
31836 || DECL_VINDEX (decl))
31837 sorry ("Virtual function multiversioning not supported");
31838
31839 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31840
31841 /* target attribute string cannot be NULL. */
31842 gcc_assert (version_attr != NULL_TREE);
31843
31844 orig_name = IDENTIFIER_POINTER (id);
31845 version_string
31846 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31847
31848 if (strcmp (version_string, "default") == 0)
31849 return id;
31850
31851 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31852 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31853
31854 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31855
31856 /* Allow assembler name to be modified if already set. */
31857 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31858 SET_DECL_RTL (decl, NULL);
31859
31860 tree ret = get_identifier (assembler_name);
31861 XDELETEVEC (attr_str);
31862 XDELETEVEC (assembler_name);
31863 return ret;
31864 }
31865
31866 /* This function returns true if FN1 and FN2 are versions of the same function,
31867 that is, the target strings of the function decls are different. This assumes
31868 that FN1 and FN2 have the same signature. */
31869
31870 static bool
31871 ix86_function_versions (tree fn1, tree fn2)
31872 {
31873 tree attr1, attr2;
31874 char *target1, *target2;
31875 bool result;
31876
31877 if (TREE_CODE (fn1) != FUNCTION_DECL
31878 || TREE_CODE (fn2) != FUNCTION_DECL)
31879 return false;
31880
31881 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31882 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31883
31884 /* At least one function decl should have the target attribute specified. */
31885 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31886 return false;
31887
31888 /* Diagnose missing target attribute if one of the decls is already
31889 multi-versioned. */
31890 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31891 {
31892 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31893 {
31894 if (attr2 != NULL_TREE)
31895 {
31896 tree tem = fn1;
31897 fn1 = fn2;
31898 fn2 = tem;
31899 attr1 = attr2;
31900 }
31901 error_at (DECL_SOURCE_LOCATION (fn2),
31902 "missing %<target%> attribute for multi-versioned %D",
31903 fn2);
31904 inform (DECL_SOURCE_LOCATION (fn1),
31905 "previous declaration of %D", fn1);
31906 /* Prevent diagnosing of the same error multiple times. */
31907 DECL_ATTRIBUTES (fn2)
31908 = tree_cons (get_identifier ("target"),
31909 copy_node (TREE_VALUE (attr1)),
31910 DECL_ATTRIBUTES (fn2));
31911 }
31912 return false;
31913 }
31914
31915 target1 = sorted_attr_string (TREE_VALUE (attr1));
31916 target2 = sorted_attr_string (TREE_VALUE (attr2));
31917
31918 /* The sorted target strings must be different for fn1 and fn2
31919 to be versions. */
31920 if (strcmp (target1, target2) == 0)
31921 result = false;
31922 else
31923 result = true;
31924
31925 XDELETEVEC (target1);
31926 XDELETEVEC (target2);
31927
31928 return result;
31929 }
31930
31931 static tree
31932 ix86_mangle_decl_assembler_name (tree decl, tree id)
31933 {
31934 /* For function version, add the target suffix to the assembler name. */
31935 if (TREE_CODE (decl) == FUNCTION_DECL
31936 && DECL_FUNCTION_VERSIONED (decl))
31937 id = ix86_mangle_function_version_assembler_name (decl, id);
31938 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31939 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31940 #endif
31941
31942 return id;
31943 }
31944
31945 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31946 is true, append the full path name of the source file. */
31947
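/* Editor's example (illustrative): make_name (foo, "resolver", false)
   yields "foo.resolver"; with make_unique true it becomes something
   like "foo.<file-unique-string>.resolver" so local symbols from
   different translation units cannot collide.  */
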
31948 static char *
31949 make_name (tree decl, const char *suffix, bool make_unique)
31950 {
31951 char *global_var_name;
31952 int name_len;
31953 const char *name;
31954 const char *unique_name = NULL;
31955
31956 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31957
31958 /* Get a unique name that can be used globally without any chance
31959 of collision at link time. */
31960 if (make_unique)
31961 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31962
31963 name_len = strlen (name) + strlen (suffix) + 2;
31964
31965 if (make_unique)
31966 name_len += strlen (unique_name) + 1;
31967 global_var_name = XNEWVEC (char, name_len);
31968
31969 /* Use '.' to concatenate names as it is demangler friendly. */
31970 if (make_unique)
31971 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31972 suffix);
31973 else
31974 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31975
31976 return global_var_name;
31977 }
31978
31979 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31980
31981 /* Make a dispatcher declaration for the multi-versioned function DECL.
31982 Calls to DECL function will be replaced with calls to the dispatcher
31983 by the front-end. Return the decl created. */
31984
31985 static tree
31986 make_dispatcher_decl (const tree decl)
31987 {
31988 tree func_decl;
31989 char *func_name;
31990 tree fn_type, func_type;
31991 bool is_uniq = false;
31992
31993 if (TREE_PUBLIC (decl) == 0)
31994 is_uniq = true;
31995
31996 func_name = make_name (decl, "ifunc", is_uniq);
31997
31998 fn_type = TREE_TYPE (decl);
31999 func_type = build_function_type (TREE_TYPE (fn_type),
32000 TYPE_ARG_TYPES (fn_type));
32001
32002 func_decl = build_fn_decl (func_name, func_type);
32003 XDELETEVEC (func_name);
32004 TREE_USED (func_decl) = 1;
32005 DECL_CONTEXT (func_decl) = NULL_TREE;
32006 DECL_INITIAL (func_decl) = error_mark_node;
32007 DECL_ARTIFICIAL (func_decl) = 1;
32008 /* Mark this function as external; the resolver will flip it again if
32009 it gets generated. */
32010 DECL_EXTERNAL (func_decl) = 1;
32011 /* This will be an IFUNC, and IFUNCs have to be externally visible. */
32012 TREE_PUBLIC (func_decl) = 1;
32013
32014 return func_decl;
32015 }
32016
32017 #endif
32018
32019 /* Returns true if DECL is multi-versioned and is the default function,
32020 that is, it is not tagged with a target-specific optimization. */
32021
32022 static bool
32023 is_function_default_version (const tree decl)
32024 {
32025 if (TREE_CODE (decl) != FUNCTION_DECL
32026 || !DECL_FUNCTION_VERSIONED (decl))
32027 return false;
32028 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32029 gcc_assert (attr);
32030 attr = TREE_VALUE (TREE_VALUE (attr));
32031 return (TREE_CODE (attr) == STRING_CST
32032 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32033 }
32034
32035 /* Make a dispatcher declaration for the multi-versioned function DECL.
32036 Calls to DECL function will be replaced with calls to the dispatcher
32037 by the front-end. Returns the decl of the dispatcher function. */
32038
32039 static tree
32040 ix86_get_function_versions_dispatcher (void *decl)
32041 {
32042 tree fn = (tree) decl;
32043 struct cgraph_node *node = NULL;
32044 struct cgraph_node *default_node = NULL;
32045 struct cgraph_function_version_info *node_v = NULL;
32046 struct cgraph_function_version_info *first_v = NULL;
32047
32048 tree dispatch_decl = NULL;
32049
32050 struct cgraph_function_version_info *default_version_info = NULL;
32051
32052 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32053
32054 node = cgraph_node::get (fn);
32055 gcc_assert (node != NULL);
32056
32057 node_v = node->function_version ();
32058 gcc_assert (node_v != NULL);
32059
32060 if (node_v->dispatcher_resolver != NULL)
32061 return node_v->dispatcher_resolver;
32062
32063 /* Find the default version and make it the first node. */
32064 first_v = node_v;
32065 /* Go to the beginning of the chain. */
32066 while (first_v->prev != NULL)
32067 first_v = first_v->prev;
32068 default_version_info = first_v;
32069 while (default_version_info != NULL)
32070 {
32071 if (is_function_default_version
32072 (default_version_info->this_node->decl))
32073 break;
32074 default_version_info = default_version_info->next;
32075 }
32076
32077 /* If there is no default node, just return NULL. */
32078 if (default_version_info == NULL)
32079 return NULL;
32080
32081 /* Make default info the first node. */
32082 if (first_v != default_version_info)
32083 {
32084 default_version_info->prev->next = default_version_info->next;
32085 if (default_version_info->next)
32086 default_version_info->next->prev = default_version_info->prev;
32087 first_v->prev = default_version_info;
32088 default_version_info->next = first_v;
32089 default_version_info->prev = NULL;
32090 }
32091
32092 default_node = default_version_info->this_node;
32093
32094 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32095 if (targetm.has_ifunc_p ())
32096 {
32097 struct cgraph_function_version_info *it_v = NULL;
32098 struct cgraph_node *dispatcher_node = NULL;
32099 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32100
32101 /* Right now, the dispatching is done via ifunc. */
32102 dispatch_decl = make_dispatcher_decl (default_node->decl);
32103
32104 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32105 gcc_assert (dispatcher_node != NULL);
32106 dispatcher_node->dispatcher_function = 1;
32107 dispatcher_version_info
32108 = dispatcher_node->insert_new_function_version ();
32109 dispatcher_version_info->next = default_version_info;
32110 dispatcher_node->definition = 1;
32111
32112 /* Set the dispatcher for all the versions. */
32113 it_v = default_version_info;
32114 while (it_v != NULL)
32115 {
32116 it_v->dispatcher_resolver = dispatch_decl;
32117 it_v = it_v->next;
32118 }
32119 }
32120 else
32121 #endif
32122 {
32123 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32124 "multiversioning needs ifunc which is not supported "
32125 "on this target");
32126 }
32127
32128 return dispatch_decl;
32129 }
32130
32131 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32132 it to CHAIN. */
32133
32134 static tree
32135 make_attribute (const char *name, const char *arg_name, tree chain)
32136 {
32137 tree attr_name;
32138 tree attr_arg_name;
32139 tree attr_args;
32140 tree attr;
32141
32142 attr_name = get_identifier (name);
32143 attr_arg_name = build_string (strlen (arg_name), arg_name);
32144 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32145 attr = tree_cons (attr_name, attr_args, chain);
32146 return attr;
32147 }
32148
32149 /* Make the resolver function decl to dispatch the versions of
32150 a multi-versioned function, DEFAULT_DECL. Create an
32151 empty basic block in the resolver and store the pointer in
32152 EMPTY_BB. Return the decl of the resolver function. */
32153
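/* Editor's sketch (illustrative, names hypothetical): for a public
   multi-versioned foo this ends up producing roughly

     void *foo.resolver (void);       -- body built below
     foo.ifunc declared with __attribute__ ((ifunc ("foo.resolver")))

   i.e. DISPATCH_DECL is turned into an IFUNC whose resolver is the
   function created here, so the dynamic loader picks the version.  */
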
32154 static tree
32155 make_resolver_func (const tree default_decl,
32156 const tree dispatch_decl,
32157 basic_block *empty_bb)
32158 {
32159 char *resolver_name;
32160 tree decl, type, decl_name, t;
32161 bool is_uniq = false;
32162
32163 /* IFUNCs have to be globally visible. So, if the default_decl is
32164 not, then the name of the IFUNC should be made unique. */
32165 if (TREE_PUBLIC (default_decl) == 0)
32166 is_uniq = true;
32167
32168 /* Append the filename to the resolver function if the versions are
32169 not externally visible. This is because the resolver function has
32170 to be externally visible for the loader to find it. So, appending
32171 the filename will prevent conflicts with a resolver function from
32172 another module which is based on the same version name. */
32173 resolver_name = make_name (default_decl, "resolver", is_uniq);
32174
32175 /* The resolver function should return a (void *). */
32176 type = build_function_type_list (ptr_type_node, NULL_TREE);
32177
32178 decl = build_fn_decl (resolver_name, type);
32179 decl_name = get_identifier (resolver_name);
32180 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32181
32182 DECL_NAME (decl) = decl_name;
32183 TREE_USED (decl) = 1;
32184 DECL_ARTIFICIAL (decl) = 1;
32185 DECL_IGNORED_P (decl) = 0;
32186 /* IFUNC resolvers have to be externally visible. */
32187 TREE_PUBLIC (decl) = 1;
32188 DECL_UNINLINABLE (decl) = 1;
32189
32190 /* The resolver is not external; its body is generated here. */
32191 DECL_EXTERNAL (decl) = 0;
32192 DECL_EXTERNAL (dispatch_decl) = 0;
32193
32194 DECL_CONTEXT (decl) = NULL_TREE;
32195 DECL_INITIAL (decl) = make_node (BLOCK);
32196 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32197
32198 if (DECL_COMDAT_GROUP (default_decl)
32199 || TREE_PUBLIC (default_decl))
32200 {
32201 /* In this case, each translation unit with a call to this
32202 versioned function will put out a resolver. Ensure it
32203 is comdat to keep just one copy. */
32204 DECL_COMDAT (decl) = 1;
32205 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32206 }
32207 /* Build result decl and add to function_decl. */
32208 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32209 DECL_ARTIFICIAL (t) = 1;
32210 DECL_IGNORED_P (t) = 1;
32211 DECL_RESULT (decl) = t;
32212
32213 gimplify_function_tree (decl);
32214 push_cfun (DECL_STRUCT_FUNCTION (decl));
32215 *empty_bb = init_lowered_empty_function (decl, false);
32216
32217 cgraph_node::add_new_function (decl, true);
32218 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32219
32220 pop_cfun ();
32221
32222 gcc_assert (dispatch_decl != NULL);
32223 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32224 DECL_ATTRIBUTES (dispatch_decl)
32225 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32226
32227 /* Create the alias for dispatch to resolver here. */
32228 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32229 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32230 XDELETEVEC (resolver_name);
32231 return decl;
32232 }
32233
32234 /* Generate the dispatching code body to dispatch multi-versioned function
32235 DECL. The target hook is called to process the "target" attributes and
32236 provide the code to dispatch the right function at run-time. NODE points
32237 to the dispatcher decl whose body will be created. */
32238
32239 static tree
32240 ix86_generate_version_dispatcher_body (void *node_p)
32241 {
32242 tree resolver_decl;
32243 basic_block empty_bb;
32244 tree default_ver_decl;
32245 struct cgraph_node *versn;
32246 struct cgraph_node *node;
32247
32248 struct cgraph_function_version_info *node_version_info = NULL;
32249 struct cgraph_function_version_info *versn_info = NULL;
32250
32251 node = (cgraph_node *)node_p;
32252
32253 node_version_info = node->function_version ();
32254 gcc_assert (node->dispatcher_function
32255 && node_version_info != NULL);
32256
32257 if (node_version_info->dispatcher_resolver)
32258 return node_version_info->dispatcher_resolver;
32259
32260 /* The first version in the chain corresponds to the default version. */
32261 default_ver_decl = node_version_info->next->this_node->decl;
32262
32263 /* node is going to be an alias, so remove the finalized bit. */
32264 node->definition = false;
32265
32266 resolver_decl = make_resolver_func (default_ver_decl,
32267 node->decl, &empty_bb);
32268
32269 node_version_info->dispatcher_resolver = resolver_decl;
32270
32271 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32272
32273 auto_vec<tree, 2> fn_ver_vec;
32274
32275 for (versn_info = node_version_info->next; versn_info;
32276 versn_info = versn_info->next)
32277 {
32278 versn = versn_info->this_node;
32279 /* Check for virtual functions here again, as by this time it should
32280 have been determined if this function needs a vtable index or
32281 not. This happens for methods in derived classes that override
32282 virtual methods in base classes but are not explicitly marked as
32283 virtual. */
32284 if (DECL_VINDEX (versn->decl))
32285 sorry ("Virtual function multiversioning not supported");
32286
32287 fn_ver_vec.safe_push (versn->decl);
32288 }
32289
32290 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32291 cgraph_edge::rebuild_edges ();
32292 pop_cfun ();
32293 return resolver_decl;
32294 }
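/* Illustrative sketch (not part of GCC itself): the dispatcher body built
   above is what a multi-versioned function resolves through.  Assuming a
   C++ translation unit such as the following (foo and bar are hypothetical
   names), the front end records foo's versions and this hook emits the
   ifunc resolver that picks one of them at load time:

     __attribute__ ((target ("default"))) int foo () { return 0; }
     __attribute__ ((target ("sse4.2")))  int foo () { return 1; }
     __attribute__ ((target ("avx2")))    int foo () { return 2; }

     int bar () { return foo (); }   // calls foo through the ifunc dispatcher

   The default version is the first entry in the version chain, which is why
   the code above reads node_version_info->next->this_node->decl.  */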
32295 /* This builds the __processor_model struct type defined in
32296 libgcc/config/i386/cpuinfo.c.  */
32297
32298 static tree
32299 build_processor_model_struct (void)
32300 {
32301 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32302 "__cpu_features"};
32303 tree field = NULL_TREE, field_chain = NULL_TREE;
32304 int i;
32305 tree type = make_node (RECORD_TYPE);
32306
32307 /* The first 3 fields are unsigned int. */
32308 for (i = 0; i < 3; ++i)
32309 {
32310 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32311 get_identifier (field_name[i]), unsigned_type_node);
32312 if (field_chain != NULL_TREE)
32313 DECL_CHAIN (field) = field_chain;
32314 field_chain = field;
32315 }
32316
32317 /* The last field is an array of unsigned integers of size one. */
32318 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32319 get_identifier (field_name[3]),
32320 build_array_type (unsigned_type_node,
32321 build_index_type (size_one_node)));
32322 if (field_chain != NULL_TREE)
32323 DECL_CHAIN (field) = field_chain;
32324 field_chain = field;
32325
32326 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32327 return type;
32328 }
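/* For reference, the layout built above is intended to mirror the variable
   defined in libgcc/config/i386/cpuinfo.c (shown here only as a sketch):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     } __cpu_model;

   fold_builtin_cpu below relies on the fields appearing in exactly this
   order when it walks DECL_CHAIN.  */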
32329
32330 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32331
32332 static tree
32333 make_var_decl (tree type, const char *name)
32334 {
32335 tree new_decl;
32336
32337 new_decl = build_decl (UNKNOWN_LOCATION,
32338 VAR_DECL,
32339 get_identifier (name),
32340 type);
32341
32342 DECL_EXTERNAL (new_decl) = 1;
32343 TREE_STATIC (new_decl) = 1;
32344 TREE_PUBLIC (new_decl) = 1;
32345 DECL_INITIAL (new_decl) = 0;
32346 DECL_ARTIFICIAL (new_decl) = 0;
32347 DECL_PRESERVE_P (new_decl) = 1;
32348
32349 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32350 assemble_variable (new_decl, 0, 0, 0);
32351
32352 return new_decl;
32353 }
32354
32355 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32356 into a check against the __cpu_model variable defined in libgcc/config/i386/cpuinfo.c.  */
32357
32358 static tree
32359 fold_builtin_cpu (tree fndecl, tree *args)
32360 {
32361 unsigned int i;
32362 enum ix86_builtins fn_code = (enum ix86_builtins)
32363 DECL_FUNCTION_CODE (fndecl);
32364 tree param_string_cst = NULL;
32365
32366 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32367 enum processor_features
32368 {
32369 F_CMOV = 0,
32370 F_MMX,
32371 F_POPCNT,
32372 F_SSE,
32373 F_SSE2,
32374 F_SSE3,
32375 F_SSSE3,
32376 F_SSE4_1,
32377 F_SSE4_2,
32378 F_AVX,
32379 F_AVX2,
32380 F_SSE4_A,
32381 F_FMA4,
32382 F_XOP,
32383 F_FMA,
32384 F_MAX
32385 };
32386
32387 /* These are the values for vendor types and cpu types and subtypes
32388 in cpuinfo.c.  The corresponding M_CPU_TYPE_START / M_CPU_SUBTYPE_START
32389 value is subtracted from cpu types and subtypes before comparing. */
32390 enum processor_model
32391 {
32392 M_INTEL = 1,
32393 M_AMD,
32394 M_CPU_TYPE_START,
32395 M_INTEL_BONNELL,
32396 M_INTEL_CORE2,
32397 M_INTEL_COREI7,
32398 M_AMDFAM10H,
32399 M_AMDFAM15H,
32400 M_INTEL_SILVERMONT,
32401 M_AMD_BTVER1,
32402 M_AMD_BTVER2,
32403 M_CPU_SUBTYPE_START,
32404 M_INTEL_COREI7_NEHALEM,
32405 M_INTEL_COREI7_WESTMERE,
32406 M_INTEL_COREI7_SANDYBRIDGE,
32407 M_AMDFAM10H_BARCELONA,
32408 M_AMDFAM10H_SHANGHAI,
32409 M_AMDFAM10H_ISTANBUL,
32410 M_AMDFAM15H_BDVER1,
32411 M_AMDFAM15H_BDVER2,
32412 M_AMDFAM15H_BDVER3,
32413 M_AMDFAM15H_BDVER4,
32414 M_INTEL_COREI7_IVYBRIDGE,
32415 M_INTEL_COREI7_HASWELL
32416 };
32417
32418 static struct _arch_names_table
32419 {
32420 const char *const name;
32421 const enum processor_model model;
32422 }
32423 const arch_names_table[] =
32424 {
32425 {"amd", M_AMD},
32426 {"intel", M_INTEL},
32427 {"atom", M_INTEL_BONNELL},
32428 {"slm", M_INTEL_SILVERMONT},
32429 {"core2", M_INTEL_CORE2},
32430 {"corei7", M_INTEL_COREI7},
32431 {"nehalem", M_INTEL_COREI7_NEHALEM},
32432 {"westmere", M_INTEL_COREI7_WESTMERE},
32433 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32434 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32435 {"haswell", M_INTEL_COREI7_HASWELL},
32436 {"bonnell", M_INTEL_BONNELL},
32437 {"silvermont", M_INTEL_SILVERMONT},
32438 {"amdfam10h", M_AMDFAM10H},
32439 {"barcelona", M_AMDFAM10H_BARCELONA},
32440 {"shanghai", M_AMDFAM10H_SHANGHAI},
32441 {"istanbul", M_AMDFAM10H_ISTANBUL},
32442 {"btver1", M_AMD_BTVER1},
32443 {"amdfam15h", M_AMDFAM15H},
32444 {"bdver1", M_AMDFAM15H_BDVER1},
32445 {"bdver2", M_AMDFAM15H_BDVER2},
32446 {"bdver3", M_AMDFAM15H_BDVER3},
32447 {"bdver4", M_AMDFAM15H_BDVER4},
32448 {"btver2", M_AMD_BTVER2},
32449 };
32450
32451 static struct _isa_names_table
32452 {
32453 const char *const name;
32454 const enum processor_features feature;
32455 }
32456 const isa_names_table[] =
32457 {
32458 {"cmov", F_CMOV},
32459 {"mmx", F_MMX},
32460 {"popcnt", F_POPCNT},
32461 {"sse", F_SSE},
32462 {"sse2", F_SSE2},
32463 {"sse3", F_SSE3},
32464 {"ssse3", F_SSSE3},
32465 {"sse4a", F_SSE4_A},
32466 {"sse4.1", F_SSE4_1},
32467 {"sse4.2", F_SSE4_2},
32468 {"avx", F_AVX},
32469 {"fma4", F_FMA4},
32470 {"xop", F_XOP},
32471 {"fma", F_FMA},
32472 {"avx2", F_AVX2}
32473 };
32474
32475 tree __processor_model_type = build_processor_model_struct ();
32476 tree __cpu_model_var = make_var_decl (__processor_model_type,
32477 "__cpu_model");
32478
32479
32480 varpool_node::add (__cpu_model_var);
32481
32482 gcc_assert ((args != NULL) && (*args != NULL));
32483
32484 param_string_cst = *args;
32485 while (param_string_cst
32486 && TREE_CODE (param_string_cst) != STRING_CST)
32487 {
32488 /* *args must be an expr that can contain other EXPRs leading to a
32489 STRING_CST. */
32490 if (!EXPR_P (param_string_cst))
32491 {
32492 error ("Parameter to builtin must be a string constant or literal");
32493 return integer_zero_node;
32494 }
32495 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32496 }
32497
32498 gcc_assert (param_string_cst);
32499
32500 if (fn_code == IX86_BUILTIN_CPU_IS)
32501 {
32502 tree ref;
32503 tree field;
32504 tree final;
32505
32506 unsigned int field_val = 0;
32507 unsigned int NUM_ARCH_NAMES
32508 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32509
32510 for (i = 0; i < NUM_ARCH_NAMES; i++)
32511 if (strcmp (arch_names_table[i].name,
32512 TREE_STRING_POINTER (param_string_cst)) == 0)
32513 break;
32514
32515 if (i == NUM_ARCH_NAMES)
32516 {
32517 error ("Parameter to builtin not valid: %s",
32518 TREE_STRING_POINTER (param_string_cst));
32519 return integer_zero_node;
32520 }
32521
32522 field = TYPE_FIELDS (__processor_model_type);
32523 field_val = arch_names_table[i].model;
32524
32525 /* CPU types are stored in the next field. */
32526 if (field_val > M_CPU_TYPE_START
32527 && field_val < M_CPU_SUBTYPE_START)
32528 {
32529 field = DECL_CHAIN (field);
32530 field_val -= M_CPU_TYPE_START;
32531 }
32532
32533 /* CPU subtypes are stored in the third field, __cpu_subtype. */
32534 if (field_val > M_CPU_SUBTYPE_START)
32535 {
32536 field = DECL_CHAIN (DECL_CHAIN (field));
32537 field_val -= M_CPU_SUBTYPE_START;
32538 }
32539
32540 /* Get the appropriate field in __cpu_model. */
32541 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32542 field, NULL_TREE);
32543
32544 /* Check the value. */
32545 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32546 build_int_cstu (unsigned_type_node, field_val));
32547 return build1 (CONVERT_EXPR, integer_type_node, final);
32548 }
32549 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32550 {
32551 tree ref;
32552 tree array_elt;
32553 tree field;
32554 tree final;
32555
32556 unsigned int field_val = 0;
32557 unsigned int NUM_ISA_NAMES
32558 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32559
32560 for (i = 0; i < NUM_ISA_NAMES; i++)
32561 if (strcmp (isa_names_table[i].name,
32562 TREE_STRING_POINTER (param_string_cst)) == 0)
32563 break;
32564
32565 if (i == NUM_ISA_NAMES)
32566 {
32567 error ("Parameter to builtin not valid: %s",
32568 TREE_STRING_POINTER (param_string_cst));
32569 return integer_zero_node;
32570 }
32571
32572 field = TYPE_FIELDS (__processor_model_type);
32573 /* Get the last field, which is __cpu_features. */
32574 while (DECL_CHAIN (field))
32575 field = DECL_CHAIN (field);
32576
32577 /* Get the appropriate field: __cpu_model.__cpu_features */
32578 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32579 field, NULL_TREE);
32580
32581 /* Access the 0th element of __cpu_features array. */
32582 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32583 integer_zero_node, NULL_TREE, NULL_TREE);
32584
32585 field_val = (1 << isa_names_table[i].feature);
32586 /* Return __cpu_model.__cpu_features[0] & field_val */
32587 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32588 build_int_cstu (unsigned_type_node, field_val));
32589 return build1 (CONVERT_EXPR, integer_type_node, final);
32590 }
32591 gcc_unreachable ();
32592 }
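/* Worked example of the folding above (a sketch, not generated verbatim):

     __builtin_cpu_is ("amd")        ->  __cpu_model.__cpu_vendor == M_AMD
     __builtin_cpu_is ("haswell")    ->  __cpu_model.__cpu_subtype
                                           == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START
     __builtin_cpu_supports ("avx2") ->  __cpu_model.__cpu_features[0] & (1 << F_AVX2)

   each result then being converted to int, as the build1 (CONVERT_EXPR, ...)
   calls show.  */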
32593
32594 static tree
32595 ix86_fold_builtin (tree fndecl, int n_args,
32596 tree *args, bool ignore ATTRIBUTE_UNUSED)
32597 {
32598 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32599 {
32600 enum ix86_builtins fn_code = (enum ix86_builtins)
32601 DECL_FUNCTION_CODE (fndecl);
32602 if (fn_code == IX86_BUILTIN_CPU_IS
32603 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32604 {
32605 gcc_assert (n_args == 1);
32606 return fold_builtin_cpu (fndecl, args);
32607 }
32608 }
32609
32610 #ifdef SUBTARGET_FOLD_BUILTIN
32611 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32612 #endif
32613
32614 return NULL_TREE;
32615 }
32616
32617 /* Make builtins to detect cpu type and features supported. NAME is
32618 the builtin name, CODE is the builtin code, and FTYPE is the function
32619 type of the builtin. */
32620
32621 static void
32622 make_cpu_type_builtin (const char* name, int code,
32623 enum ix86_builtin_func_type ftype, bool is_const)
32624 {
32625 tree decl;
32626 tree type;
32627
32628 type = ix86_get_builtin_func_type (ftype);
32629 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32630 NULL, NULL_TREE);
32631 gcc_assert (decl != NULL_TREE);
32632 ix86_builtins[(int) code] = decl;
32633 TREE_READONLY (decl) = is_const;
32634 }
32635
32636 /* Make builtins to get CPU type and features supported. The created
32637 builtins are:
32638
32639 __builtin_cpu_init (), to detect cpu type and features,
32640 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32641 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32642 */
32643
32644 static void
32645 ix86_init_platform_type_builtins (void)
32646 {
32647 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32648 INT_FTYPE_VOID, false);
32649 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32650 INT_FTYPE_PCCHAR, true);
32651 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32652 INT_FTYPE_PCCHAR, true);
32653 }
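/* Typical use of these builtins from user code (illustrative only):

     int
     main (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         __builtin_puts ("AVX2 available");
       if (__builtin_cpu_is ("haswell"))
         __builtin_puts ("running on Haswell");
       return 0;
     }

   Calling __builtin_cpu_init explicitly is only required when the checks run
   before constructors do, since libgcc initializes __cpu_model from a
   constructor.  */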
32654
32655 /* Internal method for ix86_init_builtins. */
32656
32657 static void
32658 ix86_init_builtins_va_builtins_abi (void)
32659 {
32660 tree ms_va_ref, sysv_va_ref;
32661 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32662 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32663 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32664 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32665
32666 if (!TARGET_64BIT)
32667 return;
32668 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32669 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32670 ms_va_ref = build_reference_type (ms_va_list_type_node);
32671 sysv_va_ref =
32672 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32673
32674 fnvoid_va_end_ms =
32675 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32676 fnvoid_va_start_ms =
32677 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32678 fnvoid_va_end_sysv =
32679 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32680 fnvoid_va_start_sysv =
32681 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32682 NULL_TREE);
32683 fnvoid_va_copy_ms =
32684 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32685 NULL_TREE);
32686 fnvoid_va_copy_sysv =
32687 build_function_type_list (void_type_node, sysv_va_ref,
32688 sysv_va_ref, NULL_TREE);
32689
32690 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32691 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32692 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32693 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32694 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32695 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32696 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32697 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32698 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32699 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32700 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32701 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32702 }
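/* These builtins back va_start/va_end/va_copy in 64-bit functions whose
   calling convention differs from the default ABI.  A hedged example of
   where they end up being used (sum_ints is a hypothetical function):

     int __attribute__ ((ms_abi))
     sum_ints (int count, ...)
     {
       __builtin_ms_va_list ap;
       int sum = 0;
       __builtin_ms_va_start (ap, count);
       while (count-- > 0)
         sum += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return sum;
     }
   */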
32703
32704 static void
32705 ix86_init_builtin_types (void)
32706 {
32707 tree float128_type_node, float80_type_node;
32708
32709 /* The __float80 type. */
32710 float80_type_node = long_double_type_node;
32711 if (TYPE_MODE (float80_type_node) != XFmode)
32712 {
32713 /* The __float80 type. */
32714 float80_type_node = make_node (REAL_TYPE);
32715
32716 TYPE_PRECISION (float80_type_node) = 80;
32717 layout_type (float80_type_node);
32718 }
32719 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32720
32721 /* The __float128 type. */
32722 float128_type_node = make_node (REAL_TYPE);
32723 TYPE_PRECISION (float128_type_node) = 128;
32724 layout_type (float128_type_node);
32725 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32726
32727 /* This macro is built by i386-builtin-types.awk. */
32728 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32729 }
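/* The two types registered above are available to user code as GCC
   extensions, e.g. (illustrative only; the w/q literal suffixes are the
   GCC extensions for these types):

     __float80  ext  = 1.0w;   // 80-bit x87 extended precision
     __float128 quad = 1.0q;   // 128-bit IEEE quad, done in software

   __float128 arithmetic goes through the TFmode libgcc routines, which is
   why the __builtin_fabsq / __builtin_copysignq builtins created in
   ix86_init_builtins just below map to __fabstf2 / __copysigntf3.  */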
32730
32731 static void
32732 ix86_init_builtins (void)
32733 {
32734 tree t;
32735
32736 ix86_init_builtin_types ();
32737
32738 /* Builtins to get CPU type and features. */
32739 ix86_init_platform_type_builtins ();
32740
32741 /* TFmode support builtins. */
32742 def_builtin_const (0, "__builtin_infq",
32743 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32744 def_builtin_const (0, "__builtin_huge_valq",
32745 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32746
32747 /* We will expand them to a normal call if SSE isn't available since
32748 they are used by libgcc. */
32749 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32750 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32751 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32752 TREE_READONLY (t) = 1;
32753 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32754
32755 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32756 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32757 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32758 TREE_READONLY (t) = 1;
32759 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32760
32761 ix86_init_tm_builtins ();
32762 ix86_init_mmx_sse_builtins ();
32763
32764 if (TARGET_LP64)
32765 ix86_init_builtins_va_builtins_abi ();
32766
32767 #ifdef SUBTARGET_INIT_BUILTINS
32768 SUBTARGET_INIT_BUILTINS;
32769 #endif
32770 }
32771
32772 /* Return the ix86 builtin for CODE. */
32773
32774 static tree
32775 ix86_builtin_decl (unsigned code, bool)
32776 {
32777 if (code >= IX86_BUILTIN_MAX)
32778 return error_mark_node;
32779
32780 return ix86_builtins[code];
32781 }
32782
32783 /* Errors in the source file can cause expand_expr to return const0_rtx
32784 where we expect a vector. To avoid crashing, use one of the vector
32785 clear instructions. */
32786 static rtx
32787 safe_vector_operand (rtx x, enum machine_mode mode)
32788 {
32789 if (x == const0_rtx)
32790 x = CONST0_RTX (mode);
32791 return x;
32792 }
32793
32794 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32795
32796 static rtx
32797 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32798 {
32799 rtx pat;
32800 tree arg0 = CALL_EXPR_ARG (exp, 0);
32801 tree arg1 = CALL_EXPR_ARG (exp, 1);
32802 rtx op0 = expand_normal (arg0);
32803 rtx op1 = expand_normal (arg1);
32804 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32805 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32806 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32807
32808 if (VECTOR_MODE_P (mode0))
32809 op0 = safe_vector_operand (op0, mode0);
32810 if (VECTOR_MODE_P (mode1))
32811 op1 = safe_vector_operand (op1, mode1);
32812
32813 if (optimize || !target
32814 || GET_MODE (target) != tmode
32815 || !insn_data[icode].operand[0].predicate (target, tmode))
32816 target = gen_reg_rtx (tmode);
32817
32818 if (GET_MODE (op1) == SImode && mode1 == TImode)
32819 {
32820 rtx x = gen_reg_rtx (V4SImode);
32821 emit_insn (gen_sse2_loadd (x, op1));
32822 op1 = gen_lowpart (TImode, x);
32823 }
32824
32825 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32826 op0 = copy_to_mode_reg (mode0, op0);
32827 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32828 op1 = copy_to_mode_reg (mode1, op1);
32829
32830 pat = GEN_FCN (icode) (target, op0, op1);
32831 if (! pat)
32832 return 0;
32833
32834 emit_insn (pat);
32835
32836 return target;
32837 }
32838
32839 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32840
32841 static rtx
32842 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32843 enum ix86_builtin_func_type m_type,
32844 enum rtx_code sub_code)
32845 {
32846 rtx pat;
32847 int i;
32848 int nargs;
32849 bool comparison_p = false;
32850 bool tf_p = false;
32851 bool last_arg_constant = false;
32852 int num_memory = 0;
32853 struct {
32854 rtx op;
32855 enum machine_mode mode;
32856 } args[4];
32857
32858 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32859
32860 switch (m_type)
32861 {
32862 case MULTI_ARG_4_DF2_DI_I:
32863 case MULTI_ARG_4_DF2_DI_I1:
32864 case MULTI_ARG_4_SF2_SI_I:
32865 case MULTI_ARG_4_SF2_SI_I1:
32866 nargs = 4;
32867 last_arg_constant = true;
32868 break;
32869
32870 case MULTI_ARG_3_SF:
32871 case MULTI_ARG_3_DF:
32872 case MULTI_ARG_3_SF2:
32873 case MULTI_ARG_3_DF2:
32874 case MULTI_ARG_3_DI:
32875 case MULTI_ARG_3_SI:
32876 case MULTI_ARG_3_SI_DI:
32877 case MULTI_ARG_3_HI:
32878 case MULTI_ARG_3_HI_SI:
32879 case MULTI_ARG_3_QI:
32880 case MULTI_ARG_3_DI2:
32881 case MULTI_ARG_3_SI2:
32882 case MULTI_ARG_3_HI2:
32883 case MULTI_ARG_3_QI2:
32884 nargs = 3;
32885 break;
32886
32887 case MULTI_ARG_2_SF:
32888 case MULTI_ARG_2_DF:
32889 case MULTI_ARG_2_DI:
32890 case MULTI_ARG_2_SI:
32891 case MULTI_ARG_2_HI:
32892 case MULTI_ARG_2_QI:
32893 nargs = 2;
32894 break;
32895
32896 case MULTI_ARG_2_DI_IMM:
32897 case MULTI_ARG_2_SI_IMM:
32898 case MULTI_ARG_2_HI_IMM:
32899 case MULTI_ARG_2_QI_IMM:
32900 nargs = 2;
32901 last_arg_constant = true;
32902 break;
32903
32904 case MULTI_ARG_1_SF:
32905 case MULTI_ARG_1_DF:
32906 case MULTI_ARG_1_SF2:
32907 case MULTI_ARG_1_DF2:
32908 case MULTI_ARG_1_DI:
32909 case MULTI_ARG_1_SI:
32910 case MULTI_ARG_1_HI:
32911 case MULTI_ARG_1_QI:
32912 case MULTI_ARG_1_SI_DI:
32913 case MULTI_ARG_1_HI_DI:
32914 case MULTI_ARG_1_HI_SI:
32915 case MULTI_ARG_1_QI_DI:
32916 case MULTI_ARG_1_QI_SI:
32917 case MULTI_ARG_1_QI_HI:
32918 nargs = 1;
32919 break;
32920
32921 case MULTI_ARG_2_DI_CMP:
32922 case MULTI_ARG_2_SI_CMP:
32923 case MULTI_ARG_2_HI_CMP:
32924 case MULTI_ARG_2_QI_CMP:
32925 nargs = 2;
32926 comparison_p = true;
32927 break;
32928
32929 case MULTI_ARG_2_SF_TF:
32930 case MULTI_ARG_2_DF_TF:
32931 case MULTI_ARG_2_DI_TF:
32932 case MULTI_ARG_2_SI_TF:
32933 case MULTI_ARG_2_HI_TF:
32934 case MULTI_ARG_2_QI_TF:
32935 nargs = 2;
32936 tf_p = true;
32937 break;
32938
32939 default:
32940 gcc_unreachable ();
32941 }
32942
32943 if (optimize || !target
32944 || GET_MODE (target) != tmode
32945 || !insn_data[icode].operand[0].predicate (target, tmode))
32946 target = gen_reg_rtx (tmode);
32947
32948 gcc_assert (nargs <= 4);
32949
32950 for (i = 0; i < nargs; i++)
32951 {
32952 tree arg = CALL_EXPR_ARG (exp, i);
32953 rtx op = expand_normal (arg);
32954 int adjust = (comparison_p) ? 1 : 0;
32955 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32956
32957 if (last_arg_constant && i == nargs - 1)
32958 {
32959 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32960 {
32961 enum insn_code new_icode = icode;
32962 switch (icode)
32963 {
32964 case CODE_FOR_xop_vpermil2v2df3:
32965 case CODE_FOR_xop_vpermil2v4sf3:
32966 case CODE_FOR_xop_vpermil2v4df3:
32967 case CODE_FOR_xop_vpermil2v8sf3:
32968 error ("the last argument must be a 2-bit immediate");
32969 return gen_reg_rtx (tmode);
32970 case CODE_FOR_xop_rotlv2di3:
32971 new_icode = CODE_FOR_rotlv2di3;
32972 goto xop_rotl;
32973 case CODE_FOR_xop_rotlv4si3:
32974 new_icode = CODE_FOR_rotlv4si3;
32975 goto xop_rotl;
32976 case CODE_FOR_xop_rotlv8hi3:
32977 new_icode = CODE_FOR_rotlv8hi3;
32978 goto xop_rotl;
32979 case CODE_FOR_xop_rotlv16qi3:
32980 new_icode = CODE_FOR_rotlv16qi3;
32981 xop_rotl:
32982 if (CONST_INT_P (op))
32983 {
32984 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32985 op = GEN_INT (INTVAL (op) & mask);
32986 gcc_checking_assert
32987 (insn_data[icode].operand[i + 1].predicate (op, mode));
32988 }
32989 else
32990 {
32991 gcc_checking_assert
32992 (nargs == 2
32993 && insn_data[new_icode].operand[0].mode == tmode
32994 && insn_data[new_icode].operand[1].mode == tmode
32995 && insn_data[new_icode].operand[2].mode == mode
32996 && insn_data[new_icode].operand[0].predicate
32997 == insn_data[icode].operand[0].predicate
32998 && insn_data[new_icode].operand[1].predicate
32999 == insn_data[icode].operand[1].predicate);
33000 icode = new_icode;
33001 goto non_constant;
33002 }
33003 break;
33004 default:
33005 gcc_unreachable ();
33006 }
33007 }
33008 }
33009 else
33010 {
33011 non_constant:
33012 if (VECTOR_MODE_P (mode))
33013 op = safe_vector_operand (op, mode);
33014
33015 /* If we aren't optimizing, only allow one memory operand to be
33016 generated. */
33017 if (memory_operand (op, mode))
33018 num_memory++;
33019
33020 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33021
33022 if (optimize
33023 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33024 || num_memory > 1)
33025 op = force_reg (mode, op);
33026 }
33027
33028 args[i].op = op;
33029 args[i].mode = mode;
33030 }
33031
33032 switch (nargs)
33033 {
33034 case 1:
33035 pat = GEN_FCN (icode) (target, args[0].op);
33036 break;
33037
33038 case 2:
33039 if (tf_p)
33040 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33041 GEN_INT ((int)sub_code));
33042 else if (! comparison_p)
33043 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33044 else
33045 {
33046 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33047 args[0].op,
33048 args[1].op);
33049
33050 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33051 }
33052 break;
33053
33054 case 3:
33055 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33056 break;
33057
33058 case 4:
33059 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33060 break;
33061
33062 default:
33063 gcc_unreachable ();
33064 }
33065
33066 if (! pat)
33067 return 0;
33068
33069 emit_insn (pat);
33070 return target;
33071 }
33072
33073 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33074 insns with vec_merge. */
33075
33076 static rtx
33077 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33078 rtx target)
33079 {
33080 rtx pat;
33081 tree arg0 = CALL_EXPR_ARG (exp, 0);
33082 rtx op1, op0 = expand_normal (arg0);
33083 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33084 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33085
33086 if (optimize || !target
33087 || GET_MODE (target) != tmode
33088 || !insn_data[icode].operand[0].predicate (target, tmode))
33089 target = gen_reg_rtx (tmode);
33090
33091 if (VECTOR_MODE_P (mode0))
33092 op0 = safe_vector_operand (op0, mode0);
33093
33094 if ((optimize && !register_operand (op0, mode0))
33095 || !insn_data[icode].operand[1].predicate (op0, mode0))
33096 op0 = copy_to_mode_reg (mode0, op0);
33097
33098 op1 = op0;
33099 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33100 op1 = copy_to_mode_reg (mode0, op1);
33101
33102 pat = GEN_FCN (icode) (target, op0, op1);
33103 if (! pat)
33104 return 0;
33105 emit_insn (pat);
33106 return target;
33107 }
33108
33109 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33110
33111 static rtx
33112 ix86_expand_sse_compare (const struct builtin_description *d,
33113 tree exp, rtx target, bool swap)
33114 {
33115 rtx pat;
33116 tree arg0 = CALL_EXPR_ARG (exp, 0);
33117 tree arg1 = CALL_EXPR_ARG (exp, 1);
33118 rtx op0 = expand_normal (arg0);
33119 rtx op1 = expand_normal (arg1);
33120 rtx op2;
33121 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33122 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33123 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33124 enum rtx_code comparison = d->comparison;
33125
33126 if (VECTOR_MODE_P (mode0))
33127 op0 = safe_vector_operand (op0, mode0);
33128 if (VECTOR_MODE_P (mode1))
33129 op1 = safe_vector_operand (op1, mode1);
33130
33131 /* Swap operands if we have a comparison that isn't available in
33132 hardware. */
33133 if (swap)
33134 {
33135 rtx tmp = gen_reg_rtx (mode1);
33136 emit_move_insn (tmp, op1);
33137 op1 = op0;
33138 op0 = tmp;
33139 }
33140
33141 if (optimize || !target
33142 || GET_MODE (target) != tmode
33143 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33144 target = gen_reg_rtx (tmode);
33145
33146 if ((optimize && !register_operand (op0, mode0))
33147 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33148 op0 = copy_to_mode_reg (mode0, op0);
33149 if ((optimize && !register_operand (op1, mode1))
33150 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33151 op1 = copy_to_mode_reg (mode1, op1);
33152
33153 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33154 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33155 if (! pat)
33156 return 0;
33157 emit_insn (pat);
33158 return target;
33159 }
33160
33161 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33162
33163 static rtx
33164 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33165 rtx target)
33166 {
33167 rtx pat;
33168 tree arg0 = CALL_EXPR_ARG (exp, 0);
33169 tree arg1 = CALL_EXPR_ARG (exp, 1);
33170 rtx op0 = expand_normal (arg0);
33171 rtx op1 = expand_normal (arg1);
33172 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33173 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33174 enum rtx_code comparison = d->comparison;
33175
33176 if (VECTOR_MODE_P (mode0))
33177 op0 = safe_vector_operand (op0, mode0);
33178 if (VECTOR_MODE_P (mode1))
33179 op1 = safe_vector_operand (op1, mode1);
33180
33181 /* Swap operands if we have a comparison that isn't available in
33182 hardware. */
33183 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33184 {
33185 rtx tmp = op1;
33186 op1 = op0;
33187 op0 = tmp;
33188 }
33189
33190 target = gen_reg_rtx (SImode);
33191 emit_move_insn (target, const0_rtx);
33192 target = gen_rtx_SUBREG (QImode, target, 0);
33193
33194 if ((optimize && !register_operand (op0, mode0))
33195 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33196 op0 = copy_to_mode_reg (mode0, op0);
33197 if ((optimize && !register_operand (op1, mode1))
33198 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33199 op1 = copy_to_mode_reg (mode1, op1);
33200
33201 pat = GEN_FCN (d->icode) (op0, op1);
33202 if (! pat)
33203 return 0;
33204 emit_insn (pat);
33205 emit_insn (gen_rtx_SET (VOIDmode,
33206 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33207 gen_rtx_fmt_ee (comparison, QImode,
33208 SET_DEST (pat),
33209 const0_rtx)));
33210
33211 return SUBREG_REG (target);
33212 }
33213
33214 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33215
33216 static rtx
33217 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33218 rtx target)
33219 {
33220 rtx pat;
33221 tree arg0 = CALL_EXPR_ARG (exp, 0);
33222 rtx op1, op0 = expand_normal (arg0);
33223 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33224 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33225
33226 if (optimize || target == 0
33227 || GET_MODE (target) != tmode
33228 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33229 target = gen_reg_rtx (tmode);
33230
33231 if (VECTOR_MODE_P (mode0))
33232 op0 = safe_vector_operand (op0, mode0);
33233
33234 if ((optimize && !register_operand (op0, mode0))
33235 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33236 op0 = copy_to_mode_reg (mode0, op0);
33237
33238 op1 = GEN_INT (d->comparison);
33239
33240 pat = GEN_FCN (d->icode) (target, op0, op1);
33241 if (! pat)
33242 return 0;
33243 emit_insn (pat);
33244 return target;
33245 }
33246
33247 static rtx
33248 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33249 tree exp, rtx target)
33250 {
33251 rtx pat;
33252 tree arg0 = CALL_EXPR_ARG (exp, 0);
33253 tree arg1 = CALL_EXPR_ARG (exp, 1);
33254 rtx op0 = expand_normal (arg0);
33255 rtx op1 = expand_normal (arg1);
33256 rtx op2;
33257 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33258 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33259 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33260
33261 if (optimize || target == 0
33262 || GET_MODE (target) != tmode
33263 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33264 target = gen_reg_rtx (tmode);
33265
33266 op0 = safe_vector_operand (op0, mode0);
33267 op1 = safe_vector_operand (op1, mode1);
33268
33269 if ((optimize && !register_operand (op0, mode0))
33270 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33271 op0 = copy_to_mode_reg (mode0, op0);
33272 if ((optimize && !register_operand (op1, mode1))
33273 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33274 op1 = copy_to_mode_reg (mode1, op1);
33275
33276 op2 = GEN_INT (d->comparison);
33277
33278 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33279 if (! pat)
33280 return 0;
33281 emit_insn (pat);
33282 return target;
33283 }
33284
33285 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33286
33287 static rtx
33288 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33289 rtx target)
33290 {
33291 rtx pat;
33292 tree arg0 = CALL_EXPR_ARG (exp, 0);
33293 tree arg1 = CALL_EXPR_ARG (exp, 1);
33294 rtx op0 = expand_normal (arg0);
33295 rtx op1 = expand_normal (arg1);
33296 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33297 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33298 enum rtx_code comparison = d->comparison;
33299
33300 if (VECTOR_MODE_P (mode0))
33301 op0 = safe_vector_operand (op0, mode0);
33302 if (VECTOR_MODE_P (mode1))
33303 op1 = safe_vector_operand (op1, mode1);
33304
33305 target = gen_reg_rtx (SImode);
33306 emit_move_insn (target, const0_rtx);
33307 target = gen_rtx_SUBREG (QImode, target, 0);
33308
33309 if ((optimize && !register_operand (op0, mode0))
33310 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33311 op0 = copy_to_mode_reg (mode0, op0);
33312 if ((optimize && !register_operand (op1, mode1))
33313 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33314 op1 = copy_to_mode_reg (mode1, op1);
33315
33316 pat = GEN_FCN (d->icode) (op0, op1);
33317 if (! pat)
33318 return 0;
33319 emit_insn (pat);
33320 emit_insn (gen_rtx_SET (VOIDmode,
33321 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33322 gen_rtx_fmt_ee (comparison, QImode,
33323 SET_DEST (pat),
33324 const0_rtx)));
33325
33326 return SUBREG_REG (target);
33327 }
33328
33329 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33330
33331 static rtx
33332 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33333 tree exp, rtx target)
33334 {
33335 rtx pat;
33336 tree arg0 = CALL_EXPR_ARG (exp, 0);
33337 tree arg1 = CALL_EXPR_ARG (exp, 1);
33338 tree arg2 = CALL_EXPR_ARG (exp, 2);
33339 tree arg3 = CALL_EXPR_ARG (exp, 3);
33340 tree arg4 = CALL_EXPR_ARG (exp, 4);
33341 rtx scratch0, scratch1;
33342 rtx op0 = expand_normal (arg0);
33343 rtx op1 = expand_normal (arg1);
33344 rtx op2 = expand_normal (arg2);
33345 rtx op3 = expand_normal (arg3);
33346 rtx op4 = expand_normal (arg4);
33347 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33348
33349 tmode0 = insn_data[d->icode].operand[0].mode;
33350 tmode1 = insn_data[d->icode].operand[1].mode;
33351 modev2 = insn_data[d->icode].operand[2].mode;
33352 modei3 = insn_data[d->icode].operand[3].mode;
33353 modev4 = insn_data[d->icode].operand[4].mode;
33354 modei5 = insn_data[d->icode].operand[5].mode;
33355 modeimm = insn_data[d->icode].operand[6].mode;
33356
33357 if (VECTOR_MODE_P (modev2))
33358 op0 = safe_vector_operand (op0, modev2);
33359 if (VECTOR_MODE_P (modev4))
33360 op2 = safe_vector_operand (op2, modev4);
33361
33362 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33363 op0 = copy_to_mode_reg (modev2, op0);
33364 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33365 op1 = copy_to_mode_reg (modei3, op1);
33366 if ((optimize && !register_operand (op2, modev4))
33367 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33368 op2 = copy_to_mode_reg (modev4, op2);
33369 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33370 op3 = copy_to_mode_reg (modei5, op3);
33371
33372 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33373 {
33374 error ("the fifth argument must be an 8-bit immediate");
33375 return const0_rtx;
33376 }
33377
33378 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33379 {
33380 if (optimize || !target
33381 || GET_MODE (target) != tmode0
33382 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33383 target = gen_reg_rtx (tmode0);
33384
33385 scratch1 = gen_reg_rtx (tmode1);
33386
33387 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33388 }
33389 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33390 {
33391 if (optimize || !target
33392 || GET_MODE (target) != tmode1
33393 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33394 target = gen_reg_rtx (tmode1);
33395
33396 scratch0 = gen_reg_rtx (tmode0);
33397
33398 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33399 }
33400 else
33401 {
33402 gcc_assert (d->flag);
33403
33404 scratch0 = gen_reg_rtx (tmode0);
33405 scratch1 = gen_reg_rtx (tmode1);
33406
33407 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33408 }
33409
33410 if (! pat)
33411 return 0;
33412
33413 emit_insn (pat);
33414
33415 if (d->flag)
33416 {
33417 target = gen_reg_rtx (SImode);
33418 emit_move_insn (target, const0_rtx);
33419 target = gen_rtx_SUBREG (QImode, target, 0);
33420
33421 emit_insn
33422 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33423 gen_rtx_fmt_ee (EQ, QImode,
33424 gen_rtx_REG ((enum machine_mode) d->flag,
33425 FLAGS_REG),
33426 const0_rtx)));
33427 return SUBREG_REG (target);
33428 }
33429 else
33430 return target;
33431 }
33432
33433
33434 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33435
33436 static rtx
33437 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33438 tree exp, rtx target)
33439 {
33440 rtx pat;
33441 tree arg0 = CALL_EXPR_ARG (exp, 0);
33442 tree arg1 = CALL_EXPR_ARG (exp, 1);
33443 tree arg2 = CALL_EXPR_ARG (exp, 2);
33444 rtx scratch0, scratch1;
33445 rtx op0 = expand_normal (arg0);
33446 rtx op1 = expand_normal (arg1);
33447 rtx op2 = expand_normal (arg2);
33448 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33449
33450 tmode0 = insn_data[d->icode].operand[0].mode;
33451 tmode1 = insn_data[d->icode].operand[1].mode;
33452 modev2 = insn_data[d->icode].operand[2].mode;
33453 modev3 = insn_data[d->icode].operand[3].mode;
33454 modeimm = insn_data[d->icode].operand[4].mode;
33455
33456 if (VECTOR_MODE_P (modev2))
33457 op0 = safe_vector_operand (op0, modev2);
33458 if (VECTOR_MODE_P (modev3))
33459 op1 = safe_vector_operand (op1, modev3);
33460
33461 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33462 op0 = copy_to_mode_reg (modev2, op0);
33463 if ((optimize && !register_operand (op1, modev3))
33464 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33465 op1 = copy_to_mode_reg (modev3, op1);
33466
33467 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33468 {
33469 error ("the third argument must be an 8-bit immediate");
33470 return const0_rtx;
33471 }
33472
33473 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33474 {
33475 if (optimize || !target
33476 || GET_MODE (target) != tmode0
33477 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33478 target = gen_reg_rtx (tmode0);
33479
33480 scratch1 = gen_reg_rtx (tmode1);
33481
33482 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33483 }
33484 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33485 {
33486 if (optimize || !target
33487 || GET_MODE (target) != tmode1
33488 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33489 target = gen_reg_rtx (tmode1);
33490
33491 scratch0 = gen_reg_rtx (tmode0);
33492
33493 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33494 }
33495 else
33496 {
33497 gcc_assert (d->flag);
33498
33499 scratch0 = gen_reg_rtx (tmode0);
33500 scratch1 = gen_reg_rtx (tmode1);
33501
33502 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33503 }
33504
33505 if (! pat)
33506 return 0;
33507
33508 emit_insn (pat);
33509
33510 if (d->flag)
33511 {
33512 target = gen_reg_rtx (SImode);
33513 emit_move_insn (target, const0_rtx);
33514 target = gen_rtx_SUBREG (QImode, target, 0);
33515
33516 emit_insn
33517 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33518 gen_rtx_fmt_ee (EQ, QImode,
33519 gen_rtx_REG ((enum machine_mode) d->flag,
33520 FLAGS_REG),
33521 const0_rtx)));
33522 return SUBREG_REG (target);
33523 }
33524 else
33525 return target;
33526 }
33527
33528 /* Subroutine of ix86_expand_builtin to take care of insns with
33529 a variable number of operands. */
33530
33531 static rtx
33532 ix86_expand_args_builtin (const struct builtin_description *d,
33533 tree exp, rtx target)
33534 {
33535 rtx pat, real_target;
33536 unsigned int i, nargs;
33537 unsigned int nargs_constant = 0;
33538 unsigned int mask_pos = 0;
33539 int num_memory = 0;
33540 struct
33541 {
33542 rtx op;
33543 enum machine_mode mode;
33544 } args[6];
33545 bool last_arg_count = false;
33546 enum insn_code icode = d->icode;
33547 const struct insn_data_d *insn_p = &insn_data[icode];
33548 enum machine_mode tmode = insn_p->operand[0].mode;
33549 enum machine_mode rmode = VOIDmode;
33550 bool swap = false;
33551 enum rtx_code comparison = d->comparison;
33552
33553 switch ((enum ix86_builtin_func_type) d->flag)
33554 {
33555 case V2DF_FTYPE_V2DF_ROUND:
33556 case V4DF_FTYPE_V4DF_ROUND:
33557 case V4SF_FTYPE_V4SF_ROUND:
33558 case V8SF_FTYPE_V8SF_ROUND:
33559 case V4SI_FTYPE_V4SF_ROUND:
33560 case V8SI_FTYPE_V8SF_ROUND:
33561 return ix86_expand_sse_round (d, exp, target);
33562 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33563 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33564 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33565 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33566 case INT_FTYPE_V8SF_V8SF_PTEST:
33567 case INT_FTYPE_V4DI_V4DI_PTEST:
33568 case INT_FTYPE_V4DF_V4DF_PTEST:
33569 case INT_FTYPE_V4SF_V4SF_PTEST:
33570 case INT_FTYPE_V2DI_V2DI_PTEST:
33571 case INT_FTYPE_V2DF_V2DF_PTEST:
33572 return ix86_expand_sse_ptest (d, exp, target);
33573 case FLOAT128_FTYPE_FLOAT128:
33574 case FLOAT_FTYPE_FLOAT:
33575 case INT_FTYPE_INT:
33576 case UINT64_FTYPE_INT:
33577 case UINT16_FTYPE_UINT16:
33578 case INT64_FTYPE_INT64:
33579 case INT64_FTYPE_V4SF:
33580 case INT64_FTYPE_V2DF:
33581 case INT_FTYPE_V16QI:
33582 case INT_FTYPE_V8QI:
33583 case INT_FTYPE_V8SF:
33584 case INT_FTYPE_V4DF:
33585 case INT_FTYPE_V4SF:
33586 case INT_FTYPE_V2DF:
33587 case INT_FTYPE_V32QI:
33588 case V16QI_FTYPE_V16QI:
33589 case V8SI_FTYPE_V8SF:
33590 case V8SI_FTYPE_V4SI:
33591 case V8HI_FTYPE_V8HI:
33592 case V8HI_FTYPE_V16QI:
33593 case V8QI_FTYPE_V8QI:
33594 case V8SF_FTYPE_V8SF:
33595 case V8SF_FTYPE_V8SI:
33596 case V8SF_FTYPE_V4SF:
33597 case V8SF_FTYPE_V8HI:
33598 case V4SI_FTYPE_V4SI:
33599 case V4SI_FTYPE_V16QI:
33600 case V4SI_FTYPE_V4SF:
33601 case V4SI_FTYPE_V8SI:
33602 case V4SI_FTYPE_V8HI:
33603 case V4SI_FTYPE_V4DF:
33604 case V4SI_FTYPE_V2DF:
33605 case V4HI_FTYPE_V4HI:
33606 case V4DF_FTYPE_V4DF:
33607 case V4DF_FTYPE_V4SI:
33608 case V4DF_FTYPE_V4SF:
33609 case V4DF_FTYPE_V2DF:
33610 case V4SF_FTYPE_V4SF:
33611 case V4SF_FTYPE_V4SI:
33612 case V4SF_FTYPE_V8SF:
33613 case V4SF_FTYPE_V4DF:
33614 case V4SF_FTYPE_V8HI:
33615 case V4SF_FTYPE_V2DF:
33616 case V2DI_FTYPE_V2DI:
33617 case V2DI_FTYPE_V16QI:
33618 case V2DI_FTYPE_V8HI:
33619 case V2DI_FTYPE_V4SI:
33620 case V2DF_FTYPE_V2DF:
33621 case V2DF_FTYPE_V4SI:
33622 case V2DF_FTYPE_V4DF:
33623 case V2DF_FTYPE_V4SF:
33624 case V2DF_FTYPE_V2SI:
33625 case V2SI_FTYPE_V2SI:
33626 case V2SI_FTYPE_V4SF:
33627 case V2SI_FTYPE_V2SF:
33628 case V2SI_FTYPE_V2DF:
33629 case V2SF_FTYPE_V2SF:
33630 case V2SF_FTYPE_V2SI:
33631 case V32QI_FTYPE_V32QI:
33632 case V32QI_FTYPE_V16QI:
33633 case V16HI_FTYPE_V16HI:
33634 case V16HI_FTYPE_V8HI:
33635 case V8SI_FTYPE_V8SI:
33636 case V16HI_FTYPE_V16QI:
33637 case V8SI_FTYPE_V16QI:
33638 case V4DI_FTYPE_V16QI:
33639 case V8SI_FTYPE_V8HI:
33640 case V4DI_FTYPE_V8HI:
33641 case V4DI_FTYPE_V4SI:
33642 case V4DI_FTYPE_V2DI:
33643 case HI_FTYPE_HI:
33644 case UINT_FTYPE_V2DF:
33645 case UINT_FTYPE_V4SF:
33646 case UINT64_FTYPE_V2DF:
33647 case UINT64_FTYPE_V4SF:
33648 case V16QI_FTYPE_V8DI:
33649 case V16HI_FTYPE_V16SI:
33650 case V16SI_FTYPE_HI:
33651 case V16SI_FTYPE_V16SI:
33652 case V16SI_FTYPE_INT:
33653 case V16SF_FTYPE_FLOAT:
33654 case V16SF_FTYPE_V8SF:
33655 case V16SI_FTYPE_V8SI:
33656 case V16SF_FTYPE_V4SF:
33657 case V16SI_FTYPE_V4SI:
33658 case V16SF_FTYPE_V16SF:
33659 case V8HI_FTYPE_V8DI:
33660 case V8UHI_FTYPE_V8UHI:
33661 case V8SI_FTYPE_V8DI:
33662 case V8USI_FTYPE_V8USI:
33663 case V8SF_FTYPE_V8DF:
33664 case V8DI_FTYPE_QI:
33665 case V8DI_FTYPE_INT64:
33666 case V8DI_FTYPE_V4DI:
33667 case V8DI_FTYPE_V8DI:
33668 case V8DF_FTYPE_DOUBLE:
33669 case V8DF_FTYPE_V4DF:
33670 case V8DF_FTYPE_V2DF:
33671 case V8DF_FTYPE_V8DF:
33672 case V8DF_FTYPE_V8SI:
33673 nargs = 1;
33674 break;
33675 case V4SF_FTYPE_V4SF_VEC_MERGE:
33676 case V2DF_FTYPE_V2DF_VEC_MERGE:
33677 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33678 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33679 case V16QI_FTYPE_V16QI_V16QI:
33680 case V16QI_FTYPE_V8HI_V8HI:
33681 case V16SI_FTYPE_V16SI_V16SI:
33682 case V16SF_FTYPE_V16SF_V16SF:
33683 case V16SF_FTYPE_V16SF_V16SI:
33684 case V8QI_FTYPE_V8QI_V8QI:
33685 case V8QI_FTYPE_V4HI_V4HI:
33686 case V8HI_FTYPE_V8HI_V8HI:
33687 case V8HI_FTYPE_V16QI_V16QI:
33688 case V8HI_FTYPE_V4SI_V4SI:
33689 case V8SF_FTYPE_V8SF_V8SF:
33690 case V8SF_FTYPE_V8SF_V8SI:
33691 case V8DI_FTYPE_V8DI_V8DI:
33692 case V8DF_FTYPE_V8DF_V8DF:
33693 case V8DF_FTYPE_V8DF_V8DI:
33694 case V4SI_FTYPE_V4SI_V4SI:
33695 case V4SI_FTYPE_V8HI_V8HI:
33696 case V4SI_FTYPE_V4SF_V4SF:
33697 case V4SI_FTYPE_V2DF_V2DF:
33698 case V4HI_FTYPE_V4HI_V4HI:
33699 case V4HI_FTYPE_V8QI_V8QI:
33700 case V4HI_FTYPE_V2SI_V2SI:
33701 case V4DF_FTYPE_V4DF_V4DF:
33702 case V4DF_FTYPE_V4DF_V4DI:
33703 case V4SF_FTYPE_V4SF_V4SF:
33704 case V4SF_FTYPE_V4SF_V4SI:
33705 case V4SF_FTYPE_V4SF_V2SI:
33706 case V4SF_FTYPE_V4SF_V2DF:
33707 case V4SF_FTYPE_V4SF_UINT:
33708 case V4SF_FTYPE_V4SF_UINT64:
33709 case V4SF_FTYPE_V4SF_DI:
33710 case V4SF_FTYPE_V4SF_SI:
33711 case V2DI_FTYPE_V2DI_V2DI:
33712 case V2DI_FTYPE_V16QI_V16QI:
33713 case V2DI_FTYPE_V4SI_V4SI:
33714 case V2UDI_FTYPE_V4USI_V4USI:
33715 case V2DI_FTYPE_V2DI_V16QI:
33716 case V2DI_FTYPE_V2DF_V2DF:
33717 case V2SI_FTYPE_V2SI_V2SI:
33718 case V2SI_FTYPE_V4HI_V4HI:
33719 case V2SI_FTYPE_V2SF_V2SF:
33720 case V2DF_FTYPE_V2DF_V2DF:
33721 case V2DF_FTYPE_V2DF_V4SF:
33722 case V2DF_FTYPE_V2DF_V2DI:
33723 case V2DF_FTYPE_V2DF_DI:
33724 case V2DF_FTYPE_V2DF_SI:
33725 case V2DF_FTYPE_V2DF_UINT:
33726 case V2DF_FTYPE_V2DF_UINT64:
33727 case V2SF_FTYPE_V2SF_V2SF:
33728 case V1DI_FTYPE_V1DI_V1DI:
33729 case V1DI_FTYPE_V8QI_V8QI:
33730 case V1DI_FTYPE_V2SI_V2SI:
33731 case V32QI_FTYPE_V16HI_V16HI:
33732 case V16HI_FTYPE_V8SI_V8SI:
33733 case V32QI_FTYPE_V32QI_V32QI:
33734 case V16HI_FTYPE_V32QI_V32QI:
33735 case V16HI_FTYPE_V16HI_V16HI:
33736 case V8SI_FTYPE_V4DF_V4DF:
33737 case V8SI_FTYPE_V8SI_V8SI:
33738 case V8SI_FTYPE_V16HI_V16HI:
33739 case V4DI_FTYPE_V4DI_V4DI:
33740 case V4DI_FTYPE_V8SI_V8SI:
33741 case V4UDI_FTYPE_V8USI_V8USI:
33742 case QI_FTYPE_V8DI_V8DI:
33743 case HI_FTYPE_V16SI_V16SI:
33744 if (comparison == UNKNOWN)
33745 return ix86_expand_binop_builtin (icode, exp, target);
33746 nargs = 2;
33747 break;
33748 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33749 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33750 gcc_assert (comparison != UNKNOWN);
33751 nargs = 2;
33752 swap = true;
33753 break;
33754 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33755 case V16HI_FTYPE_V16HI_SI_COUNT:
33756 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33757 case V8SI_FTYPE_V8SI_SI_COUNT:
33758 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33759 case V4DI_FTYPE_V4DI_INT_COUNT:
33760 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33761 case V8HI_FTYPE_V8HI_SI_COUNT:
33762 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33763 case V4SI_FTYPE_V4SI_SI_COUNT:
33764 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33765 case V4HI_FTYPE_V4HI_SI_COUNT:
33766 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33767 case V2DI_FTYPE_V2DI_SI_COUNT:
33768 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33769 case V2SI_FTYPE_V2SI_SI_COUNT:
33770 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33771 case V1DI_FTYPE_V1DI_SI_COUNT:
33772 nargs = 2;
33773 last_arg_count = true;
33774 break;
33775 case UINT64_FTYPE_UINT64_UINT64:
33776 case UINT_FTYPE_UINT_UINT:
33777 case UINT_FTYPE_UINT_USHORT:
33778 case UINT_FTYPE_UINT_UCHAR:
33779 case UINT16_FTYPE_UINT16_INT:
33780 case UINT8_FTYPE_UINT8_INT:
33781 case HI_FTYPE_HI_HI:
33782 case V16SI_FTYPE_V8DF_V8DF:
33783 nargs = 2;
33784 break;
33785 case V2DI_FTYPE_V2DI_INT_CONVERT:
33786 nargs = 2;
33787 rmode = V1TImode;
33788 nargs_constant = 1;
33789 break;
33790 case V4DI_FTYPE_V4DI_INT_CONVERT:
33791 nargs = 2;
33792 rmode = V2TImode;
33793 nargs_constant = 1;
33794 break;
33795 case V8HI_FTYPE_V8HI_INT:
33796 case V8HI_FTYPE_V8SF_INT:
33797 case V16HI_FTYPE_V16SF_INT:
33798 case V8HI_FTYPE_V4SF_INT:
33799 case V8SF_FTYPE_V8SF_INT:
33800 case V4SF_FTYPE_V16SF_INT:
33801 case V16SF_FTYPE_V16SF_INT:
33802 case V4SI_FTYPE_V4SI_INT:
33803 case V4SI_FTYPE_V8SI_INT:
33804 case V4HI_FTYPE_V4HI_INT:
33805 case V4DF_FTYPE_V4DF_INT:
33806 case V4DF_FTYPE_V8DF_INT:
33807 case V4SF_FTYPE_V4SF_INT:
33808 case V4SF_FTYPE_V8SF_INT:
33809 case V2DI_FTYPE_V2DI_INT:
33810 case V2DF_FTYPE_V2DF_INT:
33811 case V2DF_FTYPE_V4DF_INT:
33812 case V16HI_FTYPE_V16HI_INT:
33813 case V8SI_FTYPE_V8SI_INT:
33814 case V16SI_FTYPE_V16SI_INT:
33815 case V4SI_FTYPE_V16SI_INT:
33816 case V4DI_FTYPE_V4DI_INT:
33817 case V2DI_FTYPE_V4DI_INT:
33818 case V4DI_FTYPE_V8DI_INT:
33819 case HI_FTYPE_HI_INT:
33820 nargs = 2;
33821 nargs_constant = 1;
33822 break;
33823 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33824 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33825 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33826 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33827 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33828 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33829 case HI_FTYPE_V16SI_V16SI_HI:
33830 case QI_FTYPE_V8DI_V8DI_QI:
33831 case V16HI_FTYPE_V16SI_V16HI_HI:
33832 case V16QI_FTYPE_V16SI_V16QI_HI:
33833 case V16QI_FTYPE_V8DI_V16QI_QI:
33834 case V16SF_FTYPE_V16SF_V16SF_HI:
33835 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33836 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33837 case V16SF_FTYPE_V16SI_V16SF_HI:
33838 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33839 case V16SF_FTYPE_V4SF_V16SF_HI:
33840 case V16SI_FTYPE_SI_V16SI_HI:
33841 case V16SI_FTYPE_V16HI_V16SI_HI:
33842 case V16SI_FTYPE_V16QI_V16SI_HI:
33843 case V16SI_FTYPE_V16SF_V16SI_HI:
33844 case V16SI_FTYPE_V16SI_V16SI_HI:
33845 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33846 case V16SI_FTYPE_V4SI_V16SI_HI:
33847 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33848 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33849 case V8DF_FTYPE_V2DF_V8DF_QI:
33850 case V8DF_FTYPE_V4DF_V8DF_QI:
33851 case V8DF_FTYPE_V8DF_V8DF_QI:
33852 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33853 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33854 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33855 case V8DF_FTYPE_V8SF_V8DF_QI:
33856 case V8DF_FTYPE_V8SI_V8DF_QI:
33857 case V8DI_FTYPE_DI_V8DI_QI:
33858 case V8DI_FTYPE_V16QI_V8DI_QI:
33859 case V8DI_FTYPE_V2DI_V8DI_QI:
33860 case V8DI_FTYPE_V4DI_V8DI_QI:
33861 case V8DI_FTYPE_V8DI_V8DI_QI:
33862 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33863 case V8DI_FTYPE_V8HI_V8DI_QI:
33864 case V8DI_FTYPE_V8SI_V8DI_QI:
33865 case V8HI_FTYPE_V8DI_V8HI_QI:
33866 case V8SF_FTYPE_V8DF_V8SF_QI:
33867 case V8SI_FTYPE_V8DF_V8SI_QI:
33868 case V8SI_FTYPE_V8DI_V8SI_QI:
33869 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33870 nargs = 3;
33871 break;
33872 case V32QI_FTYPE_V32QI_V32QI_INT:
33873 case V16HI_FTYPE_V16HI_V16HI_INT:
33874 case V16QI_FTYPE_V16QI_V16QI_INT:
33875 case V4DI_FTYPE_V4DI_V4DI_INT:
33876 case V8HI_FTYPE_V8HI_V8HI_INT:
33877 case V8SI_FTYPE_V8SI_V8SI_INT:
33878 case V8SI_FTYPE_V8SI_V4SI_INT:
33879 case V8SF_FTYPE_V8SF_V8SF_INT:
33880 case V8SF_FTYPE_V8SF_V4SF_INT:
33881 case V4SI_FTYPE_V4SI_V4SI_INT:
33882 case V4DF_FTYPE_V4DF_V4DF_INT:
33883 case V16SF_FTYPE_V16SF_V16SF_INT:
33884 case V16SF_FTYPE_V16SF_V4SF_INT:
33885 case V16SI_FTYPE_V16SI_V4SI_INT:
33886 case V4DF_FTYPE_V4DF_V2DF_INT:
33887 case V4SF_FTYPE_V4SF_V4SF_INT:
33888 case V2DI_FTYPE_V2DI_V2DI_INT:
33889 case V4DI_FTYPE_V4DI_V2DI_INT:
33890 case V2DF_FTYPE_V2DF_V2DF_INT:
33891 case QI_FTYPE_V8DI_V8DI_INT:
33892 case QI_FTYPE_V8DF_V8DF_INT:
33893 case QI_FTYPE_V2DF_V2DF_INT:
33894 case QI_FTYPE_V4SF_V4SF_INT:
33895 case HI_FTYPE_V16SI_V16SI_INT:
33896 case HI_FTYPE_V16SF_V16SF_INT:
33897 nargs = 3;
33898 nargs_constant = 1;
33899 break;
33900 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33901 nargs = 3;
33902 rmode = V4DImode;
33903 nargs_constant = 1;
33904 break;
33905 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33906 nargs = 3;
33907 rmode = V2DImode;
33908 nargs_constant = 1;
33909 break;
33910 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33911 nargs = 3;
33912 rmode = DImode;
33913 nargs_constant = 1;
33914 break;
33915 case V2DI_FTYPE_V2DI_UINT_UINT:
33916 nargs = 3;
33917 nargs_constant = 2;
33918 break;
33919 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33920 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33921 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33922 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33923 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33924 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33925 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33926 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33927 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33928 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33929 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33930 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33931 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33932 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33933 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33934 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33935 nargs = 4;
33936 break;
33937 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33938 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33939 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33940 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33941 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33942 nargs = 4;
33943 nargs_constant = 1;
33944 break;
33945 case QI_FTYPE_V2DF_V2DF_INT_QI:
33946 case QI_FTYPE_V4SF_V4SF_INT_QI:
33947 nargs = 4;
33948 mask_pos = 1;
33949 nargs_constant = 1;
33950 break;
33951 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33952 nargs = 4;
33953 nargs_constant = 2;
33954 break;
33955 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33956 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33957 nargs = 4;
33958 break;
33959 case QI_FTYPE_V8DI_V8DI_INT_QI:
33960 case HI_FTYPE_V16SI_V16SI_INT_HI:
33961 case QI_FTYPE_V8DF_V8DF_INT_QI:
33962 case HI_FTYPE_V16SF_V16SF_INT_HI:
33963 mask_pos = 1;
33964 nargs = 4;
33965 nargs_constant = 1;
33966 break;
33967 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33968 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33969 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33970 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33971 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33972 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33973 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33974 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33975 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33976 nargs = 4;
33977 mask_pos = 2;
33978 nargs_constant = 1;
33979 break;
33980 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33981 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33982 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33983 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33984 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33985 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33986 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33987 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33988 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33989 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33990 nargs = 5;
33991 mask_pos = 2;
33992 nargs_constant = 1;
33993 break;
33994 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33995 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33996 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33997 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33998 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33999 nargs = 5;
34000 mask_pos = 1;
34001 nargs_constant = 1;
34002 break;
34003
34004 default:
34005 gcc_unreachable ();
34006 }
34007
34008 gcc_assert (nargs <= ARRAY_SIZE (args));
34009
34010 if (comparison != UNKNOWN)
34011 {
34012 gcc_assert (nargs == 2);
34013 return ix86_expand_sse_compare (d, exp, target, swap);
34014 }
34015
34016 if (rmode == VOIDmode || rmode == tmode)
34017 {
34018 if (optimize
34019 || target == 0
34020 || GET_MODE (target) != tmode
34021 || !insn_p->operand[0].predicate (target, tmode))
34022 target = gen_reg_rtx (tmode);
34023 real_target = target;
34024 }
34025 else
34026 {
34027 real_target = gen_reg_rtx (tmode);
34028 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34029 }
34030
34031 for (i = 0; i < nargs; i++)
34032 {
34033 tree arg = CALL_EXPR_ARG (exp, i);
34034 rtx op = expand_normal (arg);
34035 enum machine_mode mode = insn_p->operand[i + 1].mode;
34036 bool match = insn_p->operand[i + 1].predicate (op, mode);
34037
34038 if (last_arg_count && (i + 1) == nargs)
34039 {
34040 /* SIMD shift insns take either an 8-bit immediate or a
34041 register as the count, but the builtin functions take an int
34042 as the count.  If the count doesn't match, put it in a register. */
34043 if (!match)
34044 {
34045 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34046 if (!insn_p->operand[i + 1].predicate (op, mode))
34047 op = copy_to_reg (op);
34048 }
34049 }
34050 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34051 || (!mask_pos && (nargs - i) <= nargs_constant))
34052 {
34053 if (!match)
34054 switch (icode)
34055 {
34056 case CODE_FOR_avx_vinsertf128v4di:
34057 case CODE_FOR_avx_vextractf128v4di:
34058 error ("the last argument must be an 1-bit immediate");
34059 return const0_rtx;
34060
34061 case CODE_FOR_avx512f_cmpv8di3_mask:
34062 case CODE_FOR_avx512f_cmpv16si3_mask:
34063 case CODE_FOR_avx512f_ucmpv8di3_mask:
34064 case CODE_FOR_avx512f_ucmpv16si3_mask:
34065 error ("the last argument must be a 3-bit immediate");
34066 return const0_rtx;
34067
34068 case CODE_FOR_sse4_1_roundsd:
34069 case CODE_FOR_sse4_1_roundss:
34070
34071 case CODE_FOR_sse4_1_roundpd:
34072 case CODE_FOR_sse4_1_roundps:
34073 case CODE_FOR_avx_roundpd256:
34074 case CODE_FOR_avx_roundps256:
34075
34076 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34077 case CODE_FOR_sse4_1_roundps_sfix:
34078 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34079 case CODE_FOR_avx_roundps_sfix256:
34080
34081 case CODE_FOR_sse4_1_blendps:
34082 case CODE_FOR_avx_blendpd256:
34083 case CODE_FOR_avx_vpermilv4df:
34084 case CODE_FOR_avx512f_getmantv8df_mask:
34085 case CODE_FOR_avx512f_getmantv16sf_mask:
34086 case CODE_FOR_avx512vl_getmantv8sf_mask:
34087 case CODE_FOR_avx512vl_getmantv4df_mask:
34088 case CODE_FOR_avx512vl_getmantv4sf_mask:
34089 case CODE_FOR_avx512vl_getmantv2df_mask:
34090 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34091 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34092 case CODE_FOR_avx512dq_rangepv4df_mask:
34093 case CODE_FOR_avx512dq_rangepv8sf_mask:
34094 case CODE_FOR_avx512dq_rangepv2df_mask:
34095 case CODE_FOR_avx512dq_rangepv4sf_mask:
34096 error ("the last argument must be a 4-bit immediate");
34097 return const0_rtx;
34098
34099 case CODE_FOR_sha1rnds4:
34100 case CODE_FOR_sse4_1_blendpd:
34101 case CODE_FOR_avx_vpermilv2df:
34102 case CODE_FOR_xop_vpermil2v2df3:
34103 case CODE_FOR_xop_vpermil2v4sf3:
34104 case CODE_FOR_xop_vpermil2v4df3:
34105 case CODE_FOR_xop_vpermil2v8sf3:
34106 case CODE_FOR_avx512f_vinsertf32x4_mask:
34107 case CODE_FOR_avx512f_vinserti32x4_mask:
34108 case CODE_FOR_avx512f_vextractf32x4_mask:
34109 case CODE_FOR_avx512f_vextracti32x4_mask:
34110 case CODE_FOR_sse2_shufpd:
34111 case CODE_FOR_sse2_shufpd_mask:
34112 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34113 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34114 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34115 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34116 error ("the last argument must be a 2-bit immediate");
34117 return const0_rtx;
34118
34119 case CODE_FOR_avx_vextractf128v4df:
34120 case CODE_FOR_avx_vextractf128v8sf:
34121 case CODE_FOR_avx_vextractf128v8si:
34122 case CODE_FOR_avx_vinsertf128v4df:
34123 case CODE_FOR_avx_vinsertf128v8sf:
34124 case CODE_FOR_avx_vinsertf128v8si:
34125 case CODE_FOR_avx512f_vinsertf64x4_mask:
34126 case CODE_FOR_avx512f_vinserti64x4_mask:
34127 case CODE_FOR_avx512f_vextractf64x4_mask:
34128 case CODE_FOR_avx512f_vextracti64x4_mask:
34129 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34130 case CODE_FOR_avx512dq_vinserti32x8_mask:
34131 case CODE_FOR_avx512vl_vinsertv4df:
34132 case CODE_FOR_avx512vl_vinsertv4di:
34133 case CODE_FOR_avx512vl_vinsertv8sf:
34134 case CODE_FOR_avx512vl_vinsertv8si:
34135 error ("the last argument must be a 1-bit immediate");
34136 return const0_rtx;
34137
34138 case CODE_FOR_avx_vmcmpv2df3:
34139 case CODE_FOR_avx_vmcmpv4sf3:
34140 case CODE_FOR_avx_cmpv2df3:
34141 case CODE_FOR_avx_cmpv4sf3:
34142 case CODE_FOR_avx_cmpv4df3:
34143 case CODE_FOR_avx_cmpv8sf3:
34144 case CODE_FOR_avx512f_cmpv8df3_mask:
34145 case CODE_FOR_avx512f_cmpv16sf3_mask:
34146 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34147 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34148 error ("the last argument must be a 5-bit immediate");
34149 return const0_rtx;
34150
34151 default:
34152 switch (nargs_constant)
34153 {
34154 case 2:
34155 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34156 || (!mask_pos && (nargs - i) == nargs_constant))
34157 {
34158 error ("the next to last argument must be an 8-bit immediate");
34159 break;
34160 }
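/* FALLTHRU */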
34161 case 1:
34162 error ("the last argument must be an 8-bit immediate");
34163 break;
34164 default:
34165 gcc_unreachable ();
34166 }
34167 return const0_rtx;
34168 }
34169 }
34170 else
34171 {
34172 if (VECTOR_MODE_P (mode))
34173 op = safe_vector_operand (op, mode);
34174
34175 /* If we aren't optimizing, only allow one memory operand to
34176 be generated. */
34177 if (memory_operand (op, mode))
34178 num_memory++;
34179
34180 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34181 {
34182 if (optimize || !match || num_memory > 1)
34183 op = copy_to_mode_reg (mode, op);
34184 }
34185 else
34186 {
34187 op = copy_to_reg (op);
34188 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34189 }
34190 }
34191
34192 args[i].op = op;
34193 args[i].mode = mode;
34194 }
34195
34196 switch (nargs)
34197 {
34198 case 1:
34199 pat = GEN_FCN (icode) (real_target, args[0].op);
34200 break;
34201 case 2:
34202 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34203 break;
34204 case 3:
34205 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34206 args[2].op);
34207 break;
34208 case 4:
34209 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34210 args[2].op, args[3].op);
34211 break;
34212 case 5:
34213 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34214 args[2].op, args[3].op, args[4].op);
break;
34215 case 6:
34216 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34217 args[2].op, args[3].op, args[4].op,
34218 args[5].op);
34219 break;
34220 default:
34221 gcc_unreachable ();
34222 }
34223
34224 if (! pat)
34225 return 0;
34226
34227 emit_insn (pat);
34228 return target;
34229 }
34230
34231 /* Transform pattern of following layout:
34232 (parallel [
34233 set (A B)
34234 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
34235 ])
34236 into:
34237 (set (A B))
34238
34239 Or:
34240 (parallel [ A B
34241 ...
34242 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34243 ...
34244 ])
34245 into:
34246 (parallel [ A B ... ]) */
34247
34248 static rtx
34249 ix86_erase_embedded_rounding (rtx pat)
34250 {
34251 if (GET_CODE (pat) == INSN)
34252 pat = PATTERN (pat);
34253
34254 gcc_assert (GET_CODE (pat) == PARALLEL);
34255
34256 if (XVECLEN (pat, 0) == 2)
34257 {
34258 rtx p0 = XVECEXP (pat, 0, 0);
34259 rtx p1 = XVECEXP (pat, 0, 1);
34260
34261 gcc_assert (GET_CODE (p0) == SET
34262 && GET_CODE (p1) == UNSPEC
34263 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34264
34265 return p0;
34266 }
34267 else
34268 {
34269 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34270 int i = 0;
34271 int j = 0;
34272
34273 for (; i < XVECLEN (pat, 0); ++i)
34274 {
34275 rtx elem = XVECEXP (pat, 0, i);
34276 if (GET_CODE (elem) != UNSPEC
34277 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34278 res [j++] = elem;
34279 }
34280
34281 /* At most one occurrence was removed. */
34282 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34283
34284 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34285 }
34286 }
34287
34288 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34289 with rounding. */
34290 static rtx
34291 ix86_expand_sse_comi_round (const struct builtin_description *d,
34292 tree exp, rtx target)
34293 {
34294 rtx pat, set_dst;
34295 tree arg0 = CALL_EXPR_ARG (exp, 0);
34296 tree arg1 = CALL_EXPR_ARG (exp, 1);
34297 tree arg2 = CALL_EXPR_ARG (exp, 2);
34298 tree arg3 = CALL_EXPR_ARG (exp, 3);
34299 rtx op0 = expand_normal (arg0);
34300 rtx op1 = expand_normal (arg1);
34301 rtx op2 = expand_normal (arg2);
34302 rtx op3 = expand_normal (arg3);
34303 enum insn_code icode = d->icode;
34304 const struct insn_data_d *insn_p = &insn_data[icode];
34305 enum machine_mode mode0 = insn_p->operand[0].mode;
34306 enum machine_mode mode1 = insn_p->operand[1].mode;
34307 enum rtx_code comparison = UNEQ;
34308 bool need_ucomi = false;
34309
34310 /* See avxintrin.h for values. */
34311 enum rtx_code comi_comparisons[32] =
34312 {
34313 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34314 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34315 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34316 };
34317 bool need_ucomi_values[32] =
34318 {
34319 true, false, false, true, true, false, false, true,
34320 true, false, false, true, true, false, false, true,
34321 false, true, true, false, false, true, true, false,
34322 false, true, true, false, false, true, true, false
34323 };
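/* For example, with the usual avxintrin.h encoding index 0 is _CMP_EQ_OQ
(a quiet compare, hence the ucomi form) and index 1 is _CMP_LT_OS (a
signaling compare, hence the comi form); the tables above select the
rtx comparison code and the comi/ucomi variant from that predicate value. */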
34324
34325 if (!CONST_INT_P (op2))
34326 {
34327 error ("the third argument must be comparison constant");
34328 return const0_rtx;
34329 }
34330 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34331 {
34332 error ("incorect comparison mode");
34333 return const0_rtx;
34334 }
34335
34336 if (!insn_p->operand[2].predicate (op3, SImode))
34337 {
34338 error ("incorrect rounding operand");
34339 return const0_rtx;
34340 }
34341
34342 comparison = comi_comparisons[INTVAL (op2)];
34343 need_ucomi = need_ucomi_values[INTVAL (op2)];
34344
34345 if (VECTOR_MODE_P (mode0))
34346 op0 = safe_vector_operand (op0, mode0);
34347 if (VECTOR_MODE_P (mode1))
34348 op1 = safe_vector_operand (op1, mode1);
34349
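/* Build the boolean result in the low byte of a zeroed SImode register;
the STRICT_LOW_PART store emitted below then leaves the upper bits
zero, so SUBREG_REG (target) holds a clean 0/1 value. */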
34350 target = gen_reg_rtx (SImode);
34351 emit_move_insn (target, const0_rtx);
34352 target = gen_rtx_SUBREG (QImode, target, 0);
34353
34354 if ((optimize && !register_operand (op0, mode0))
34355 || !insn_p->operand[0].predicate (op0, mode0))
34356 op0 = copy_to_mode_reg (mode0, op0);
34357 if ((optimize && !register_operand (op1, mode1))
34358 || !insn_p->operand[1].predicate (op1, mode1))
34359 op1 = copy_to_mode_reg (mode1, op1);
34360
34361 if (need_ucomi)
34362 icode = icode == CODE_FOR_sse_comi_round
34363 ? CODE_FOR_sse_ucomi_round
34364 : CODE_FOR_sse2_ucomi_round;
34365
34366 pat = GEN_FCN (icode) (op0, op1, op3);
34367 if (! pat)
34368 return 0;
34369
34370 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34371 if (INTVAL (op3) == NO_ROUND)
34372 {
34373 pat = ix86_erase_embedded_rounding (pat);
34374 if (! pat)
34375 return 0;
34376
34377 set_dst = SET_DEST (pat);
34378 }
34379 else
34380 {
34381 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34382 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34383 }
34384
34385 emit_insn (pat);
34386 emit_insn (gen_rtx_SET (VOIDmode,
34387 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34388 gen_rtx_fmt_ee (comparison, QImode,
34389 set_dst,
34390 const0_rtx)));
34391
34392 return SUBREG_REG (target);
34393 }
34394
34395 static rtx
34396 ix86_expand_round_builtin (const struct builtin_description *d,
34397 tree exp, rtx target)
34398 {
34399 rtx pat;
34400 unsigned int i, nargs;
34401 struct
34402 {
34403 rtx op;
34404 enum machine_mode mode;
34405 } args[6];
34406 enum insn_code icode = d->icode;
34407 const struct insn_data_d *insn_p = &insn_data[icode];
34408 enum machine_mode tmode = insn_p->operand[0].mode;
34409 unsigned int nargs_constant = 0;
34410 unsigned int redundant_embed_rnd = 0;
34411
34412 switch ((enum ix86_builtin_func_type) d->flag)
34413 {
34414 case UINT64_FTYPE_V2DF_INT:
34415 case UINT64_FTYPE_V4SF_INT:
34416 case UINT_FTYPE_V2DF_INT:
34417 case UINT_FTYPE_V4SF_INT:
34418 case INT64_FTYPE_V2DF_INT:
34419 case INT64_FTYPE_V4SF_INT:
34420 case INT_FTYPE_V2DF_INT:
34421 case INT_FTYPE_V4SF_INT:
34422 nargs = 2;
34423 break;
34424 case V4SF_FTYPE_V4SF_UINT_INT:
34425 case V4SF_FTYPE_V4SF_UINT64_INT:
34426 case V2DF_FTYPE_V2DF_UINT64_INT:
34427 case V4SF_FTYPE_V4SF_INT_INT:
34428 case V4SF_FTYPE_V4SF_INT64_INT:
34429 case V2DF_FTYPE_V2DF_INT64_INT:
34430 case V4SF_FTYPE_V4SF_V4SF_INT:
34431 case V2DF_FTYPE_V2DF_V2DF_INT:
34432 case V4SF_FTYPE_V4SF_V2DF_INT:
34433 case V2DF_FTYPE_V2DF_V4SF_INT:
34434 nargs = 3;
34435 break;
34436 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34437 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34438 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34439 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34440 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34441 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34442 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34443 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34444 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34445 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34446 nargs = 4;
34447 break;
34448 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34449 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34450 nargs_constant = 2;
34451 nargs = 4;
34452 break;
34453 case INT_FTYPE_V4SF_V4SF_INT_INT:
34454 case INT_FTYPE_V2DF_V2DF_INT_INT:
34455 return ix86_expand_sse_comi_round (d, exp, target);
34456 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34457 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34458 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34459 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34460 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34461 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34462 nargs = 5;
34463 break;
34464 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34465 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34466 nargs_constant = 4;
34467 nargs = 5;
34468 break;
34469 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34470 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34471 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34472 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34473 nargs_constant = 3;
34474 nargs = 5;
34475 break;
34476 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34477 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34478 nargs = 6;
34479 nargs_constant = 4;
34480 break;
34481 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34482 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34483 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34484 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34485 nargs = 6;
34486 nargs_constant = 3;
34487 break;
34488 default:
34489 gcc_unreachable ();
34490 }
34491 gcc_assert (nargs <= ARRAY_SIZE (args));
34492
34493 if (optimize
34494 || target == 0
34495 || GET_MODE (target) != tmode
34496 || !insn_p->operand[0].predicate (target, tmode))
34497 target = gen_reg_rtx (tmode);
34498
34499 for (i = 0; i < nargs; i++)
34500 {
34501 tree arg = CALL_EXPR_ARG (exp, i);
34502 rtx op = expand_normal (arg);
34503 enum machine_mode mode = insn_p->operand[i + 1].mode;
34504 bool match = insn_p->operand[i + 1].predicate (op, mode);
34505
34506 if (i == nargs - nargs_constant)
34507 {
34508 if (!match)
34509 {
34510 switch (icode)
34511 {
34512 case CODE_FOR_avx512f_getmantv8df_mask_round:
34513 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34514 case CODE_FOR_avx512f_vgetmantv2df_round:
34515 case CODE_FOR_avx512f_vgetmantv4sf_round:
34516 error ("the immediate argument must be a 4-bit immediate");
34517 return const0_rtx;
34518 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34519 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34520 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34521 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34522 error ("the immediate argument must be a 5-bit immediate");
34523 return const0_rtx;
34524 default:
34525 error ("the immediate argument must be an 8-bit immediate");
34526 return const0_rtx;
34527 }
34528 }
34529 }
34530 else if (i == nargs - 1)
34531 {
34532 if (!insn_p->operand[nargs].predicate (op, SImode))
34533 {
34534 error ("incorrect rounding operand");
34535 return const0_rtx;
34536 }
34537
34538 /* If there is no rounding, use the normal version of the pattern. */
34539 if (INTVAL (op) == NO_ROUND)
34540 redundant_embed_rnd = 1;
34541 }
34542 else
34543 {
34544 if (VECTOR_MODE_P (mode))
34545 op = safe_vector_operand (op, mode);
34546
34547 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34548 {
34549 if (optimize || !match)
34550 op = copy_to_mode_reg (mode, op);
34551 }
34552 else
34553 {
34554 op = copy_to_reg (op);
34555 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34556 }
34557 }
34558
34559 args[i].op = op;
34560 args[i].mode = mode;
34561 }
34562
34563 switch (nargs)
34564 {
34565 case 1:
34566 pat = GEN_FCN (icode) (target, args[0].op);
34567 break;
34568 case 2:
34569 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34570 break;
34571 case 3:
34572 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34573 args[2].op);
34574 break;
34575 case 4:
34576 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34577 args[2].op, args[3].op);
34578 break;
34579 case 5:
34580 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34581 args[2].op, args[3].op, args[4].op);
break;
34582 case 6:
34583 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34584 args[2].op, args[3].op, args[4].op,
34585 args[5].op);
34586 break;
34587 default:
34588 gcc_unreachable ();
34589 }
34590
34591 if (!pat)
34592 return 0;
34593
34594 if (redundant_embed_rnd)
34595 pat = ix86_erase_embedded_rounding (pat);
34596
34597 emit_insn (pat);
34598 return target;
34599 }
34600
34601 /* Subroutine of ix86_expand_builtin to take care of special insns
34602 with variable number of operands. */
34603
34604 static rtx
34605 ix86_expand_special_args_builtin (const struct builtin_description *d,
34606 tree exp, rtx target)
34607 {
34608 tree arg;
34609 rtx pat, op;
34610 unsigned int i, nargs, arg_adjust, memory;
34611 bool aligned_mem = false;
34612 struct
34613 {
34614 rtx op;
34615 enum machine_mode mode;
34616 } args[3];
34617 enum insn_code icode = d->icode;
34618 bool last_arg_constant = false;
34619 const struct insn_data_d *insn_p = &insn_data[icode];
34620 enum machine_mode tmode = insn_p->operand[0].mode;
34621 enum { load, store } klass;
34622
34623 switch ((enum ix86_builtin_func_type) d->flag)
34624 {
34625 case VOID_FTYPE_VOID:
34626 emit_insn (GEN_FCN (icode) (target));
34627 return 0;
34628 case VOID_FTYPE_UINT64:
34629 case VOID_FTYPE_UNSIGNED:
34630 nargs = 0;
34631 klass = store;
34632 memory = 0;
34633 break;
34634
34635 case INT_FTYPE_VOID:
34636 case USHORT_FTYPE_VOID:
34637 case UINT64_FTYPE_VOID:
34638 case UNSIGNED_FTYPE_VOID:
34639 nargs = 0;
34640 klass = load;
34641 memory = 0;
34642 break;
34643 case UINT64_FTYPE_PUNSIGNED:
34644 case V2DI_FTYPE_PV2DI:
34645 case V4DI_FTYPE_PV4DI:
34646 case V32QI_FTYPE_PCCHAR:
34647 case V16QI_FTYPE_PCCHAR:
34648 case V8SF_FTYPE_PCV4SF:
34649 case V8SF_FTYPE_PCFLOAT:
34650 case V4SF_FTYPE_PCFLOAT:
34651 case V4DF_FTYPE_PCV2DF:
34652 case V4DF_FTYPE_PCDOUBLE:
34653 case V2DF_FTYPE_PCDOUBLE:
34654 case VOID_FTYPE_PVOID:
34655 case V16SI_FTYPE_PV4SI:
34656 case V16SF_FTYPE_PV4SF:
34657 case V8DI_FTYPE_PV4DI:
34658 case V8DI_FTYPE_PV8DI:
34659 case V8DF_FTYPE_PV4DF:
34660 nargs = 1;
34661 klass = load;
34662 memory = 0;
34663 switch (icode)
34664 {
34665 case CODE_FOR_sse4_1_movntdqa:
34666 case CODE_FOR_avx2_movntdqa:
34667 case CODE_FOR_avx512f_movntdqa:
34668 aligned_mem = true;
34669 break;
34670 default:
34671 break;
34672 }
34673 break;
34674 case VOID_FTYPE_PV2SF_V4SF:
34675 case VOID_FTYPE_PV8DI_V8DI:
34676 case VOID_FTYPE_PV4DI_V4DI:
34677 case VOID_FTYPE_PV2DI_V2DI:
34678 case VOID_FTYPE_PCHAR_V32QI:
34679 case VOID_FTYPE_PCHAR_V16QI:
34680 case VOID_FTYPE_PFLOAT_V16SF:
34681 case VOID_FTYPE_PFLOAT_V8SF:
34682 case VOID_FTYPE_PFLOAT_V4SF:
34683 case VOID_FTYPE_PDOUBLE_V8DF:
34684 case VOID_FTYPE_PDOUBLE_V4DF:
34685 case VOID_FTYPE_PDOUBLE_V2DF:
34686 case VOID_FTYPE_PLONGLONG_LONGLONG:
34687 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34688 case VOID_FTYPE_PINT_INT:
34689 nargs = 1;
34690 klass = store;
34691 /* Reserve memory operand for target. */
34692 memory = ARRAY_SIZE (args);
34693 switch (icode)
34694 {
34695 /* These builtins and instructions require the memory
34696 to be properly aligned. */
34697 case CODE_FOR_avx_movntv4di:
34698 case CODE_FOR_sse2_movntv2di:
34699 case CODE_FOR_avx_movntv8sf:
34700 case CODE_FOR_sse_movntv4sf:
34701 case CODE_FOR_sse4a_vmmovntv4sf:
34702 case CODE_FOR_avx_movntv4df:
34703 case CODE_FOR_sse2_movntv2df:
34704 case CODE_FOR_sse4a_vmmovntv2df:
34705 case CODE_FOR_sse2_movntidi:
34706 case CODE_FOR_sse_movntq:
34707 case CODE_FOR_sse2_movntisi:
34708 case CODE_FOR_avx512f_movntv16sf:
34709 case CODE_FOR_avx512f_movntv8df:
34710 case CODE_FOR_avx512f_movntv8di:
34711 aligned_mem = true;
34712 break;
34713 default:
34714 break;
34715 }
34716 break;
34717 case V4SF_FTYPE_V4SF_PCV2SF:
34718 case V2DF_FTYPE_V2DF_PCDOUBLE:
34719 nargs = 2;
34720 klass = load;
34721 memory = 1;
34722 break;
34723 case V8SF_FTYPE_PCV8SF_V8SI:
34724 case V4DF_FTYPE_PCV4DF_V4DI:
34725 case V4SF_FTYPE_PCV4SF_V4SI:
34726 case V2DF_FTYPE_PCV2DF_V2DI:
34727 case V8SI_FTYPE_PCV8SI_V8SI:
34728 case V4DI_FTYPE_PCV4DI_V4DI:
34729 case V4SI_FTYPE_PCV4SI_V4SI:
34730 case V2DI_FTYPE_PCV2DI_V2DI:
34731 nargs = 2;
34732 klass = load;
34733 memory = 0;
34734 break;
34735 case VOID_FTYPE_PV8DF_V8DF_QI:
34736 case VOID_FTYPE_PV16SF_V16SF_HI:
34737 case VOID_FTYPE_PV8DI_V8DI_QI:
34738 case VOID_FTYPE_PV16SI_V16SI_HI:
34739 switch (icode)
34740 {
34741 /* These builtins and instructions require the memory
34742 to be properly aligned. */
34743 case CODE_FOR_avx512f_storev16sf_mask:
34744 case CODE_FOR_avx512f_storev16si_mask:
34745 case CODE_FOR_avx512f_storev8df_mask:
34746 case CODE_FOR_avx512f_storev8di_mask:
34747 case CODE_FOR_avx512vl_storev8sf_mask:
34748 case CODE_FOR_avx512vl_storev8si_mask:
34749 case CODE_FOR_avx512vl_storev4df_mask:
34750 case CODE_FOR_avx512vl_storev4di_mask:
34751 case CODE_FOR_avx512vl_storev4sf_mask:
34752 case CODE_FOR_avx512vl_storev4si_mask:
34753 case CODE_FOR_avx512vl_storev2df_mask:
34754 case CODE_FOR_avx512vl_storev2di_mask:
34755 aligned_mem = true;
34756 break;
34757 default:
34758 break;
34759 }
34760 /* FALLTHRU */
34761 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34762 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34763 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34764 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34765 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34766 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34767 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34768 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34769 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34770 case VOID_FTYPE_PFLOAT_V4SF_QI:
34771 case VOID_FTYPE_PV8SI_V8DI_QI:
34772 case VOID_FTYPE_PV8HI_V8DI_QI:
34773 case VOID_FTYPE_PV16HI_V16SI_HI:
34774 case VOID_FTYPE_PV16QI_V8DI_QI:
34775 case VOID_FTYPE_PV16QI_V16SI_HI:
34776 nargs = 2;
34777 klass = store;
34778 /* Reserve memory operand for target. */
34779 memory = ARRAY_SIZE (args);
34780 break;
34781 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34782 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34783 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34784 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34785 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34786 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34787 nargs = 3;
34788 klass = load;
34789 memory = 0;
34790 switch (icode)
34791 {
34792 /* These builtins and instructions require the memory
34793 to be properly aligned. */
34794 case CODE_FOR_avx512f_loadv16sf_mask:
34795 case CODE_FOR_avx512f_loadv16si_mask:
34796 case CODE_FOR_avx512f_loadv8df_mask:
34797 case CODE_FOR_avx512f_loadv8di_mask:
34798 case CODE_FOR_avx512vl_loadv8sf_mask:
34799 case CODE_FOR_avx512vl_loadv8si_mask:
34800 case CODE_FOR_avx512vl_loadv4df_mask:
34801 case CODE_FOR_avx512vl_loadv4di_mask:
34802 case CODE_FOR_avx512vl_loadv4sf_mask:
34803 case CODE_FOR_avx512vl_loadv4si_mask:
34804 case CODE_FOR_avx512vl_loadv2df_mask:
34805 case CODE_FOR_avx512vl_loadv2di_mask:
34806 case CODE_FOR_avx512bw_loadv64qi_mask:
34807 case CODE_FOR_avx512vl_loadv32qi_mask:
34808 case CODE_FOR_avx512vl_loadv16qi_mask:
34809 case CODE_FOR_avx512bw_loadv32hi_mask:
34810 case CODE_FOR_avx512vl_loadv16hi_mask:
34811 case CODE_FOR_avx512vl_loadv8hi_mask:
34812 aligned_mem = true;
34813 break;
34814 default:
34815 break;
34816 }
34817 break;
34818 case VOID_FTYPE_UINT_UINT_UINT:
34819 case VOID_FTYPE_UINT64_UINT_UINT:
34820 case UCHAR_FTYPE_UINT_UINT_UINT:
34821 case UCHAR_FTYPE_UINT64_UINT_UINT:
34822 nargs = 3;
34823 klass = load;
34824 memory = ARRAY_SIZE (args);
34825 last_arg_constant = true;
34826 break;
34827 default:
34828 gcc_unreachable ();
34829 }
34830
34831 gcc_assert (nargs <= ARRAY_SIZE (args));
34832
34833 if (klass == store)
34834 {
34835 arg = CALL_EXPR_ARG (exp, 0);
34836 op = expand_normal (arg);
34837 gcc_assert (target == 0);
34838 if (memory)
34839 {
34840 op = ix86_zero_extend_to_Pmode (op);
34841 target = gen_rtx_MEM (tmode, op);
34842 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34843 on it. Try to improve it using get_pointer_alignment,
34844 and if the special builtin is one that requires strict
34845 mode alignment, also from its GET_MODE_ALIGNMENT.
34846 Failure to do so could lead to ix86_legitimate_combined_insn
34847 rejecting all changes to such insns. */
34848 unsigned int align = get_pointer_alignment (arg);
34849 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34850 align = GET_MODE_ALIGNMENT (tmode);
34851 if (MEM_ALIGN (target) < align)
34852 set_mem_align (target, align);
34853 }
34854 else
34855 target = force_reg (tmode, op);
34856 arg_adjust = 1;
34857 }
34858 else
34859 {
34860 arg_adjust = 0;
34861 if (optimize
34862 || target == 0
34863 || !register_operand (target, tmode)
34864 || GET_MODE (target) != tmode)
34865 target = gen_reg_rtx (tmode);
34866 }
34867
34868 for (i = 0; i < nargs; i++)
34869 {
34870 enum machine_mode mode = insn_p->operand[i + 1].mode;
34871 bool match;
34872
34873 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34874 op = expand_normal (arg);
34875 match = insn_p->operand[i + 1].predicate (op, mode);
34876
34877 if (last_arg_constant && (i + 1) == nargs)
34878 {
34879 if (!match)
34880 {
34881 if (icode == CODE_FOR_lwp_lwpvalsi3
34882 || icode == CODE_FOR_lwp_lwpinssi3
34883 || icode == CODE_FOR_lwp_lwpvaldi3
34884 || icode == CODE_FOR_lwp_lwpinsdi3)
34885 error ("the last argument must be a 32-bit immediate");
34886 else
34887 error ("the last argument must be an 8-bit immediate");
34888 return const0_rtx;
34889 }
34890 }
34891 else
34892 {
34893 if (i == memory)
34894 {
34895 /* This must be the memory operand. */
34896 op = ix86_zero_extend_to_Pmode (op);
34897 op = gen_rtx_MEM (mode, op);
34898 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34899 on it. Try to improve it using get_pointer_alignment,
34900 and if the special builtin is one that requires strict
34901 mode alignment, also from its GET_MODE_ALIGNMENT.
34902 Failure to do so could lead to ix86_legitimate_combined_insn
34903 rejecting all changes to such insns. */
34904 unsigned int align = get_pointer_alignment (arg);
34905 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34906 align = GET_MODE_ALIGNMENT (mode);
34907 if (MEM_ALIGN (op) < align)
34908 set_mem_align (op, align);
34909 }
34910 else
34911 {
34912 /* This must be a register. */
34913 if (VECTOR_MODE_P (mode))
34914 op = safe_vector_operand (op, mode);
34915
34916 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34917 op = copy_to_mode_reg (mode, op);
34918 else
34919 {
34920 op = copy_to_reg (op);
34921 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34922 }
34923 }
34924 }
34925
34926 args[i].op = op;
34927 args[i].mode = mode;
34928 }
34929
34930 switch (nargs)
34931 {
34932 case 0:
34933 pat = GEN_FCN (icode) (target);
34934 break;
34935 case 1:
34936 pat = GEN_FCN (icode) (target, args[0].op);
34937 break;
34938 case 2:
34939 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34940 break;
34941 case 3:
34942 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34943 break;
34944 default:
34945 gcc_unreachable ();
34946 }
34947
34948 if (! pat)
34949 return 0;
34950 emit_insn (pat);
34951 return klass == store ? 0 : target;
34952 }
34953
34954 /* Return the integer constant in ARG. Constrain it to be in the range
34955 of the subparts of VEC_TYPE; issue an error if not. */
34956
34957 static int
34958 get_element_number (tree vec_type, tree arg)
34959 {
34960 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34961
34962 if (!tree_fits_uhwi_p (arg)
34963 || (elt = tree_to_uhwi (arg), elt > max))
34964 {
34965 error ("selector must be an integer constant in the range 0..%wi", max);
34966 return 0;
34967 }
34968
34969 return elt;
34970 }
34971
34972 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34973 ix86_expand_vector_init. We DO have language-level syntax for this, in
34974 the form of (type){ init-list }. Except that since we can't place emms
34975 instructions from inside the compiler, we can't allow the use of MMX
34976 registers unless the user explicitly asks for it. So we do *not* define
34977 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34978 we have builtins invoked by mmintrin.h that give us license to emit
34979 these sorts of instructions. */
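/* For instance, mmintrin.h's _mm_set_pi32-style wrappers are expected to
funnel into these builtins with one call argument per vector element;
the loop below simply collects those arguments into a PARALLEL for
ix86_expand_vector_init. */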
34980
34981 static rtx
34982 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34983 {
34984 enum machine_mode tmode = TYPE_MODE (type);
34985 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34986 int i, n_elt = GET_MODE_NUNITS (tmode);
34987 rtvec v = rtvec_alloc (n_elt);
34988
34989 gcc_assert (VECTOR_MODE_P (tmode));
34990 gcc_assert (call_expr_nargs (exp) == n_elt);
34991
34992 for (i = 0; i < n_elt; ++i)
34993 {
34994 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34995 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34996 }
34997
34998 if (!target || !register_operand (target, tmode))
34999 target = gen_reg_rtx (tmode);
35000
35001 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35002 return target;
35003 }
35004
35005 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35006 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35007 had a language-level syntax for referencing vector elements. */
35008
35009 static rtx
35010 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35011 {
35012 enum machine_mode tmode, mode0;
35013 tree arg0, arg1;
35014 int elt;
35015 rtx op0;
35016
35017 arg0 = CALL_EXPR_ARG (exp, 0);
35018 arg1 = CALL_EXPR_ARG (exp, 1);
35019
35020 op0 = expand_normal (arg0);
35021 elt = get_element_number (TREE_TYPE (arg0), arg1);
35022
35023 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35024 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35025 gcc_assert (VECTOR_MODE_P (mode0));
35026
35027 op0 = force_reg (mode0, op0);
35028
35029 if (optimize || !target || !register_operand (target, tmode))
35030 target = gen_reg_rtx (tmode);
35031
35032 ix86_expand_vector_extract (true, target, op0, elt);
35033
35034 return target;
35035 }
35036
35037 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35038 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35039 a language-level syntax for referencing vector elements. */
35040
35041 static rtx
35042 ix86_expand_vec_set_builtin (tree exp)
35043 {
35044 enum machine_mode tmode, mode1;
35045 tree arg0, arg1, arg2;
35046 int elt;
35047 rtx op0, op1, target;
35048
35049 arg0 = CALL_EXPR_ARG (exp, 0);
35050 arg1 = CALL_EXPR_ARG (exp, 1);
35051 arg2 = CALL_EXPR_ARG (exp, 2);
35052
35053 tmode = TYPE_MODE (TREE_TYPE (arg0));
35054 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35055 gcc_assert (VECTOR_MODE_P (tmode));
35056
35057 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35058 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35059 elt = get_element_number (TREE_TYPE (arg0), arg2);
35060
35061 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35062 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35063
35064 op0 = force_reg (tmode, op0);
35065 op1 = force_reg (mode1, op1);
35066
35067 /* OP0 is the source of these builtin functions and shouldn't be
35068 modified. Create a copy, use it and return it as target. */
35069 target = gen_reg_rtx (tmode);
35070 emit_move_insn (target, op0);
35071 ix86_expand_vector_set (true, target, op1, elt);
35072
35073 return target;
35074 }
35075
35076 /* Expand an expression EXP that calls a built-in function,
35077 with result going to TARGET if that's convenient
35078 (and in mode MODE if that's convenient).
35079 SUBTARGET may be used as the target for computing one of EXP's operands.
35080 IGNORE is nonzero if the value is to be ignored. */
35081
35082 static rtx
35083 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35084 enum machine_mode mode, int ignore)
35085 {
35086 const struct builtin_description *d;
35087 size_t i;
35088 enum insn_code icode;
35089 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35090 tree arg0, arg1, arg2, arg3, arg4;
35091 rtx op0, op1, op2, op3, op4, pat, insn;
35092 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35093 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35094
35095 /* For CPU builtins that can be folded, fold first and expand the fold. */
35096 switch (fcode)
35097 {
35098 case IX86_BUILTIN_CPU_INIT:
35099 {
35100 /* Make it call __cpu_indicator_init in libgcc. */
35101 tree call_expr, fndecl, type;
35102 type = build_function_type_list (integer_type_node, NULL_TREE);
35103 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35104 call_expr = build_call_expr (fndecl, 0);
35105 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35106 }
35107 case IX86_BUILTIN_CPU_IS:
35108 case IX86_BUILTIN_CPU_SUPPORTS:
35109 {
35110 tree arg0 = CALL_EXPR_ARG (exp, 0);
35111 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35112 gcc_assert (fold_expr != NULL_TREE);
35113 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35114 }
35115 }
35116
35117 /* Determine whether the builtin function is available under the current ISA.
35118 Originally the builtin was not created if it wasn't applicable to the
35119 current ISA based on the command line switches. With function specific
35120 options, we need to check in the context of the function making the call
35121 whether it is supported. */
35122 if (ix86_builtins_isa[fcode].isa
35123 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35124 {
35125 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35126 NULL, (enum fpmath_unit) 0, false);
35127
35128 if (!opts)
35129 error ("%qE needs unknown isa option", fndecl);
35130 else
35131 {
35132 gcc_assert (opts != NULL);
35133 error ("%qE needs isa option %s", fndecl, opts);
35134 free (opts);
35135 }
35136 return const0_rtx;
35137 }
35138
35139 switch (fcode)
35140 {
35141 case IX86_BUILTIN_MASKMOVQ:
35142 case IX86_BUILTIN_MASKMOVDQU:
35143 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35144 ? CODE_FOR_mmx_maskmovq
35145 : CODE_FOR_sse2_maskmovdqu);
35146 /* Note the arg order is different from the operand order. */
35147 arg1 = CALL_EXPR_ARG (exp, 0);
35148 arg2 = CALL_EXPR_ARG (exp, 1);
35149 arg0 = CALL_EXPR_ARG (exp, 2);
35150 op0 = expand_normal (arg0);
35151 op1 = expand_normal (arg1);
35152 op2 = expand_normal (arg2);
35153 mode0 = insn_data[icode].operand[0].mode;
35154 mode1 = insn_data[icode].operand[1].mode;
35155 mode2 = insn_data[icode].operand[2].mode;
35156
35157 op0 = ix86_zero_extend_to_Pmode (op0);
35158 op0 = gen_rtx_MEM (mode1, op0);
35159
35160 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35161 op0 = copy_to_mode_reg (mode0, op0);
35162 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35163 op1 = copy_to_mode_reg (mode1, op1);
35164 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35165 op2 = copy_to_mode_reg (mode2, op2);
35166 pat = GEN_FCN (icode) (op0, op1, op2);
35167 if (! pat)
35168 return 0;
35169 emit_insn (pat);
35170 return 0;
35171
35172 case IX86_BUILTIN_LDMXCSR:
35173 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35174 target = assign_386_stack_local (SImode, SLOT_TEMP);
35175 emit_move_insn (target, op0);
35176 emit_insn (gen_sse_ldmxcsr (target));
35177 return 0;
35178
35179 case IX86_BUILTIN_STMXCSR:
35180 target = assign_386_stack_local (SImode, SLOT_TEMP);
35181 emit_insn (gen_sse_stmxcsr (target));
35182 return copy_to_mode_reg (SImode, target);
35183
35184 case IX86_BUILTIN_CLFLUSH:
35185 arg0 = CALL_EXPR_ARG (exp, 0);
35186 op0 = expand_normal (arg0);
35187 icode = CODE_FOR_sse2_clflush;
35188 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35189 op0 = ix86_zero_extend_to_Pmode (op0);
35190
35191 emit_insn (gen_sse2_clflush (op0));
35192 return 0;
35193
35194 case IX86_BUILTIN_CLFLUSHOPT:
35195 arg0 = CALL_EXPR_ARG (exp, 0);
35196 op0 = expand_normal (arg0);
35197 icode = CODE_FOR_clflushopt;
35198 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35199 op0 = ix86_zero_extend_to_Pmode (op0);
35200
35201 emit_insn (gen_clflushopt (op0));
35202 return 0;
35203
35204 case IX86_BUILTIN_MONITOR:
35205 arg0 = CALL_EXPR_ARG (exp, 0);
35206 arg1 = CALL_EXPR_ARG (exp, 1);
35207 arg2 = CALL_EXPR_ARG (exp, 2);
35208 op0 = expand_normal (arg0);
35209 op1 = expand_normal (arg1);
35210 op2 = expand_normal (arg2);
35211 if (!REG_P (op0))
35212 op0 = ix86_zero_extend_to_Pmode (op0);
35213 if (!REG_P (op1))
35214 op1 = copy_to_mode_reg (SImode, op1);
35215 if (!REG_P (op2))
35216 op2 = copy_to_mode_reg (SImode, op2);
35217 emit_insn (ix86_gen_monitor (op0, op1, op2));
35218 return 0;
35219
35220 case IX86_BUILTIN_MWAIT:
35221 arg0 = CALL_EXPR_ARG (exp, 0);
35222 arg1 = CALL_EXPR_ARG (exp, 1);
35223 op0 = expand_normal (arg0);
35224 op1 = expand_normal (arg1);
35225 if (!REG_P (op0))
35226 op0 = copy_to_mode_reg (SImode, op0);
35227 if (!REG_P (op1))
35228 op1 = copy_to_mode_reg (SImode, op1);
35229 emit_insn (gen_sse3_mwait (op0, op1));
35230 return 0;
35231
35232 case IX86_BUILTIN_VEC_INIT_V2SI:
35233 case IX86_BUILTIN_VEC_INIT_V4HI:
35234 case IX86_BUILTIN_VEC_INIT_V8QI:
35235 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35236
35237 case IX86_BUILTIN_VEC_EXT_V2DF:
35238 case IX86_BUILTIN_VEC_EXT_V2DI:
35239 case IX86_BUILTIN_VEC_EXT_V4SF:
35240 case IX86_BUILTIN_VEC_EXT_V4SI:
35241 case IX86_BUILTIN_VEC_EXT_V8HI:
35242 case IX86_BUILTIN_VEC_EXT_V2SI:
35243 case IX86_BUILTIN_VEC_EXT_V4HI:
35244 case IX86_BUILTIN_VEC_EXT_V16QI:
35245 return ix86_expand_vec_ext_builtin (exp, target);
35246
35247 case IX86_BUILTIN_VEC_SET_V2DI:
35248 case IX86_BUILTIN_VEC_SET_V4SF:
35249 case IX86_BUILTIN_VEC_SET_V4SI:
35250 case IX86_BUILTIN_VEC_SET_V8HI:
35251 case IX86_BUILTIN_VEC_SET_V4HI:
35252 case IX86_BUILTIN_VEC_SET_V16QI:
35253 return ix86_expand_vec_set_builtin (exp);
35254
35255 case IX86_BUILTIN_INFQ:
35256 case IX86_BUILTIN_HUGE_VALQ:
35257 {
35258 REAL_VALUE_TYPE inf;
35259 rtx tmp;
35260
35261 real_inf (&inf);
35262 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35263
35264 tmp = validize_mem (force_const_mem (mode, tmp));
35265
35266 if (target == 0)
35267 target = gen_reg_rtx (mode);
35268
35269 emit_move_insn (target, tmp);
35270 return target;
35271 }
35272
35273 case IX86_BUILTIN_RDPMC:
35274 case IX86_BUILTIN_RDTSC:
35275 case IX86_BUILTIN_RDTSCP:
35276
35277 op0 = gen_reg_rtx (DImode);
35278 op1 = gen_reg_rtx (DImode);
35279
35280 if (fcode == IX86_BUILTIN_RDPMC)
35281 {
35282 arg0 = CALL_EXPR_ARG (exp, 0);
35283 op2 = expand_normal (arg0);
35284 if (!register_operand (op2, SImode))
35285 op2 = copy_to_mode_reg (SImode, op2);
35286
35287 insn = (TARGET_64BIT
35288 ? gen_rdpmc_rex64 (op0, op1, op2)
35289 : gen_rdpmc (op0, op2));
35290 emit_insn (insn);
35291 }
35292 else if (fcode == IX86_BUILTIN_RDTSC)
35293 {
35294 insn = (TARGET_64BIT
35295 ? gen_rdtsc_rex64 (op0, op1)
35296 : gen_rdtsc (op0));
35297 emit_insn (insn);
35298 }
35299 else
35300 {
35301 op2 = gen_reg_rtx (SImode);
35302
35303 insn = (TARGET_64BIT
35304 ? gen_rdtscp_rex64 (op0, op1, op2)
35305 : gen_rdtscp (op0, op2));
35306 emit_insn (insn);
35307
35308 arg0 = CALL_EXPR_ARG (exp, 0);
35309 op4 = expand_normal (arg0);
35310 if (!address_operand (op4, VOIDmode))
35311 {
35312 op4 = convert_memory_address (Pmode, op4);
35313 op4 = copy_addr_to_reg (op4);
35314 }
35315 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35316 }
35317
35318 if (target == 0)
35319 {
35320 /* mode is VOIDmode if __builtin_rd* has been called
35321 without an lhs. */
35322 if (mode == VOIDmode)
35323 return target;
35324 target = gen_reg_rtx (mode);
35325 }
35326
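/* On 64-bit targets the result comes back as two 32-bit halves in
op0/op1; combine them into op0 = (op1 << 32) | op0 before moving
the value to the target. */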
35327 if (TARGET_64BIT)
35328 {
35329 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35330 op1, 1, OPTAB_DIRECT);
35331 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35332 op0, 1, OPTAB_DIRECT);
35333 }
35334
35335 emit_move_insn (target, op0);
35336 return target;
35337
35338 case IX86_BUILTIN_FXSAVE:
35339 case IX86_BUILTIN_FXRSTOR:
35340 case IX86_BUILTIN_FXSAVE64:
35341 case IX86_BUILTIN_FXRSTOR64:
35342 case IX86_BUILTIN_FNSTENV:
35343 case IX86_BUILTIN_FLDENV:
35344 mode0 = BLKmode;
35345 switch (fcode)
35346 {
35347 case IX86_BUILTIN_FXSAVE:
35348 icode = CODE_FOR_fxsave;
35349 break;
35350 case IX86_BUILTIN_FXRSTOR:
35351 icode = CODE_FOR_fxrstor;
35352 break;
35353 case IX86_BUILTIN_FXSAVE64:
35354 icode = CODE_FOR_fxsave64;
35355 break;
35356 case IX86_BUILTIN_FXRSTOR64:
35357 icode = CODE_FOR_fxrstor64;
35358 break;
35359 case IX86_BUILTIN_FNSTENV:
35360 icode = CODE_FOR_fnstenv;
35361 break;
35362 case IX86_BUILTIN_FLDENV:
35363 icode = CODE_FOR_fldenv;
35364 break;
35365 default:
35366 gcc_unreachable ();
35367 }
35368
35369 arg0 = CALL_EXPR_ARG (exp, 0);
35370 op0 = expand_normal (arg0);
35371
35372 if (!address_operand (op0, VOIDmode))
35373 {
35374 op0 = convert_memory_address (Pmode, op0);
35375 op0 = copy_addr_to_reg (op0);
35376 }
35377 op0 = gen_rtx_MEM (mode0, op0);
35378
35379 pat = GEN_FCN (icode) (op0);
35380 if (pat)
35381 emit_insn (pat);
35382 return 0;
35383
35384 case IX86_BUILTIN_XSAVE:
35385 case IX86_BUILTIN_XRSTOR:
35386 case IX86_BUILTIN_XSAVE64:
35387 case IX86_BUILTIN_XRSTOR64:
35388 case IX86_BUILTIN_XSAVEOPT:
35389 case IX86_BUILTIN_XSAVEOPT64:
35390 case IX86_BUILTIN_XSAVES:
35391 case IX86_BUILTIN_XRSTORS:
35392 case IX86_BUILTIN_XSAVES64:
35393 case IX86_BUILTIN_XRSTORS64:
35394 case IX86_BUILTIN_XSAVEC:
35395 case IX86_BUILTIN_XSAVEC64:
35396 arg0 = CALL_EXPR_ARG (exp, 0);
35397 arg1 = CALL_EXPR_ARG (exp, 1);
35398 op0 = expand_normal (arg0);
35399 op1 = expand_normal (arg1);
35400
35401 if (!address_operand (op0, VOIDmode))
35402 {
35403 op0 = convert_memory_address (Pmode, op0);
35404 op0 = copy_addr_to_reg (op0);
35405 }
35406 op0 = gen_rtx_MEM (BLKmode, op0);
35407
35408 op1 = force_reg (DImode, op1);
35409
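/* The 64-bit feature mask in op1 is split into the two 32-bit halves
(EDX:EAX as consumed by xsave/xrstor and friends) that the _rex64
patterns below expect. */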
35410 if (TARGET_64BIT)
35411 {
35412 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35413 NULL, 1, OPTAB_DIRECT);
35414 switch (fcode)
35415 {
35416 case IX86_BUILTIN_XSAVE:
35417 icode = CODE_FOR_xsave_rex64;
35418 break;
35419 case IX86_BUILTIN_XRSTOR:
35420 icode = CODE_FOR_xrstor_rex64;
35421 break;
35422 case IX86_BUILTIN_XSAVE64:
35423 icode = CODE_FOR_xsave64;
35424 break;
35425 case IX86_BUILTIN_XRSTOR64:
35426 icode = CODE_FOR_xrstor64;
35427 break;
35428 case IX86_BUILTIN_XSAVEOPT:
35429 icode = CODE_FOR_xsaveopt_rex64;
35430 break;
35431 case IX86_BUILTIN_XSAVEOPT64:
35432 icode = CODE_FOR_xsaveopt64;
35433 break;
35434 case IX86_BUILTIN_XSAVES:
35435 icode = CODE_FOR_xsaves_rex64;
35436 break;
35437 case IX86_BUILTIN_XRSTORS:
35438 icode = CODE_FOR_xrstors_rex64;
35439 break;
35440 case IX86_BUILTIN_XSAVES64:
35441 icode = CODE_FOR_xsaves64;
35442 break;
35443 case IX86_BUILTIN_XRSTORS64:
35444 icode = CODE_FOR_xrstors64;
35445 break;
35446 case IX86_BUILTIN_XSAVEC:
35447 icode = CODE_FOR_xsavec_rex64;
35448 break;
35449 case IX86_BUILTIN_XSAVEC64:
35450 icode = CODE_FOR_xsavec64;
35451 break;
35452 default:
35453 gcc_unreachable ();
35454 }
35455
35456 op2 = gen_lowpart (SImode, op2);
35457 op1 = gen_lowpart (SImode, op1);
35458 pat = GEN_FCN (icode) (op0, op1, op2);
35459 }
35460 else
35461 {
35462 switch (fcode)
35463 {
35464 case IX86_BUILTIN_XSAVE:
35465 icode = CODE_FOR_xsave;
35466 break;
35467 case IX86_BUILTIN_XRSTOR:
35468 icode = CODE_FOR_xrstor;
35469 break;
35470 case IX86_BUILTIN_XSAVEOPT:
35471 icode = CODE_FOR_xsaveopt;
35472 break;
35473 case IX86_BUILTIN_XSAVES:
35474 icode = CODE_FOR_xsaves;
35475 break;
35476 case IX86_BUILTIN_XRSTORS:
35477 icode = CODE_FOR_xrstors;
35478 break;
35479 case IX86_BUILTIN_XSAVEC:
35480 icode = CODE_FOR_xsavec;
35481 break;
35482 default:
35483 gcc_unreachable ();
35484 }
35485 pat = GEN_FCN (icode) (op0, op1);
35486 }
35487
35488 if (pat)
35489 emit_insn (pat);
35490 return 0;
35491
35492 case IX86_BUILTIN_LLWPCB:
35493 arg0 = CALL_EXPR_ARG (exp, 0);
35494 op0 = expand_normal (arg0);
35495 icode = CODE_FOR_lwp_llwpcb;
35496 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35497 op0 = ix86_zero_extend_to_Pmode (op0);
35498 emit_insn (gen_lwp_llwpcb (op0));
35499 return 0;
35500
35501 case IX86_BUILTIN_SLWPCB:
35502 icode = CODE_FOR_lwp_slwpcb;
35503 if (!target
35504 || !insn_data[icode].operand[0].predicate (target, Pmode))
35505 target = gen_reg_rtx (Pmode);
35506 emit_insn (gen_lwp_slwpcb (target));
35507 return target;
35508
35509 case IX86_BUILTIN_BEXTRI32:
35510 case IX86_BUILTIN_BEXTRI64:
35511 arg0 = CALL_EXPR_ARG (exp, 0);
35512 arg1 = CALL_EXPR_ARG (exp, 1);
35513 op0 = expand_normal (arg0);
35514 op1 = expand_normal (arg1);
35515 icode = (fcode == IX86_BUILTIN_BEXTRI32
35516 ? CODE_FOR_tbm_bextri_si
35517 : CODE_FOR_tbm_bextri_di);
35518 if (!CONST_INT_P (op1))
35519 {
35520 error ("last argument must be an immediate");
35521 return const0_rtx;
35522 }
35523 else
35524 {
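/* The bextri control immediate packs the bit-field length in bits 15:8
and the starting bit index in bits 7:0; split it into the two
separate operands the tbm_bextri pattern takes. */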
35525 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35526 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35527 op1 = GEN_INT (length);
35528 op2 = GEN_INT (lsb_index);
35529 pat = GEN_FCN (icode) (target, op0, op1, op2);
35530 if (pat)
35531 emit_insn (pat);
35532 return target;
35533 }
35534
35535 case IX86_BUILTIN_RDRAND16_STEP:
35536 icode = CODE_FOR_rdrandhi_1;
35537 mode0 = HImode;
35538 goto rdrand_step;
35539
35540 case IX86_BUILTIN_RDRAND32_STEP:
35541 icode = CODE_FOR_rdrandsi_1;
35542 mode0 = SImode;
35543 goto rdrand_step;
35544
35545 case IX86_BUILTIN_RDRAND64_STEP:
35546 icode = CODE_FOR_rdranddi_1;
35547 mode0 = DImode;
35548
35549 rdrand_step:
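/* The sequence below stores the random value through the pointer
argument and returns a success flag: on failure rdrand is documented
to clear both CF and the destination, so the conditional move picks
the zeroed value, while on success it picks the constant 1. */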
35550 op0 = gen_reg_rtx (mode0);
35551 emit_insn (GEN_FCN (icode) (op0));
35552
35553 arg0 = CALL_EXPR_ARG (exp, 0);
35554 op1 = expand_normal (arg0);
35555 if (!address_operand (op1, VOIDmode))
35556 {
35557 op1 = convert_memory_address (Pmode, op1);
35558 op1 = copy_addr_to_reg (op1);
35559 }
35560 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35561
35562 op1 = gen_reg_rtx (SImode);
35563 emit_move_insn (op1, CONST1_RTX (SImode));
35564
35565 /* Emit SImode conditional move. */
35566 if (mode0 == HImode)
35567 {
35568 op2 = gen_reg_rtx (SImode);
35569 emit_insn (gen_zero_extendhisi2 (op2, op0));
35570 }
35571 else if (mode0 == SImode)
35572 op2 = op0;
35573 else
35574 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35575
35576 if (target == 0
35577 || !register_operand (target, SImode))
35578 target = gen_reg_rtx (SImode);
35579
35580 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35581 const0_rtx);
35582 emit_insn (gen_rtx_SET (VOIDmode, target,
35583 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35584 return target;
35585
35586 case IX86_BUILTIN_RDSEED16_STEP:
35587 icode = CODE_FOR_rdseedhi_1;
35588 mode0 = HImode;
35589 goto rdseed_step;
35590
35591 case IX86_BUILTIN_RDSEED32_STEP:
35592 icode = CODE_FOR_rdseedsi_1;
35593 mode0 = SImode;
35594 goto rdseed_step;
35595
35596 case IX86_BUILTIN_RDSEED64_STEP:
35597 icode = CODE_FOR_rdseeddi_1;
35598 mode0 = DImode;
35599
35600 rdseed_step:
35601 op0 = gen_reg_rtx (mode0);
35602 emit_insn (GEN_FCN (icode) (op0));
35603
35604 arg0 = CALL_EXPR_ARG (exp, 0);
35605 op1 = expand_normal (arg0);
35606 if (!address_operand (op1, VOIDmode))
35607 {
35608 op1 = convert_memory_address (Pmode, op1);
35609 op1 = copy_addr_to_reg (op1);
35610 }
35611 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35612
35613 op2 = gen_reg_rtx (QImode);
35614
35615 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35616 const0_rtx);
35617 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35618
35619 if (target == 0
35620 || !register_operand (target, SImode))
35621 target = gen_reg_rtx (SImode);
35622
35623 emit_insn (gen_zero_extendqisi2 (target, op2));
35624 return target;
35625
35626 case IX86_BUILTIN_SBB32:
35627 icode = CODE_FOR_subsi3_carry;
35628 mode0 = SImode;
35629 goto addcarryx;
35630
35631 case IX86_BUILTIN_SBB64:
35632 icode = CODE_FOR_subdi3_carry;
35633 mode0 = DImode;
35634 goto addcarryx;
35635
35636 case IX86_BUILTIN_ADDCARRYX32:
35637 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35638 mode0 = SImode;
35639 goto addcarryx;
35640
35641 case IX86_BUILTIN_ADDCARRYX64:
35642 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35643 mode0 = DImode;
35644
35645 addcarryx:
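/* The expansion below first materializes CF from the incoming carry
byte (adding -1 to a nonzero QImode value sets the carry flag), then
emits the carry-using add or borrow-using subtract, stores the wide
result through *sum_out and returns the resulting carry as the
builtin's unsigned char value. */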
35646 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35647 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35648 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35649 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35650
35651 op0 = gen_reg_rtx (QImode);
35652
35653 /* Generate CF from input operand. */
35654 op1 = expand_normal (arg0);
35655 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35656 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
35657
35658 /* Generate the carry-using add (or borrow-using subtract for the SBB builtins) to compute X + Y + CF. */
35659 op2 = expand_normal (arg1);
35660 op3 = expand_normal (arg2);
35661
35662 if (!REG_P (op2))
35663 op2 = copy_to_mode_reg (mode0, op2);
35664 if (!REG_P (op3))
35665 op3 = copy_to_mode_reg (mode0, op3);
35666
35667 op0 = gen_reg_rtx (mode0);
35668
35669 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35670 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35671 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35672
35673 /* Store the result. */
35674 op4 = expand_normal (arg3);
35675 if (!address_operand (op4, VOIDmode))
35676 {
35677 op4 = convert_memory_address (Pmode, op4);
35678 op4 = copy_addr_to_reg (op4);
35679 }
35680 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35681
35682 /* Return current CF value. */
35683 if (target == 0)
35684 target = gen_reg_rtx (QImode);
35685
35686 PUT_MODE (pat, QImode);
35687 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35688 return target;
35689
35690 case IX86_BUILTIN_READ_FLAGS:
35691 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35692
35693 if (optimize
35694 || target == NULL_RTX
35695 || !nonimmediate_operand (target, word_mode)
35696 || GET_MODE (target) != word_mode)
35697 target = gen_reg_rtx (word_mode);
35698
35699 emit_insn (gen_pop (target));
35700 return target;
35701
35702 case IX86_BUILTIN_WRITE_FLAGS:
35703
35704 arg0 = CALL_EXPR_ARG (exp, 0);
35705 op0 = expand_normal (arg0);
35706 if (!general_no_elim_operand (op0, word_mode))
35707 op0 = copy_to_mode_reg (word_mode, op0);
35708
35709 emit_insn (gen_push (op0));
35710 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35711 return 0;
35712
35713 case IX86_BUILTIN_KORTESTC16:
35714 icode = CODE_FOR_kortestchi;
35715 mode0 = HImode;
35716 mode1 = CCCmode;
35717 goto kortest;
35718
35719 case IX86_BUILTIN_KORTESTZ16:
35720 icode = CODE_FOR_kortestzhi;
35721 mode0 = HImode;
35722 mode1 = CCZmode;
35723
35724 kortest:
35725 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35726 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35727 op0 = expand_normal (arg0);
35728 op1 = expand_normal (arg1);
35729
35730 op0 = copy_to_reg (op0);
35731 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35732 op1 = copy_to_reg (op1);
35733 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35734
35735 target = gen_reg_rtx (QImode);
35736 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35737
35738 /* Emit kortest. */
35739 emit_insn (GEN_FCN (icode) (op0, op1));
35740 /* And use setcc to return result from flags. */
35741 ix86_expand_setcc (target, EQ,
35742 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35743 return target;
35744
35745 case IX86_BUILTIN_GATHERSIV2DF:
35746 icode = CODE_FOR_avx2_gathersiv2df;
35747 goto gather_gen;
35748 case IX86_BUILTIN_GATHERSIV4DF:
35749 icode = CODE_FOR_avx2_gathersiv4df;
35750 goto gather_gen;
35751 case IX86_BUILTIN_GATHERDIV2DF:
35752 icode = CODE_FOR_avx2_gatherdiv2df;
35753 goto gather_gen;
35754 case IX86_BUILTIN_GATHERDIV4DF:
35755 icode = CODE_FOR_avx2_gatherdiv4df;
35756 goto gather_gen;
35757 case IX86_BUILTIN_GATHERSIV4SF:
35758 icode = CODE_FOR_avx2_gathersiv4sf;
35759 goto gather_gen;
35760 case IX86_BUILTIN_GATHERSIV8SF:
35761 icode = CODE_FOR_avx2_gathersiv8sf;
35762 goto gather_gen;
35763 case IX86_BUILTIN_GATHERDIV4SF:
35764 icode = CODE_FOR_avx2_gatherdiv4sf;
35765 goto gather_gen;
35766 case IX86_BUILTIN_GATHERDIV8SF:
35767 icode = CODE_FOR_avx2_gatherdiv8sf;
35768 goto gather_gen;
35769 case IX86_BUILTIN_GATHERSIV2DI:
35770 icode = CODE_FOR_avx2_gathersiv2di;
35771 goto gather_gen;
35772 case IX86_BUILTIN_GATHERSIV4DI:
35773 icode = CODE_FOR_avx2_gathersiv4di;
35774 goto gather_gen;
35775 case IX86_BUILTIN_GATHERDIV2DI:
35776 icode = CODE_FOR_avx2_gatherdiv2di;
35777 goto gather_gen;
35778 case IX86_BUILTIN_GATHERDIV4DI:
35779 icode = CODE_FOR_avx2_gatherdiv4di;
35780 goto gather_gen;
35781 case IX86_BUILTIN_GATHERSIV4SI:
35782 icode = CODE_FOR_avx2_gathersiv4si;
35783 goto gather_gen;
35784 case IX86_BUILTIN_GATHERSIV8SI:
35785 icode = CODE_FOR_avx2_gathersiv8si;
35786 goto gather_gen;
35787 case IX86_BUILTIN_GATHERDIV4SI:
35788 icode = CODE_FOR_avx2_gatherdiv4si;
35789 goto gather_gen;
35790 case IX86_BUILTIN_GATHERDIV8SI:
35791 icode = CODE_FOR_avx2_gatherdiv8si;
35792 goto gather_gen;
35793 case IX86_BUILTIN_GATHERALTSIV4DF:
35794 icode = CODE_FOR_avx2_gathersiv4df;
35795 goto gather_gen;
35796 case IX86_BUILTIN_GATHERALTDIV8SF:
35797 icode = CODE_FOR_avx2_gatherdiv8sf;
35798 goto gather_gen;
35799 case IX86_BUILTIN_GATHERALTSIV4DI:
35800 icode = CODE_FOR_avx2_gathersiv4di;
35801 goto gather_gen;
35802 case IX86_BUILTIN_GATHERALTDIV8SI:
35803 icode = CODE_FOR_avx2_gatherdiv8si;
35804 goto gather_gen;
35805 case IX86_BUILTIN_GATHER3SIV16SF:
35806 icode = CODE_FOR_avx512f_gathersiv16sf;
35807 goto gather_gen;
35808 case IX86_BUILTIN_GATHER3SIV8DF:
35809 icode = CODE_FOR_avx512f_gathersiv8df;
35810 goto gather_gen;
35811 case IX86_BUILTIN_GATHER3DIV16SF:
35812 icode = CODE_FOR_avx512f_gatherdiv16sf;
35813 goto gather_gen;
35814 case IX86_BUILTIN_GATHER3DIV8DF:
35815 icode = CODE_FOR_avx512f_gatherdiv8df;
35816 goto gather_gen;
35817 case IX86_BUILTIN_GATHER3SIV16SI:
35818 icode = CODE_FOR_avx512f_gathersiv16si;
35819 goto gather_gen;
35820 case IX86_BUILTIN_GATHER3SIV8DI:
35821 icode = CODE_FOR_avx512f_gathersiv8di;
35822 goto gather_gen;
35823 case IX86_BUILTIN_GATHER3DIV16SI:
35824 icode = CODE_FOR_avx512f_gatherdiv16si;
35825 goto gather_gen;
35826 case IX86_BUILTIN_GATHER3DIV8DI:
35827 icode = CODE_FOR_avx512f_gatherdiv8di;
35828 goto gather_gen;
35829 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35830 icode = CODE_FOR_avx512f_gathersiv8df;
35831 goto gather_gen;
35832 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35833 icode = CODE_FOR_avx512f_gatherdiv16sf;
35834 goto gather_gen;
35835 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35836 icode = CODE_FOR_avx512f_gathersiv8di;
35837 goto gather_gen;
35838 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35839 icode = CODE_FOR_avx512f_gatherdiv16si;
35840 goto gather_gen;
35841 case IX86_BUILTIN_SCATTERSIV16SF:
35842 icode = CODE_FOR_avx512f_scattersiv16sf;
35843 goto scatter_gen;
35844 case IX86_BUILTIN_SCATTERSIV8DF:
35845 icode = CODE_FOR_avx512f_scattersiv8df;
35846 goto scatter_gen;
35847 case IX86_BUILTIN_SCATTERDIV16SF:
35848 icode = CODE_FOR_avx512f_scatterdiv16sf;
35849 goto scatter_gen;
35850 case IX86_BUILTIN_SCATTERDIV8DF:
35851 icode = CODE_FOR_avx512f_scatterdiv8df;
35852 goto scatter_gen;
35853 case IX86_BUILTIN_SCATTERSIV16SI:
35854 icode = CODE_FOR_avx512f_scattersiv16si;
35855 goto scatter_gen;
35856 case IX86_BUILTIN_SCATTERSIV8DI:
35857 icode = CODE_FOR_avx512f_scattersiv8di;
35858 goto scatter_gen;
35859 case IX86_BUILTIN_SCATTERDIV16SI:
35860 icode = CODE_FOR_avx512f_scatterdiv16si;
35861 goto scatter_gen;
35862 case IX86_BUILTIN_SCATTERDIV8DI:
35863 icode = CODE_FOR_avx512f_scatterdiv8di;
35864 goto scatter_gen;
35865
35866 case IX86_BUILTIN_GATHERPFDPD:
35867 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35868 goto vec_prefetch_gen;
35869 case IX86_BUILTIN_GATHERPFDPS:
35870 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35871 goto vec_prefetch_gen;
35872 case IX86_BUILTIN_GATHERPFQPD:
35873 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35874 goto vec_prefetch_gen;
35875 case IX86_BUILTIN_GATHERPFQPS:
35876 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35877 goto vec_prefetch_gen;
35878 case IX86_BUILTIN_SCATTERPFDPD:
35879 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35880 goto vec_prefetch_gen;
35881 case IX86_BUILTIN_SCATTERPFDPS:
35882 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35883 goto vec_prefetch_gen;
35884 case IX86_BUILTIN_SCATTERPFQPD:
35885 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35886 goto vec_prefetch_gen;
35887 case IX86_BUILTIN_SCATTERPFQPS:
35888 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35889 goto vec_prefetch_gen;
35890
35891 gather_gen:
35892 rtx half;
35893 rtx (*gen) (rtx, rtx);
35894
35895 arg0 = CALL_EXPR_ARG (exp, 0);
35896 arg1 = CALL_EXPR_ARG (exp, 1);
35897 arg2 = CALL_EXPR_ARG (exp, 2);
35898 arg3 = CALL_EXPR_ARG (exp, 3);
35899 arg4 = CALL_EXPR_ARG (exp, 4);
35900 op0 = expand_normal (arg0);
35901 op1 = expand_normal (arg1);
35902 op2 = expand_normal (arg2);
35903 op3 = expand_normal (arg3);
35904 op4 = expand_normal (arg4);
35905 /* Note the arg order is different from the operand order. */
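      /* Operand 0 of the gather insn is the destination; operands 1-5 are
	 the source/merge value, base pointer, index vector, mask and scale,
	 corresponding to builtin arguments 0-4 above.  */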
35906 mode0 = insn_data[icode].operand[1].mode;
35907 mode2 = insn_data[icode].operand[3].mode;
35908 mode3 = insn_data[icode].operand[4].mode;
35909 mode4 = insn_data[icode].operand[5].mode;
35910
35911 if (target == NULL_RTX
35912 || GET_MODE (target) != insn_data[icode].operand[0].mode
35913 || !insn_data[icode].operand[0].predicate (target,
35914 GET_MODE (target)))
35915 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35916 else
35917 subtarget = target;
35918
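      /* The *ALT* variants take an index vector (and, for the ALTDIV forms,
	 a source and mask) that is twice as wide as the gather needs, so
	 extract and use only its low half.  */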
35919 switch (fcode)
35920 {
35921 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35922 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35923 half = gen_reg_rtx (V8SImode);
35924 if (!nonimmediate_operand (op2, V16SImode))
35925 op2 = copy_to_mode_reg (V16SImode, op2);
35926 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35927 op2 = half;
35928 break;
35929 case IX86_BUILTIN_GATHERALTSIV4DF:
35930 case IX86_BUILTIN_GATHERALTSIV4DI:
35931 half = gen_reg_rtx (V4SImode);
35932 if (!nonimmediate_operand (op2, V8SImode))
35933 op2 = copy_to_mode_reg (V8SImode, op2);
35934 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35935 op2 = half;
35936 break;
35937 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35938 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35939 half = gen_reg_rtx (mode0);
35940 if (mode0 == V8SFmode)
35941 gen = gen_vec_extract_lo_v16sf;
35942 else
35943 gen = gen_vec_extract_lo_v16si;
35944 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35945 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35946 emit_insn (gen (half, op0));
35947 op0 = half;
35948 if (GET_MODE (op3) != VOIDmode)
35949 {
35950 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35951 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35952 emit_insn (gen (half, op3));
35953 op3 = half;
35954 }
35955 break;
35956 case IX86_BUILTIN_GATHERALTDIV8SF:
35957 case IX86_BUILTIN_GATHERALTDIV8SI:
35958 half = gen_reg_rtx (mode0);
35959 if (mode0 == V4SFmode)
35960 gen = gen_vec_extract_lo_v8sf;
35961 else
35962 gen = gen_vec_extract_lo_v8si;
35963 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35964 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35965 emit_insn (gen (half, op0));
35966 op0 = half;
35967 if (GET_MODE (op3) != VOIDmode)
35968 {
35969 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35970 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35971 emit_insn (gen (half, op3));
35972 op3 = half;
35973 }
35974 break;
35975 default:
35976 break;
35977 }
35978
35979 /* Force memory operand only with base register here. But we
35980 don't want to do it on memory operand for other builtin
35981 functions. */
35982 op1 = ix86_zero_extend_to_Pmode (op1);
35983
35984 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35985 op0 = copy_to_mode_reg (mode0, op0);
35986 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35987 op1 = copy_to_mode_reg (Pmode, op1);
35988 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35989 op2 = copy_to_mode_reg (mode2, op2);
35990 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35991 {
35992 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35993 op3 = copy_to_mode_reg (mode3, op3);
35994 }
35995 else
35996 {
35997 op3 = copy_to_reg (op3);
35998 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35999 }
36000 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36001 {
36002 error ("the last argument must be scale 1, 2, 4, 8");
36003 return const0_rtx;
36004 }
36005
36006 /* Optimize. If the mask is known to have the high bit set in all elements,
36007 replace op0 with pc_rtx to signal that the instruction
36008 overwrites the whole destination and doesn't use its
36009 previous contents. */
36010 if (optimize)
36011 {
36012 if (TREE_CODE (arg3) == INTEGER_CST)
36013 {
36014 if (integer_all_onesp (arg3))
36015 op0 = pc_rtx;
36016 }
36017 else if (TREE_CODE (arg3) == VECTOR_CST)
36018 {
36019 unsigned int negative = 0;
36020 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36021 {
36022 tree cst = VECTOR_CST_ELT (arg3, i);
36023 if (TREE_CODE (cst) == INTEGER_CST
36024 && tree_int_cst_sign_bit (cst))
36025 negative++;
36026 else if (TREE_CODE (cst) == REAL_CST
36027 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36028 negative++;
36029 }
36030 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36031 op0 = pc_rtx;
36032 }
36033 else if (TREE_CODE (arg3) == SSA_NAME
36034 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36035 {
36036 /* Recognize also when mask is like:
36037 __v2df src = _mm_setzero_pd ();
36038 __v2df mask = _mm_cmpeq_pd (src, src);
36039 or
36040 __v8sf src = _mm256_setzero_ps ();
36041 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36042 as that is a cheaper way to load all ones into
36043 a register than having to load a constant from
36044 memory. */
36045 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36046 if (is_gimple_call (def_stmt))
36047 {
36048 tree fndecl = gimple_call_fndecl (def_stmt);
36049 if (fndecl
36050 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36051 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36052 {
36053 case IX86_BUILTIN_CMPPD:
36054 case IX86_BUILTIN_CMPPS:
36055 case IX86_BUILTIN_CMPPD256:
36056 case IX86_BUILTIN_CMPPS256:
36057 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36058 break;
36059 /* FALLTHRU */
36060 case IX86_BUILTIN_CMPEQPD:
36061 case IX86_BUILTIN_CMPEQPS:
36062 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36063 && initializer_zerop (gimple_call_arg (def_stmt,
36064 1)))
36065 op0 = pc_rtx;
36066 break;
36067 default:
36068 break;
36069 }
36070 }
36071 }
36072 }
36073
36074 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36075 if (! pat)
36076 return const0_rtx;
36077 emit_insn (pat);
36078
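      /* The DIV variants with 64-bit indices gather only half as many
	 elements as their full-width destination holds, so hand back just
	 the low half of SUBTARGET.  */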
36079 switch (fcode)
36080 {
36081 case IX86_BUILTIN_GATHER3DIV16SF:
36082 if (target == NULL_RTX)
36083 target = gen_reg_rtx (V8SFmode);
36084 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36085 break;
36086 case IX86_BUILTIN_GATHER3DIV16SI:
36087 if (target == NULL_RTX)
36088 target = gen_reg_rtx (V8SImode);
36089 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36090 break;
36091 case IX86_BUILTIN_GATHERDIV8SF:
36092 if (target == NULL_RTX)
36093 target = gen_reg_rtx (V4SFmode);
36094 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36095 break;
36096 case IX86_BUILTIN_GATHERDIV8SI:
36097 if (target == NULL_RTX)
36098 target = gen_reg_rtx (V4SImode);
36099 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36100 break;
36101 default:
36102 target = subtarget;
36103 break;
36104 }
36105 return target;
36106
36107 scatter_gen:
36108 arg0 = CALL_EXPR_ARG (exp, 0);
36109 arg1 = CALL_EXPR_ARG (exp, 1);
36110 arg2 = CALL_EXPR_ARG (exp, 2);
36111 arg3 = CALL_EXPR_ARG (exp, 3);
36112 arg4 = CALL_EXPR_ARG (exp, 4);
36113 op0 = expand_normal (arg0);
36114 op1 = expand_normal (arg1);
36115 op2 = expand_normal (arg2);
36116 op3 = expand_normal (arg3);
36117 op4 = expand_normal (arg4);
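      /* The scatter builtins take the base pointer, mask, index vector,
	 source vector and scale, in that order; they map directly to
	 operands 0-4 of the insn.  */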
36118 mode1 = insn_data[icode].operand[1].mode;
36119 mode2 = insn_data[icode].operand[2].mode;
36120 mode3 = insn_data[icode].operand[3].mode;
36121 mode4 = insn_data[icode].operand[4].mode;
36122
36123 /* Force memory operand only with base register here. But we
36124 don't want to do it on memory operand for other builtin
36125 functions. */
36126 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36127
36128 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36129 op0 = copy_to_mode_reg (Pmode, op0);
36130
36131 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36132 {
36133 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36134 op1 = copy_to_mode_reg (mode1, op1);
36135 }
36136 else
36137 {
36138 op1 = copy_to_reg (op1);
36139 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36140 }
36141
36142 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36143 op2 = copy_to_mode_reg (mode2, op2);
36144
36145 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36146 op3 = copy_to_mode_reg (mode3, op3);
36147
36148 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36149 {
36150 error ("the last argument must be scale 1, 2, 4, 8");
36151 return const0_rtx;
36152 }
36153
36154 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36155 if (! pat)
36156 return const0_rtx;
36157
36158 emit_insn (pat);
36159 return 0;
36160
36161 vec_prefetch_gen:
36162 arg0 = CALL_EXPR_ARG (exp, 0);
36163 arg1 = CALL_EXPR_ARG (exp, 1);
36164 arg2 = CALL_EXPR_ARG (exp, 2);
36165 arg3 = CALL_EXPR_ARG (exp, 3);
36166 arg4 = CALL_EXPR_ARG (exp, 4);
36167 op0 = expand_normal (arg0);
36168 op1 = expand_normal (arg1);
36169 op2 = expand_normal (arg2);
36170 op3 = expand_normal (arg3);
36171 op4 = expand_normal (arg4);
36172 mode0 = insn_data[icode].operand[0].mode;
36173 mode1 = insn_data[icode].operand[1].mode;
36174 mode3 = insn_data[icode].operand[3].mode;
36175 mode4 = insn_data[icode].operand[4].mode;
36176
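      /* Operand 0 is the mask, operand 1 the index vector, operand 2 the
	 base pointer, operand 3 the scale and operand 4 the locality hint.  */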
36177 if (GET_MODE (op0) == mode0
36178 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36179 {
36180 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36181 op0 = copy_to_mode_reg (mode0, op0);
36182 }
36183 else if (op0 != constm1_rtx)
36184 {
36185 op0 = copy_to_reg (op0);
36186 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36187 }
36188
36189 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36190 op1 = copy_to_mode_reg (mode1, op1);
36191
36192 /* Force memory operand only with base register here. But we
36193 don't want to do it on memory operand for other builtin
36194 functions. */
36195 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36196
36197 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36198 op2 = copy_to_mode_reg (Pmode, op2);
36199
36200 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36201 {
36202 error ("the fourth argument must be scale 1, 2, 4, 8");
36203 return const0_rtx;
36204 }
36205
36206 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36207 {
36208 error ("incorrect hint operand");
36209 return const0_rtx;
36210 }
36211
36212 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36213 if (! pat)
36214 return const0_rtx;
36215
36216 emit_insn (pat);
36217
36218 return 0;
36219
36220 case IX86_BUILTIN_XABORT:
36221 icode = CODE_FOR_xabort;
36222 arg0 = CALL_EXPR_ARG (exp, 0);
36223 op0 = expand_normal (arg0);
36224 mode0 = insn_data[icode].operand[0].mode;
36225 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36226 {
36227 error ("the xabort's argument must be an 8-bit immediate");
36228 return const0_rtx;
36229 }
36230 emit_insn (gen_xabort (op0));
36231 return 0;
36232
36233 default:
36234 break;
36235 }
36236
36237 for (i = 0, d = bdesc_special_args;
36238 i < ARRAY_SIZE (bdesc_special_args);
36239 i++, d++)
36240 if (d->code == fcode)
36241 return ix86_expand_special_args_builtin (d, exp, target);
36242
36243 for (i = 0, d = bdesc_args;
36244 i < ARRAY_SIZE (bdesc_args);
36245 i++, d++)
36246 if (d->code == fcode)
36247 switch (fcode)
36248 {
36249 case IX86_BUILTIN_FABSQ:
36250 case IX86_BUILTIN_COPYSIGNQ:
36251 if (!TARGET_SSE)
36252 /* Emit a normal call if SSE isn't available. */
36253 return expand_call (exp, target, ignore);
36254 default:
36255 return ix86_expand_args_builtin (d, exp, target);
36256 }
36257
36258 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36259 if (d->code == fcode)
36260 return ix86_expand_sse_comi (d, exp, target);
36261
36262 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36263 if (d->code == fcode)
36264 return ix86_expand_round_builtin (d, exp, target);
36265
36266 for (i = 0, d = bdesc_pcmpestr;
36267 i < ARRAY_SIZE (bdesc_pcmpestr);
36268 i++, d++)
36269 if (d->code == fcode)
36270 return ix86_expand_sse_pcmpestr (d, exp, target);
36271
36272 for (i = 0, d = bdesc_pcmpistr;
36273 i < ARRAY_SIZE (bdesc_pcmpistr);
36274 i++, d++)
36275 if (d->code == fcode)
36276 return ix86_expand_sse_pcmpistr (d, exp, target);
36277
36278 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36279 if (d->code == fcode)
36280 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36281 (enum ix86_builtin_func_type)
36282 d->flag, d->comparison);
36283
36284 gcc_unreachable ();
36285 }
36286
36287 /* This returns the target-specific builtin with code CODE if
36288 current_function_decl has visibility on this builtin, which is checked
36289 using isa flags. Returns NULL_TREE otherwise. */
36290
36291 static tree ix86_get_builtin (enum ix86_builtins code)
36292 {
36293 struct cl_target_option *opts;
36294 tree target_tree = NULL_TREE;
36295
36296 /* Determine the isa flags of current_function_decl. */
36297
36298 if (current_function_decl)
36299 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36300
36301 if (target_tree == NULL)
36302 target_tree = target_option_default_node;
36303
36304 opts = TREE_TARGET_OPTION (target_tree);
36305
36306 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36307 return ix86_builtin_decl (code, true);
36308 else
36309 return NULL_TREE;
36310 }
36311
36312 /* Returns a function decl for a vectorized version of the builtin function
36313 FNDECL with output vector type TYPE_OUT and input vector type TYPE_IN,
36314 or NULL_TREE if it is not available. */
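/* For example, vectorizing BUILT_IN_SQRT with a V2DF output and input
   selects IX86_BUILTIN_SQRTPD below.  */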
36315
36316 static tree
36317 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36318 tree type_in)
36319 {
36320 enum machine_mode in_mode, out_mode;
36321 int in_n, out_n;
36322 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36323
36324 if (TREE_CODE (type_out) != VECTOR_TYPE
36325 || TREE_CODE (type_in) != VECTOR_TYPE
36326 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36327 return NULL_TREE;
36328
36329 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36330 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36331 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36332 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36333
36334 switch (fn)
36335 {
36336 case BUILT_IN_SQRT:
36337 if (out_mode == DFmode && in_mode == DFmode)
36338 {
36339 if (out_n == 2 && in_n == 2)
36340 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36341 else if (out_n == 4 && in_n == 4)
36342 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36343 else if (out_n == 8 && in_n == 8)
36344 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36345 }
36346 break;
36347
36348 case BUILT_IN_EXP2F:
36349 if (out_mode == SFmode && in_mode == SFmode)
36350 {
36351 if (out_n == 16 && in_n == 16)
36352 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36353 }
36354 break;
36355
36356 case BUILT_IN_SQRTF:
36357 if (out_mode == SFmode && in_mode == SFmode)
36358 {
36359 if (out_n == 4 && in_n == 4)
36360 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36361 else if (out_n == 8 && in_n == 8)
36362 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36363 else if (out_n == 16 && in_n == 16)
36364 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36365 }
36366 break;
36367
36368 case BUILT_IN_IFLOOR:
36369 case BUILT_IN_LFLOOR:
36370 case BUILT_IN_LLFLOOR:
36371 /* The round insn does not trap on denormals. */
36372 if (flag_trapping_math || !TARGET_ROUND)
36373 break;
36374
36375 if (out_mode == SImode && in_mode == DFmode)
36376 {
36377 if (out_n == 4 && in_n == 2)
36378 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36379 else if (out_n == 8 && in_n == 4)
36380 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36381 else if (out_n == 16 && in_n == 8)
36382 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36383 }
36384 break;
36385
36386 case BUILT_IN_IFLOORF:
36387 case BUILT_IN_LFLOORF:
36388 case BUILT_IN_LLFLOORF:
36389 /* The round insn does not trap on denormals. */
36390 if (flag_trapping_math || !TARGET_ROUND)
36391 break;
36392
36393 if (out_mode == SImode && in_mode == SFmode)
36394 {
36395 if (out_n == 4 && in_n == 4)
36396 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36397 else if (out_n == 8 && in_n == 8)
36398 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36399 }
36400 break;
36401
36402 case BUILT_IN_ICEIL:
36403 case BUILT_IN_LCEIL:
36404 case BUILT_IN_LLCEIL:
36405 /* The round insn does not trap on denormals. */
36406 if (flag_trapping_math || !TARGET_ROUND)
36407 break;
36408
36409 if (out_mode == SImode && in_mode == DFmode)
36410 {
36411 if (out_n == 4 && in_n == 2)
36412 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36413 else if (out_n == 8 && in_n == 4)
36414 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36415 else if (out_n == 16 && in_n == 8)
36416 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36417 }
36418 break;
36419
36420 case BUILT_IN_ICEILF:
36421 case BUILT_IN_LCEILF:
36422 case BUILT_IN_LLCEILF:
36423 /* The round insn does not trap on denormals. */
36424 if (flag_trapping_math || !TARGET_ROUND)
36425 break;
36426
36427 if (out_mode == SImode && in_mode == SFmode)
36428 {
36429 if (out_n == 4 && in_n == 4)
36430 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36431 else if (out_n == 8 && in_n == 8)
36432 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36433 }
36434 break;
36435
36436 case BUILT_IN_IRINT:
36437 case BUILT_IN_LRINT:
36438 case BUILT_IN_LLRINT:
36439 if (out_mode == SImode && in_mode == DFmode)
36440 {
36441 if (out_n == 4 && in_n == 2)
36442 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36443 else if (out_n == 8 && in_n == 4)
36444 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36445 }
36446 break;
36447
36448 case BUILT_IN_IRINTF:
36449 case BUILT_IN_LRINTF:
36450 case BUILT_IN_LLRINTF:
36451 if (out_mode == SImode && in_mode == SFmode)
36452 {
36453 if (out_n == 4 && in_n == 4)
36454 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36455 else if (out_n == 8 && in_n == 8)
36456 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36457 }
36458 break;
36459
36460 case BUILT_IN_IROUND:
36461 case BUILT_IN_LROUND:
36462 case BUILT_IN_LLROUND:
36463 /* The round insn does not trap on denormals. */
36464 if (flag_trapping_math || !TARGET_ROUND)
36465 break;
36466
36467 if (out_mode == SImode && in_mode == DFmode)
36468 {
36469 if (out_n == 4 && in_n == 2)
36470 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36471 else if (out_n == 8 && in_n == 4)
36472 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36473 else if (out_n == 16 && in_n == 8)
36474 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36475 }
36476 break;
36477
36478 case BUILT_IN_IROUNDF:
36479 case BUILT_IN_LROUNDF:
36480 case BUILT_IN_LLROUNDF:
36481 /* The round insn does not trap on denormals. */
36482 if (flag_trapping_math || !TARGET_ROUND)
36483 break;
36484
36485 if (out_mode == SImode && in_mode == SFmode)
36486 {
36487 if (out_n == 4 && in_n == 4)
36488 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36489 else if (out_n == 8 && in_n == 8)
36490 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36491 }
36492 break;
36493
36494 case BUILT_IN_COPYSIGN:
36495 if (out_mode == DFmode && in_mode == DFmode)
36496 {
36497 if (out_n == 2 && in_n == 2)
36498 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36499 else if (out_n == 4 && in_n == 4)
36500 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36501 else if (out_n == 8 && in_n == 8)
36502 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36503 }
36504 break;
36505
36506 case BUILT_IN_COPYSIGNF:
36507 if (out_mode == SFmode && in_mode == SFmode)
36508 {
36509 if (out_n == 4 && in_n == 4)
36510 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36511 else if (out_n == 8 && in_n == 8)
36512 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36513 else if (out_n == 16 && in_n == 16)
36514 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36515 }
36516 break;
36517
36518 case BUILT_IN_FLOOR:
36519 /* The round insn does not trap on denormals. */
36520 if (flag_trapping_math || !TARGET_ROUND)
36521 break;
36522
36523 if (out_mode == DFmode && in_mode == DFmode)
36524 {
36525 if (out_n == 2 && in_n == 2)
36526 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36527 else if (out_n == 4 && in_n == 4)
36528 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36529 }
36530 break;
36531
36532 case BUILT_IN_FLOORF:
36533 /* The round insn does not trap on denormals. */
36534 if (flag_trapping_math || !TARGET_ROUND)
36535 break;
36536
36537 if (out_mode == SFmode && in_mode == SFmode)
36538 {
36539 if (out_n == 4 && in_n == 4)
36540 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36541 else if (out_n == 8 && in_n == 8)
36542 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36543 }
36544 break;
36545
36546 case BUILT_IN_CEIL:
36547 /* The round insn does not trap on denormals. */
36548 if (flag_trapping_math || !TARGET_ROUND)
36549 break;
36550
36551 if (out_mode == DFmode && in_mode == DFmode)
36552 {
36553 if (out_n == 2 && in_n == 2)
36554 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36555 else if (out_n == 4 && in_n == 4)
36556 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36557 }
36558 break;
36559
36560 case BUILT_IN_CEILF:
36561 /* The round insn does not trap on denormals. */
36562 if (flag_trapping_math || !TARGET_ROUND)
36563 break;
36564
36565 if (out_mode == SFmode && in_mode == SFmode)
36566 {
36567 if (out_n == 4 && in_n == 4)
36568 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36569 else if (out_n == 8 && in_n == 8)
36570 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36571 }
36572 break;
36573
36574 case BUILT_IN_TRUNC:
36575 /* The round insn does not trap on denormals. */
36576 if (flag_trapping_math || !TARGET_ROUND)
36577 break;
36578
36579 if (out_mode == DFmode && in_mode == DFmode)
36580 {
36581 if (out_n == 2 && in_n == 2)
36582 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36583 else if (out_n == 4 && in_n == 4)
36584 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36585 }
36586 break;
36587
36588 case BUILT_IN_TRUNCF:
36589 /* The round insn does not trap on denormals. */
36590 if (flag_trapping_math || !TARGET_ROUND)
36591 break;
36592
36593 if (out_mode == SFmode && in_mode == SFmode)
36594 {
36595 if (out_n == 4 && in_n == 4)
36596 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36597 else if (out_n == 8 && in_n == 8)
36598 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36599 }
36600 break;
36601
36602 case BUILT_IN_RINT:
36603 /* The round insn does not trap on denormals. */
36604 if (flag_trapping_math || !TARGET_ROUND)
36605 break;
36606
36607 if (out_mode == DFmode && in_mode == DFmode)
36608 {
36609 if (out_n == 2 && in_n == 2)
36610 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36611 else if (out_n == 4 && in_n == 4)
36612 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36613 }
36614 break;
36615
36616 case BUILT_IN_RINTF:
36617 /* The round insn does not trap on denormals. */
36618 if (flag_trapping_math || !TARGET_ROUND)
36619 break;
36620
36621 if (out_mode == SFmode && in_mode == SFmode)
36622 {
36623 if (out_n == 4 && in_n == 4)
36624 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36625 else if (out_n == 8 && in_n == 8)
36626 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36627 }
36628 break;
36629
36630 case BUILT_IN_ROUND:
36631 /* The round insn does not trap on denormals. */
36632 if (flag_trapping_math || !TARGET_ROUND)
36633 break;
36634
36635 if (out_mode == DFmode && in_mode == DFmode)
36636 {
36637 if (out_n == 2 && in_n == 2)
36638 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36639 else if (out_n == 4 && in_n == 4)
36640 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36641 }
36642 break;
36643
36644 case BUILT_IN_ROUNDF:
36645 /* The round insn does not trap on denormals. */
36646 if (flag_trapping_math || !TARGET_ROUND)
36647 break;
36648
36649 if (out_mode == SFmode && in_mode == SFmode)
36650 {
36651 if (out_n == 4 && in_n == 4)
36652 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36653 else if (out_n == 8 && in_n == 8)
36654 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36655 }
36656 break;
36657
36658 case BUILT_IN_FMA:
36659 if (out_mode == DFmode && in_mode == DFmode)
36660 {
36661 if (out_n == 2 && in_n == 2)
36662 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36663 if (out_n == 4 && in_n == 4)
36664 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36665 }
36666 break;
36667
36668 case BUILT_IN_FMAF:
36669 if (out_mode == SFmode && in_mode == SFmode)
36670 {
36671 if (out_n == 4 && in_n == 4)
36672 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36673 if (out_n == 8 && in_n == 8)
36674 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36675 }
36676 break;
36677
36678 default:
36679 break;
36680 }
36681
36682 /* Dispatch to a handler for a vectorization library. */
36683 if (ix86_veclib_handler)
36684 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36685 type_in);
36686
36687 return NULL_TREE;
36688 }
36689
36690 /* Handler for an SVML-style interface to
36691 a library with vectorized intrinsics. */
36692
36693 static tree
36694 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36695 {
36696 char name[20];
36697 tree fntype, new_fndecl, args;
36698 unsigned arity;
36699 const char *bname;
36700 enum machine_mode el_mode, in_mode;
36701 int n, in_n;
36702
36703 /* SVML is suitable for unsafe math only. */
36704 if (!flag_unsafe_math_optimizations)
36705 return NULL_TREE;
36706
36707 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36708 n = TYPE_VECTOR_SUBPARTS (type_out);
36709 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36710 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36711 if (el_mode != in_mode
36712 || n != in_n)
36713 return NULL_TREE;
36714
36715 switch (fn)
36716 {
36717 case BUILT_IN_EXP:
36718 case BUILT_IN_LOG:
36719 case BUILT_IN_LOG10:
36720 case BUILT_IN_POW:
36721 case BUILT_IN_TANH:
36722 case BUILT_IN_TAN:
36723 case BUILT_IN_ATAN:
36724 case BUILT_IN_ATAN2:
36725 case BUILT_IN_ATANH:
36726 case BUILT_IN_CBRT:
36727 case BUILT_IN_SINH:
36728 case BUILT_IN_SIN:
36729 case BUILT_IN_ASINH:
36730 case BUILT_IN_ASIN:
36731 case BUILT_IN_COSH:
36732 case BUILT_IN_COS:
36733 case BUILT_IN_ACOSH:
36734 case BUILT_IN_ACOS:
36735 if (el_mode != DFmode || n != 2)
36736 return NULL_TREE;
36737 break;
36738
36739 case BUILT_IN_EXPF:
36740 case BUILT_IN_LOGF:
36741 case BUILT_IN_LOG10F:
36742 case BUILT_IN_POWF:
36743 case BUILT_IN_TANHF:
36744 case BUILT_IN_TANF:
36745 case BUILT_IN_ATANF:
36746 case BUILT_IN_ATAN2F:
36747 case BUILT_IN_ATANHF:
36748 case BUILT_IN_CBRTF:
36749 case BUILT_IN_SINHF:
36750 case BUILT_IN_SINF:
36751 case BUILT_IN_ASINHF:
36752 case BUILT_IN_ASINF:
36753 case BUILT_IN_COSHF:
36754 case BUILT_IN_COSF:
36755 case BUILT_IN_ACOSHF:
36756 case BUILT_IN_ACOSF:
36757 if (el_mode != SFmode || n != 4)
36758 return NULL_TREE;
36759 break;
36760
36761 default:
36762 return NULL_TREE;
36763 }
36764
36765 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36766
36767 if (fn == BUILT_IN_LOGF)
36768 strcpy (name, "vmlsLn4");
36769 else if (fn == BUILT_IN_LOG)
36770 strcpy (name, "vmldLn2");
36771 else if (n == 4)
36772 {
36773 sprintf (name, "vmls%s", bname+10);
36774 name[strlen (name)-1] = '4';
36775 }
36776 else
36777 sprintf (name, "vmld%s2", bname+10);
36778
36779 /* Convert to uppercase. */
36780 name[4] &= ~0x20;
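  /* For instance, BUILT_IN_SINF becomes "vmlsSin4" and BUILT_IN_SIN
     becomes "vmldSin2"; the log variants are handled specially above.  */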
36781
36782 arity = 0;
36783 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36784 args;
36785 args = TREE_CHAIN (args))
36786 arity++;
36787
36788 if (arity == 1)
36789 fntype = build_function_type_list (type_out, type_in, NULL);
36790 else
36791 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36792
36793 /* Build a function declaration for the vectorized function. */
36794 new_fndecl = build_decl (BUILTINS_LOCATION,
36795 FUNCTION_DECL, get_identifier (name), fntype);
36796 TREE_PUBLIC (new_fndecl) = 1;
36797 DECL_EXTERNAL (new_fndecl) = 1;
36798 DECL_IS_NOVOPS (new_fndecl) = 1;
36799 TREE_READONLY (new_fndecl) = 1;
36800
36801 return new_fndecl;
36802 }
36803
36804 /* Handler for an ACML-style interface to
36805 a library with vectorized intrinsics. */
36806
36807 static tree
36808 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36809 {
36810 char name[20] = "__vr.._";
36811 tree fntype, new_fndecl, args;
36812 unsigned arity;
36813 const char *bname;
36814 enum machine_mode el_mode, in_mode;
36815 int n, in_n;
36816
36817 /* ACML is 64-bit only and suitable for unsafe math only, as
36818 it does not correctly support parts of IEEE arithmetic with the
36819 required precision, such as denormals. */
36820 if (!TARGET_64BIT
36821 || !flag_unsafe_math_optimizations)
36822 return NULL_TREE;
36823
36824 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36825 n = TYPE_VECTOR_SUBPARTS (type_out);
36826 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36827 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36828 if (el_mode != in_mode
36829 || n != in_n)
36830 return NULL_TREE;
36831
36832 switch (fn)
36833 {
36834 case BUILT_IN_SIN:
36835 case BUILT_IN_COS:
36836 case BUILT_IN_EXP:
36837 case BUILT_IN_LOG:
36838 case BUILT_IN_LOG2:
36839 case BUILT_IN_LOG10:
36840 name[4] = 'd';
36841 name[5] = '2';
36842 if (el_mode != DFmode
36843 || n != 2)
36844 return NULL_TREE;
36845 break;
36846
36847 case BUILT_IN_SINF:
36848 case BUILT_IN_COSF:
36849 case BUILT_IN_EXPF:
36850 case BUILT_IN_POWF:
36851 case BUILT_IN_LOGF:
36852 case BUILT_IN_LOG2F:
36853 case BUILT_IN_LOG10F:
36854 name[4] = 's';
36855 name[5] = '4';
36856 if (el_mode != SFmode
36857 || n != 4)
36858 return NULL_TREE;
36859 break;
36860
36861 default:
36862 return NULL_TREE;
36863 }
36864
36865 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36866 sprintf (name + 7, "%s", bname+10);
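  /* For instance, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF
     yields "__vrs4_sinf".  */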
36867
36868 arity = 0;
36869 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36870 args;
36871 args = TREE_CHAIN (args))
36872 arity++;
36873
36874 if (arity == 1)
36875 fntype = build_function_type_list (type_out, type_in, NULL);
36876 else
36877 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36878
36879 /* Build a function declaration for the vectorized function. */
36880 new_fndecl = build_decl (BUILTINS_LOCATION,
36881 FUNCTION_DECL, get_identifier (name), fntype);
36882 TREE_PUBLIC (new_fndecl) = 1;
36883 DECL_EXTERNAL (new_fndecl) = 1;
36884 DECL_IS_NOVOPS (new_fndecl) = 1;
36885 TREE_READONLY (new_fndecl) = 1;
36886
36887 return new_fndecl;
36888 }
36889
36890 /* Returns a decl of a function that implements gather load with
36891 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36892 Return NULL_TREE if it is not available. */
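/* For example, a V4SF gather with an SImode index and a scale of 1, 2, 4
   or 8 maps to IX86_BUILTIN_GATHERSIV4SF below.  */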
36893
36894 static tree
36895 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36896 const_tree index_type, int scale)
36897 {
36898 bool si;
36899 enum ix86_builtins code;
36900
36901 if (! TARGET_AVX2)
36902 return NULL_TREE;
36903
36904 if ((TREE_CODE (index_type) != INTEGER_TYPE
36905 && !POINTER_TYPE_P (index_type))
36906 || (TYPE_MODE (index_type) != SImode
36907 && TYPE_MODE (index_type) != DImode))
36908 return NULL_TREE;
36909
36910 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36911 return NULL_TREE;
36912
36913 /* v*gather* insn sign extends index to pointer mode. */
36914 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36915 && TYPE_UNSIGNED (index_type))
36916 return NULL_TREE;
36917
36918 if (scale <= 0
36919 || scale > 8
36920 || (scale & (scale - 1)) != 0)
36921 return NULL_TREE;
36922
36923 si = TYPE_MODE (index_type) == SImode;
36924 switch (TYPE_MODE (mem_vectype))
36925 {
36926 case V2DFmode:
36927 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36928 break;
36929 case V4DFmode:
36930 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36931 break;
36932 case V2DImode:
36933 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36934 break;
36935 case V4DImode:
36936 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36937 break;
36938 case V4SFmode:
36939 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36940 break;
36941 case V8SFmode:
36942 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36943 break;
36944 case V4SImode:
36945 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36946 break;
36947 case V8SImode:
36948 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36949 break;
36950 case V8DFmode:
36951 if (TARGET_AVX512F)
36952 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36953 else
36954 return NULL_TREE;
36955 break;
36956 case V8DImode:
36957 if (TARGET_AVX512F)
36958 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36959 else
36960 return NULL_TREE;
36961 break;
36962 case V16SFmode:
36963 if (TARGET_AVX512F)
36964 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36965 else
36966 return NULL_TREE;
36967 break;
36968 case V16SImode:
36969 if (TARGET_AVX512F)
36970 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36971 else
36972 return NULL_TREE;
36973 break;
36974 default:
36975 return NULL_TREE;
36976 }
36977
36978 return ix86_get_builtin (code);
36979 }
36980
36981 /* Returns a decl for a target-specific builtin that implements
36982 the reciprocal of the function, or NULL_TREE if not available. */
36983
36984 static tree
36985 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
36986 {
36987 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36988 && flag_finite_math_only && !flag_trapping_math
36989 && flag_unsafe_math_optimizations))
36990 return NULL_TREE;
36991
36992 if (md_fn)
36993 /* Machine dependent builtins. */
36994 switch (fn)
36995 {
36996 /* Vectorized version of sqrt to rsqrt conversion. */
36997 case IX86_BUILTIN_SQRTPS_NR:
36998 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36999
37000 case IX86_BUILTIN_SQRTPS_NR256:
37001 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37002
37003 default:
37004 return NULL_TREE;
37005 }
37006 else
37007 /* Normal builtins. */
37008 switch (fn)
37009 {
37010 /* Sqrt to rsqrt conversion. */
37011 case BUILT_IN_SQRTF:
37012 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37013
37014 default:
37015 return NULL_TREE;
37016 }
37017 }
37018 \f
37019 /* Helper for avx_vpermilps256_operand et al. This is also used by
37020 the expansion functions to turn the parallel back into a mask.
37021 The return value is 0 for no match and the imm8+1 for a match. */
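/* For example, for V4SFmode the parallel [3 2 1 0] encodes two bits per
   element, giving mask 0x1b (reverse the four floats); the function then
   returns 0x1b + 1.  */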
37022
37023 int
37024 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37025 {
37026 unsigned i, nelt = GET_MODE_NUNITS (mode);
37027 unsigned mask = 0;
37028 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37029
37030 if (XVECLEN (par, 0) != (int) nelt)
37031 return 0;
37032
37033 /* Validate that all of the elements are constants, and not totally
37034 out of range. Copy the data into an integral array to make the
37035 subsequent checks easier. */
37036 for (i = 0; i < nelt; ++i)
37037 {
37038 rtx er = XVECEXP (par, 0, i);
37039 unsigned HOST_WIDE_INT ei;
37040
37041 if (!CONST_INT_P (er))
37042 return 0;
37043 ei = INTVAL (er);
37044 if (ei >= nelt)
37045 return 0;
37046 ipar[i] = ei;
37047 }
37048
37049 switch (mode)
37050 {
37051 case V8DFmode:
37052 /* In the 512-bit DFmode case, we can only move elements within
37053 a 128-bit lane. First fill the second part of the mask,
37054 then fallthru. */
37055 for (i = 4; i < 6; ++i)
37056 {
37057 if (ipar[i] < 4 || ipar[i] >= 6)
37058 return 0;
37059 mask |= (ipar[i] - 4) << i;
37060 }
37061 for (i = 6; i < 8; ++i)
37062 {
37063 if (ipar[i] < 6)
37064 return 0;
37065 mask |= (ipar[i] - 6) << i;
37066 }
37067 /* FALLTHRU */
37068
37069 case V4DFmode:
37070 /* In the 256-bit DFmode case, we can only move elements within
37071 a 128-bit lane. */
37072 for (i = 0; i < 2; ++i)
37073 {
37074 if (ipar[i] >= 2)
37075 return 0;
37076 mask |= ipar[i] << i;
37077 }
37078 for (i = 2; i < 4; ++i)
37079 {
37080 if (ipar[i] < 2)
37081 return 0;
37082 mask |= (ipar[i] - 2) << i;
37083 }
37084 break;
37085
37086 case V16SFmode:
37087 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37088 must mirror the permutation in the lower 256 bits. */
37089 for (i = 0; i < 8; ++i)
37090 if (ipar[i] + 8 != ipar[i + 8])
37091 return 0;
37092 /* FALLTHRU */
37093
37094 case V8SFmode:
37095 /* In the 256-bit SFmode case, we have full freedom of
37096 movement within the low 128-bit lane, but the high 128-bit
37097 lane must mirror the exact same pattern. */
37098 for (i = 0; i < 4; ++i)
37099 if (ipar[i] + 4 != ipar[i + 4])
37100 return 0;
37101 nelt = 4;
37102 /* FALLTHRU */
37103
37104 case V2DFmode:
37105 case V4SFmode:
37106 /* In the 128-bit case, we've full freedom in the placement of
37107 the elements from the source operand. */
37108 for (i = 0; i < nelt; ++i)
37109 mask |= ipar[i] << (i * (nelt / 2));
37110 break;
37111
37112 default:
37113 gcc_unreachable ();
37114 }
37115
37116 /* Make sure success has a non-zero value by adding one. */
37117 return mask + 1;
37118 }
37119
37120 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37121 the expansion functions to turn the parallel back into a mask.
37122 The return value is 0 for no match and the imm8+1 for a match. */
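/* For example, for V8SFmode the parallel [8 9 10 11 4 5 6 7] selects the
   low lane of operand 2 and the high lane of operand 1, giving imm8 0x12;
   the function returns 0x12 + 1.  */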
37123
37124 int
37125 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37126 {
37127 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37128 unsigned mask = 0;
37129 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37130
37131 if (XVECLEN (par, 0) != (int) nelt)
37132 return 0;
37133
37134 /* Validate that all of the elements are constants, and not totally
37135 out of range. Copy the data into an integral array to make the
37136 subsequent checks easier. */
37137 for (i = 0; i < nelt; ++i)
37138 {
37139 rtx er = XVECEXP (par, 0, i);
37140 unsigned HOST_WIDE_INT ei;
37141
37142 if (!CONST_INT_P (er))
37143 return 0;
37144 ei = INTVAL (er);
37145 if (ei >= 2 * nelt)
37146 return 0;
37147 ipar[i] = ei;
37148 }
37149
37150 /* Validate that each half of the permute consists of consecutive elements. */
37151 for (i = 0; i < nelt2 - 1; ++i)
37152 if (ipar[i] + 1 != ipar[i + 1])
37153 return 0;
37154 for (i = nelt2; i < nelt - 1; ++i)
37155 if (ipar[i] + 1 != ipar[i + 1])
37156 return 0;
37157
37158 /* Reconstruct the mask. */
37159 for (i = 0; i < 2; ++i)
37160 {
37161 unsigned e = ipar[i * nelt2];
37162 if (e % nelt2)
37163 return 0;
37164 e /= nelt2;
37165 mask |= e << (i * 4);
37166 }
37167
37168 /* Make sure success has a non-zero value by adding one. */
37169 return mask + 1;
37170 }
37171 \f
37172 /* Return a register priority for hard reg REGNO. */
37173 static int
37174 ix86_register_priority (int hard_regno)
37175 {
37176 /* ebp and r13 as the base always want a displacement, and r12 as the
37177 base always wants an index. So discourage their use in an
37178 address. */
37179 if (hard_regno == R12_REG || hard_regno == R13_REG)
37180 return 0;
37181 if (hard_regno == BP_REG)
37182 return 1;
37183 /* New x86-64 int registers result in bigger code size. Discourage
37184 them. */
37185 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37186 return 2;
37187 /* New x86-64 SSE registers result in bigger code size. Discourage
37188 them. */
37189 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37190 return 2;
37191 /* Usage of AX register results in smaller code. Prefer it. */
37192 if (hard_regno == 0)
37193 return 4;
37194 return 3;
37195 }
37196
37197 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37198
37199 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37200 QImode must go into class Q_REGS.
37201 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37202 movdf to do mem-to-mem moves through integer regs. */
37203
37204 static reg_class_t
37205 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37206 {
37207 enum machine_mode mode = GET_MODE (x);
37208
37209 /* We're only allowed to return a subclass of CLASS. Many of the
37210 following checks fail for NO_REGS, so eliminate that early. */
37211 if (regclass == NO_REGS)
37212 return NO_REGS;
37213
37214 /* All classes can load zeros. */
37215 if (x == CONST0_RTX (mode))
37216 return regclass;
37217
37218 /* Force constants into memory if we are loading a (nonzero) constant into
37219 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37220 instructions to load from a constant. */
37221 if (CONSTANT_P (x)
37222 && (MAYBE_MMX_CLASS_P (regclass)
37223 || MAYBE_SSE_CLASS_P (regclass)
37224 || MAYBE_MASK_CLASS_P (regclass)))
37225 return NO_REGS;
37226
37227 /* Prefer SSE regs only, if we can use them for math. */
37228 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37229 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37230
37231 /* Floating-point constants need more complex checks. */
37232 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37233 {
37234 /* General regs can load everything. */
37235 if (reg_class_subset_p (regclass, GENERAL_REGS))
37236 return regclass;
37237
37238 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37239 zero above. We only want to wind up preferring 80387 registers if
37240 we plan on doing computation with them. */
37241 if (TARGET_80387
37242 && standard_80387_constant_p (x) > 0)
37243 {
37244 /* Limit class to non-sse. */
37245 if (regclass == FLOAT_SSE_REGS)
37246 return FLOAT_REGS;
37247 if (regclass == FP_TOP_SSE_REGS)
37248 return FP_TOP_REG;
37249 if (regclass == FP_SECOND_SSE_REGS)
37250 return FP_SECOND_REG;
37251 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37252 return regclass;
37253 }
37254
37255 return NO_REGS;
37256 }
37257
37258 /* Generally when we see PLUS here, it's the function invariant
37259 (plus soft-fp const_int). Which can only be computed into general
37260 regs. */
37261 if (GET_CODE (x) == PLUS)
37262 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37263
37264 /* QImode constants are easy to load, but non-constant QImode data
37265 must go into Q_REGS. */
37266 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37267 {
37268 if (reg_class_subset_p (regclass, Q_REGS))
37269 return regclass;
37270 if (reg_class_subset_p (Q_REGS, regclass))
37271 return Q_REGS;
37272 return NO_REGS;
37273 }
37274
37275 return regclass;
37276 }
37277
37278 /* Discourage putting floating-point values in SSE registers unless
37279 SSE math is being used, and likewise for the 387 registers. */
37280 static reg_class_t
37281 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37282 {
37283 enum machine_mode mode = GET_MODE (x);
37284
37285 /* Restrict the output reload class to the register bank that we are doing
37286 math on. If we would like not to return a subset of CLASS, reject this
37287 alternative: if reload cannot do this, it will still use its choice. */
37288 mode = GET_MODE (x);
37289 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37290 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37291
37292 if (X87_FLOAT_MODE_P (mode))
37293 {
37294 if (regclass == FP_TOP_SSE_REGS)
37295 return FP_TOP_REG;
37296 else if (regclass == FP_SECOND_SSE_REGS)
37297 return FP_SECOND_REG;
37298 else
37299 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37300 }
37301
37302 return regclass;
37303 }
37304
37305 static reg_class_t
37306 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37307 enum machine_mode mode, secondary_reload_info *sri)
37308 {
37309 /* Double-word spills from general registers to non-offsettable memory
37310 references (zero-extended addresses) require special handling. */
37311 if (TARGET_64BIT
37312 && MEM_P (x)
37313 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37314 && INTEGER_CLASS_P (rclass)
37315 && !offsettable_memref_p (x))
37316 {
37317 sri->icode = (in_p
37318 ? CODE_FOR_reload_noff_load
37319 : CODE_FOR_reload_noff_store);
37320 /* Add the cost of moving address to a temporary. */
37321 sri->extra_cost = 1;
37322
37323 return NO_REGS;
37324 }
37325
37326 /* QImode spills from non-QI registers require
37327 intermediate register on 32bit targets. */
37328 if (mode == QImode
37329 && (MAYBE_MASK_CLASS_P (rclass)
37330 || (!TARGET_64BIT && !in_p
37331 && INTEGER_CLASS_P (rclass)
37332 && MAYBE_NON_Q_CLASS_P (rclass))))
37333 {
37334 int regno;
37335
37336 if (REG_P (x))
37337 regno = REGNO (x);
37338 else
37339 regno = -1;
37340
37341 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37342 regno = true_regnum (x);
37343
37344 /* Return Q_REGS if the operand is in memory. */
37345 if (regno == -1)
37346 return Q_REGS;
37347 }
37348
37349 /* This condition handles corner case where an expression involving
37350 pointers gets vectorized. We're trying to use the address of a
37351 stack slot as a vector initializer.
37352
37353 (set (reg:V2DI 74 [ vect_cst_.2 ])
37354 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37355
37356 Eventually frame gets turned into sp+offset like this:
37357
37358 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37359 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37360 (const_int 392 [0x188]))))
37361
37362 That later gets turned into:
37363
37364 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37365 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37366 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37367
37368 We'll have the following reload recorded:
37369
37370 Reload 0: reload_in (DI) =
37371 (plus:DI (reg/f:DI 7 sp)
37372 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37373 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37374 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37375 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37376 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37377 reload_reg_rtx: (reg:V2DI 22 xmm1)
37378
37379 Which isn't going to work since SSE instructions can't handle scalar
37380 additions. Returning GENERAL_REGS forces the addition into integer
37381 register and reload can handle subsequent reloads without problems. */
37382
37383 if (in_p && GET_CODE (x) == PLUS
37384 && SSE_CLASS_P (rclass)
37385 && SCALAR_INT_MODE_P (mode))
37386 return GENERAL_REGS;
37387
37388 return NO_REGS;
37389 }
37390
37391 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37392
37393 static bool
37394 ix86_class_likely_spilled_p (reg_class_t rclass)
37395 {
37396 switch (rclass)
37397 {
37398 case AREG:
37399 case DREG:
37400 case CREG:
37401 case BREG:
37402 case AD_REGS:
37403 case SIREG:
37404 case DIREG:
37405 case SSE_FIRST_REG:
37406 case FP_TOP_REG:
37407 case FP_SECOND_REG:
37408 return true;
37409
37410 default:
37411 break;
37412 }
37413
37414 return false;
37415 }
37416
37417 /* If we are copying between general and FP registers, we need a memory
37418 location. The same is true for SSE and MMX registers.
37419
37420 To optimize register_move_cost performance, allow inline variant.
37421
37422 The macro can't work reliably when one of the CLASSES is a class containing
37423 registers from multiple units (SSE, MMX, integer). We avoid this by never
37424 combining those units in single alternative in the machine description.
37425 Ensure that this constraint holds to avoid unexpected surprises.
37426
37427 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37428 enforce these sanity checks. */
37429
37430 static inline bool
37431 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37432 enum machine_mode mode, int strict)
37433 {
37434 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37435 return false;
37436 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37437 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37438 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37439 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37440 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37441 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37442 {
37443 gcc_assert (!strict || lra_in_progress);
37444 return true;
37445 }
37446
37447 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37448 return true;
37449
37450 /* Between mask and general, we have moves no larger than word size. */
37451 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37452 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37453 return true;
37454
37455 /* ??? This is a lie. We do have moves between mmx/general, and between
37456 mmx/sse2. But by saying we need secondary memory we discourage the
37457 register allocator from using the mmx registers unless needed. */
37458 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37459 return true;
37460
37461 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37462 {
37463 /* SSE1 doesn't have any direct moves from other classes. */
37464 if (!TARGET_SSE2)
37465 return true;
37466
37467 /* If the target says that inter-unit moves are more expensive
37468 than moving through memory, then don't generate them. */
37469 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37470 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37471 return true;
37472
37473 /* Between SSE and general, we have moves no larger than word size. */
37474 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37475 return true;
37476 }
37477
37478 return false;
37479 }
37480
37481 bool
37482 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37483 enum machine_mode mode, int strict)
37484 {
37485 return inline_secondary_memory_needed (class1, class2, mode, strict);
37486 }
37487
37488 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37489
37490 On the 80386, this is the size of MODE in words,
37491 except in the FP regs, where a single reg is always enough. */
37492
37493 static unsigned char
37494 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37495 {
37496 if (MAYBE_INTEGER_CLASS_P (rclass))
37497 {
37498 if (mode == XFmode)
37499 return (TARGET_64BIT ? 2 : 3);
37500 else if (mode == XCmode)
37501 return (TARGET_64BIT ? 4 : 6);
37502 else
37503 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37504 }
37505 else
37506 {
37507 if (COMPLEX_MODE_P (mode))
37508 return 2;
37509 else
37510 return 1;
37511 }
37512 }
37513
37514 /* Return true if the registers in CLASS cannot represent the change from
37515 modes FROM to TO. */
37516
37517 bool
37518 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37519 enum reg_class regclass)
37520 {
37521 if (from == to)
37522 return false;
37523
37524 /* x87 registers can't do subreg at all, as all values are reformatted
37525 to extended precision. */
37526 if (MAYBE_FLOAT_CLASS_P (regclass))
37527 return true;
37528
37529 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37530 {
37531 /* Vector registers do not support QI or HImode loads. If we don't
37532 disallow a change to these modes, reload will assume it's ok to
37533 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37534 the vec_dupv4hi pattern. */
37535 if (GET_MODE_SIZE (from) < 4)
37536 return true;
37537 }
37538
37539 return false;
37540 }
37541
37542 /* Return the cost of moving data of mode M between a
37543 register and memory. A value of 2 is the default; this cost is
37544 relative to those in `REGISTER_MOVE_COST'.
37545
37546 This function is used extensively by register_move_cost that is used to
37547 build tables at startup. Make it inline in this case.
37548 When IN is 2, return maximum of in and out move cost.
37549
37550 If moving between registers and memory is more expensive than
37551 between two registers, you should define this macro to express the
37552 relative cost.
37553
37554 Also model the increased cost of moving QImode registers in non
37555 Q_REGS classes.
37556 */
37557 static inline int
37558 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37559 int in)
37560 {
37561 int cost;
37562 if (FLOAT_CLASS_P (regclass))
37563 {
37564 int index;
37565 switch (mode)
37566 {
37567 case SFmode:
37568 index = 0;
37569 break;
37570 case DFmode:
37571 index = 1;
37572 break;
37573 case XFmode:
37574 index = 2;
37575 break;
37576 default:
37577 return 100;
37578 }
37579 if (in == 2)
37580 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37581 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37582 }
37583 if (SSE_CLASS_P (regclass))
37584 {
37585 int index;
37586 switch (GET_MODE_SIZE (mode))
37587 {
37588 case 4:
37589 index = 0;
37590 break;
37591 case 8:
37592 index = 1;
37593 break;
37594 case 16:
37595 index = 2;
37596 break;
37597 default:
37598 return 100;
37599 }
37600 if (in == 2)
37601 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37602 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37603 }
37604 if (MMX_CLASS_P (regclass))
37605 {
37606 int index;
37607 switch (GET_MODE_SIZE (mode))
37608 {
37609 case 4:
37610 index = 0;
37611 break;
37612 case 8:
37613 index = 1;
37614 break;
37615 default:
37616 return 100;
37617 }
37618 if (in == 2)
37619 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37620 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37621 }
37622 switch (GET_MODE_SIZE (mode))
37623 {
37624 case 1:
37625 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37626 {
37627 if (!in)
37628 return ix86_cost->int_store[0];
37629 if (TARGET_PARTIAL_REG_DEPENDENCY
37630 && optimize_function_for_speed_p (cfun))
37631 cost = ix86_cost->movzbl_load;
37632 else
37633 cost = ix86_cost->int_load[0];
37634 if (in == 2)
37635 return MAX (cost, ix86_cost->int_store[0]);
37636 return cost;
37637 }
37638 else
37639 {
37640 if (in == 2)
37641 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37642 if (in)
37643 return ix86_cost->movzbl_load;
37644 else
37645 return ix86_cost->int_store[0] + 4;
37646 }
37647 break;
37648 case 2:
37649 if (in == 2)
37650 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37651 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37652 default:
37653 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37654 if (mode == TFmode)
37655 mode = XFmode;
37656 if (in == 2)
37657 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37658 else if (in)
37659 cost = ix86_cost->int_load[2];
37660 else
37661 cost = ix86_cost->int_store[2];
37662 return (cost * (((int) GET_MODE_SIZE (mode)
37663 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37664 }
37665 }
37666
37667 static int
37668 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37669 bool in)
37670 {
37671 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37672 }
37673
37674
37675 /* Return the cost of moving data from a register in class CLASS1 to
37676 one in class CLASS2.
37677
37678 It is not required that the cost always equal 2 when FROM is the same as TO;
37679 on some machines it is expensive to move between registers if they are not
37680 general registers. */
37681
37682 static int
37683 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37684 reg_class_t class2_i)
37685 {
37686 enum reg_class class1 = (enum reg_class) class1_i;
37687 enum reg_class class2 = (enum reg_class) class2_i;
37688
37689 /* In case we require secondary memory, compute cost of the store followed
37690 by load. In order to avoid bad register allocation choices, we need
37691 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37692
37693 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37694 {
37695 int cost = 1;
37696
37697 cost += inline_memory_move_cost (mode, class1, 2);
37698 cost += inline_memory_move_cost (mode, class2, 2);
37699
37700 /* In case of copying from general_purpose_register we may emit multiple
37701 stores followed by single load causing memory size mismatch stall.
37702 Count this as arbitrarily high cost of 20. */
37703 if (targetm.class_max_nregs (class1, mode)
37704 > targetm.class_max_nregs (class2, mode))
37705 cost += 20;
37706
37707 /* In the case of FP/MMX moves, the registers actually overlap, and we
37708 have to switch modes in order to treat them differently. */
37709 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37710 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37711 cost += 20;
37712
37713 return cost;
37714 }
37715
37716 /* Moves between SSE/MMX and integer unit are expensive. */
37717 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37718 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37719
37720 /* ??? By keeping the returned value relatively high, we limit the number
37721 of moves between integer and MMX/SSE registers for all targets.
37722 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
37723 where integer modes in MMX/SSE registers are not tieable
37724 because of missing QImode and HImode moves to, from or between
37725 MMX/SSE registers. */
37726 return MAX (8, ix86_cost->mmxsse_to_integer);
37727
37728 if (MAYBE_FLOAT_CLASS_P (class1))
37729 return ix86_cost->fp_move;
37730 if (MAYBE_SSE_CLASS_P (class1))
37731 return ix86_cost->sse_move;
37732 if (MAYBE_MMX_CLASS_P (class1))
37733 return ix86_cost->mmx_move;
37734 return 2;
37735 }
37736
37737 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37738 MODE. */
37739
37740 bool
37741 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37742 {
37743 /* Flags and only flags can only hold CCmode values. */
37744 if (CC_REGNO_P (regno))
37745 return GET_MODE_CLASS (mode) == MODE_CC;
37746 if (GET_MODE_CLASS (mode) == MODE_CC
37747 || GET_MODE_CLASS (mode) == MODE_RANDOM
37748 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37749 return false;
37750 if (STACK_REGNO_P (regno))
37751 return VALID_FP_MODE_P (mode);
37752 if (MASK_REGNO_P (regno))
37753 return (VALID_MASK_REG_MODE (mode)
37754 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37755 if (SSE_REGNO_P (regno))
37756 {
37757 /* We implement the move patterns for all vector modes into and
37758 out of SSE registers, even when no operation instructions
37759 are available. */
37760
37761 /* For AVX-512 we allow, regardless of regno:
37762 - XI mode
37763 - any 512-bit wide vector mode
37764 - any scalar mode. */
37765 if (TARGET_AVX512F
37766 && (mode == XImode
37767 || VALID_AVX512F_REG_MODE (mode)
37768 || VALID_AVX512F_SCALAR_MODE (mode)))
37769 return true;
37770
37771 /* TODO check for QI/HI scalars. */
37772 /* AVX512VL allows SSE registers 16+ to hold 128/256 bit modes. */
37773 if (TARGET_AVX512VL
37774 && (mode == OImode
37775 || mode == TImode
37776 || VALID_AVX256_REG_MODE (mode)
37777 || VALID_AVX512VL_128_REG_MODE (mode)))
37778 return true;
37779
37780 /* xmm16-xmm31 are only available for AVX-512. */
37781 if (EXT_REX_SSE_REGNO_P (regno))
37782 return false;
37783
37784 /* OImode and AVX modes are available only when AVX is enabled. */
37785 return ((TARGET_AVX
37786 && VALID_AVX256_REG_OR_OI_MODE (mode))
37787 || VALID_SSE_REG_MODE (mode)
37788 || VALID_SSE2_REG_MODE (mode)
37789 || VALID_MMX_REG_MODE (mode)
37790 || VALID_MMX_REG_MODE_3DNOW (mode));
37791 }
37792 if (MMX_REGNO_P (regno))
37793 {
37794 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37795 so if the register is available at all, then we can move data of
37796 the given mode into or out of it. */
37797 return (VALID_MMX_REG_MODE (mode)
37798 || VALID_MMX_REG_MODE_3DNOW (mode));
37799 }
37800
37801 if (mode == QImode)
37802 {
37803 /* Take care with QImode values - they can be in non-QI regs,
37804 but then they do cause partial register stalls. */
37805 if (ANY_QI_REGNO_P (regno))
37806 return true;
37807 if (!TARGET_PARTIAL_REG_STALL)
37808 return true;
37809 /* LRA checks if the hard register is OK for the given mode.
37810 QImode values can live in non-QI regs, so we allow all
37811 registers here. */
37812 if (lra_in_progress)
37813 return true;
37814 return !can_create_pseudo_p ();
37815 }
37816 /* We handle both integer and floats in the general purpose registers. */
37817 else if (VALID_INT_MODE_P (mode))
37818 return true;
37819 else if (VALID_FP_MODE_P (mode))
37820 return true;
37821 else if (VALID_DFP_MODE_P (mode))
37822 return true;
37823 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37824 on to use that value in smaller contexts, this can easily force a
37825 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37826 supporting DImode, allow it. */
37827 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37828 return true;
37829
37830 return false;
37831 }
37832
37833 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37834 tieable integer mode. */
37835
37836 static bool
37837 ix86_tieable_integer_mode_p (enum machine_mode mode)
37838 {
37839 switch (mode)
37840 {
37841 case HImode:
37842 case SImode:
37843 return true;
37844
37845 case QImode:
37846 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37847
37848 case DImode:
37849 return TARGET_64BIT;
37850
37851 default:
37852 return false;
37853 }
37854 }
37855
37856 /* Return true if MODE1 is accessible in a register that can hold MODE2
37857 without copying. That is, all register classes that can hold MODE2
37858 can also hold MODE1. */
37859
37860 bool
37861 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37862 {
37863 if (mode1 == mode2)
37864 return true;
37865
37866 if (ix86_tieable_integer_mode_p (mode1)
37867 && ix86_tieable_integer_mode_p (mode2))
37868 return true;
37869
37870 /* MODE2 being XFmode implies fp stack or general regs, which means we
37871 can tie any smaller floating point modes to it. Note that we do not
37872 tie this with TFmode. */
37873 if (mode2 == XFmode)
37874 return mode1 == SFmode || mode1 == DFmode;
37875
37876 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37877 that we can tie it with SFmode. */
37878 if (mode2 == DFmode)
37879 return mode1 == SFmode;
37880
37881 /* If MODE2 is only appropriate for an SSE register, then tie with
37882 any other mode acceptable to SSE registers. */
37883 if (GET_MODE_SIZE (mode2) == 32
37884 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37885 return (GET_MODE_SIZE (mode1) == 32
37886 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37887 if (GET_MODE_SIZE (mode2) == 16
37888 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37889 return (GET_MODE_SIZE (mode1) == 16
37890 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37891
37892 /* If MODE2 is appropriate for an MMX register, then tie
37893 with any other mode acceptable to MMX registers. */
37894 if (GET_MODE_SIZE (mode2) == 8
37895 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37896 return (GET_MODE_SIZE (mode1) == 8
37897 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37898
37899 return false;
37900 }
37901
37902 /* Return the cost of moving between two registers of mode MODE. */
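/* For instance, on an AVX target a 32 byte V8SFmode copy is a single vector
   move, so UNITS becomes 32 and the result is COSTS_N_INSNS (1); without AVX
   the same mode is assumed to be moved in word sized pieces and costs
   proportionally more. */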
37903
37904 static int
37905 ix86_set_reg_reg_cost (enum machine_mode mode)
37906 {
37907 unsigned int units = UNITS_PER_WORD;
37908
37909 switch (GET_MODE_CLASS (mode))
37910 {
37911 default:
37912 break;
37913
37914 case MODE_CC:
37915 units = GET_MODE_SIZE (CCmode);
37916 break;
37917
37918 case MODE_FLOAT:
37919 if ((TARGET_SSE && mode == TFmode)
37920 || (TARGET_80387 && mode == XFmode)
37921 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37922 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37923 units = GET_MODE_SIZE (mode);
37924 break;
37925
37926 case MODE_COMPLEX_FLOAT:
37927 if ((TARGET_SSE && mode == TCmode)
37928 || (TARGET_80387 && mode == XCmode)
37929 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37930 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37931 units = GET_MODE_SIZE (mode);
37932 break;
37933
37934 case MODE_VECTOR_INT:
37935 case MODE_VECTOR_FLOAT:
37936 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37937 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37938 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37939 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37940 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37941 units = GET_MODE_SIZE (mode);
37942 }
37943
37944 /* Return the cost of moving between two registers of mode MODE,
37945 assuming that the move will be in pieces of at most UNITS bytes. */
37946 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37947 }
37948
37949 /* Compute a (partial) cost for rtx X. Return true if the complete
37950 cost has been computed, and false if subexpressions should be
37951 scanned. In either case, *TOTAL contains the cost result. */
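
   Very roughly: simple register (or zero) SETs are costed via
   ix86_set_reg_reg_cost, immediates cost 0-3 depending on how they encode,
   scalar FP operations use the fadd/fmul/fdiv/fsqrt/fabs entries of the
   selected cost table, and most integer vector operations are approximated
   as multiples of the fabs cost.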
37952
37953 static bool
37954 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37955 bool speed)
37956 {
37957 rtx mask;
37958 enum rtx_code code = (enum rtx_code) code_i;
37959 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37960 enum machine_mode mode = GET_MODE (x);
37961 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37962
37963 switch (code)
37964 {
37965 case SET:
37966 if (register_operand (SET_DEST (x), VOIDmode)
37967 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37968 {
37969 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37970 return true;
37971 }
37972 return false;
37973
37974 case CONST_INT:
37975 case CONST:
37976 case LABEL_REF:
37977 case SYMBOL_REF:
37978 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37979 *total = 3;
37980 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37981 *total = 2;
37982 else if (flag_pic && SYMBOLIC_CONST (x)
37983 && !(TARGET_64BIT
37984 && (GET_CODE (x) == LABEL_REF
37985 || (GET_CODE (x) == SYMBOL_REF
37986 && SYMBOL_REF_LOCAL_P (x)))))
37987 *total = 1;
37988 else
37989 *total = 0;
37990 return true;
37991
37992 case CONST_DOUBLE:
37993 if (mode == VOIDmode)
37994 {
37995 *total = 0;
37996 return true;
37997 }
37998 switch (standard_80387_constant_p (x))
37999 {
38000 case 1: /* 0.0 */
38001 *total = 1;
38002 return true;
38003 default: /* Other constants */
38004 *total = 2;
38005 return true;
38006 case 0:
38007 case -1:
38008 break;
38009 }
38010 if (SSE_FLOAT_MODE_P (mode))
38011 {
38012 case CONST_VECTOR:
38013 switch (standard_sse_constant_p (x))
38014 {
38015 case 0:
38016 break;
38017 case 1: /* 0: xor eliminates false dependency */
38018 *total = 0;
38019 return true;
38020 default: /* -1: cmp contains false dependency */
38021 *total = 1;
38022 return true;
38023 }
38024 }
38025 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38026 it'll probably end up. Add a penalty for size. */
38027 *total = (COSTS_N_INSNS (1)
38028 + (flag_pic != 0 && !TARGET_64BIT)
38029 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38030 return true;
38031
38032 case ZERO_EXTEND:
38033 /* The zero extension is often completely free on x86_64, so make
38034 it as cheap as possible. */
38035 if (TARGET_64BIT && mode == DImode
38036 && GET_MODE (XEXP (x, 0)) == SImode)
38037 *total = 1;
38038 else if (TARGET_ZERO_EXTEND_WITH_AND)
38039 *total = cost->add;
38040 else
38041 *total = cost->movzx;
38042 return false;
38043
38044 case SIGN_EXTEND:
38045 *total = cost->movsx;
38046 return false;
38047
38048 case ASHIFT:
38049 if (SCALAR_INT_MODE_P (mode)
38050 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38051 && CONST_INT_P (XEXP (x, 1)))
38052 {
38053 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38054 if (value == 1)
38055 {
38056 *total = cost->add;
38057 return false;
38058 }
38059 if ((value == 2 || value == 3)
38060 && cost->lea <= cost->shift_const)
38061 {
38062 *total = cost->lea;
38063 return false;
38064 }
38065 }
38066 /* FALLTHRU */
38067
38068 case ROTATE:
38069 case ASHIFTRT:
38070 case LSHIFTRT:
38071 case ROTATERT:
38072 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38073 {
38074 /* ??? Should be SSE vector operation cost. */
38075 /* At least for published AMD latencies, this really is the same
38076 as the latency for a simple fpu operation like fabs. */
38077 /* V*QImode is emulated with 1-11 insns. */
38078 if (mode == V16QImode || mode == V32QImode)
38079 {
38080 int count = 11;
38081 if (TARGET_XOP && mode == V16QImode)
38082 {
38083 /* For XOP we use vpshab, which requires a broadcast of the
38084 value to the variable shift insn. For constants this
38085 means a V16QImode const in mem; even when we can perform the
38086 shift with one insn, set the cost to prefer paddb. */
38087 if (CONSTANT_P (XEXP (x, 1)))
38088 {
38089 *total = (cost->fabs
38090 + rtx_cost (XEXP (x, 0), code, 0, speed)
38091 + (speed ? 2 : COSTS_N_BYTES (16)));
38092 return true;
38093 }
38094 count = 3;
38095 }
38096 else if (TARGET_SSSE3)
38097 count = 7;
38098 *total = cost->fabs * count;
38099 }
38100 else
38101 *total = cost->fabs;
38102 }
38103 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38104 {
38105 if (CONST_INT_P (XEXP (x, 1)))
38106 {
38107 if (INTVAL (XEXP (x, 1)) > 32)
38108 *total = cost->shift_const + COSTS_N_INSNS (2);
38109 else
38110 *total = cost->shift_const * 2;
38111 }
38112 else
38113 {
38114 if (GET_CODE (XEXP (x, 1)) == AND)
38115 *total = cost->shift_var * 2;
38116 else
38117 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38118 }
38119 }
38120 else
38121 {
38122 if (CONST_INT_P (XEXP (x, 1)))
38123 *total = cost->shift_const;
38124 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38125 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38126 {
38127 /* Return the cost after shift-and truncation. */
38128 *total = cost->shift_var;
38129 return true;
38130 }
38131 else
38132 *total = cost->shift_var;
38133 }
38134 return false;
38135
38136 case FMA:
38137 {
38138 rtx sub;
38139
38140 gcc_assert (FLOAT_MODE_P (mode));
38141 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38142
38143 /* ??? SSE scalar/vector cost should be used here. */
38144 /* ??? Bald assumption that fma has the same cost as fmul. */
38145 *total = cost->fmul;
38146 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38147
38148 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38149 sub = XEXP (x, 0);
38150 if (GET_CODE (sub) == NEG)
38151 sub = XEXP (sub, 0);
38152 *total += rtx_cost (sub, FMA, 0, speed);
38153
38154 sub = XEXP (x, 2);
38155 if (GET_CODE (sub) == NEG)
38156 sub = XEXP (sub, 0);
38157 *total += rtx_cost (sub, FMA, 2, speed);
38158 return true;
38159 }
38160
38161 case MULT:
38162 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38163 {
38164 /* ??? SSE scalar cost should be used here. */
38165 *total = cost->fmul;
38166 return false;
38167 }
38168 else if (X87_FLOAT_MODE_P (mode))
38169 {
38170 *total = cost->fmul;
38171 return false;
38172 }
38173 else if (FLOAT_MODE_P (mode))
38174 {
38175 /* ??? SSE vector cost should be used here. */
38176 *total = cost->fmul;
38177 return false;
38178 }
38179 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38180 {
38181 /* V*QImode is emulated with 7-13 insns. */
38182 if (mode == V16QImode || mode == V32QImode)
38183 {
38184 int extra = 11;
38185 if (TARGET_XOP && mode == V16QImode)
38186 extra = 5;
38187 else if (TARGET_SSSE3)
38188 extra = 6;
38189 *total = cost->fmul * 2 + cost->fabs * extra;
38190 }
38191 /* V*DImode is emulated with 5-8 insns. */
38192 else if (mode == V2DImode || mode == V4DImode)
38193 {
38194 if (TARGET_XOP && mode == V2DImode)
38195 *total = cost->fmul * 2 + cost->fabs * 3;
38196 else
38197 *total = cost->fmul * 3 + cost->fabs * 5;
38198 }
38199 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38200 insns, including two PMULUDQ. */
38201 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38202 *total = cost->fmul * 2 + cost->fabs * 5;
38203 else
38204 *total = cost->fmul;
38205 return false;
38206 }
38207 else
38208 {
38209 rtx op0 = XEXP (x, 0);
38210 rtx op1 = XEXP (x, 1);
38211 int nbits;
38212 if (CONST_INT_P (XEXP (x, 1)))
38213 {
38214 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38215 for (nbits = 0; value != 0; value &= value - 1)
38216 nbits++;
38217 }
38218 else
38219 /* This is arbitrary. */
38220 nbits = 7;
38221
38222 /* Compute costs correctly for widening multiplication. */
38223 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38224 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38225 == GET_MODE_SIZE (mode))
38226 {
38227 int is_mulwiden = 0;
38228 enum machine_mode inner_mode = GET_MODE (op0);
38229
38230 if (GET_CODE (op0) == GET_CODE (op1))
38231 is_mulwiden = 1, op1 = XEXP (op1, 0);
38232 else if (CONST_INT_P (op1))
38233 {
38234 if (GET_CODE (op0) == SIGN_EXTEND)
38235 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38236 == INTVAL (op1);
38237 else
38238 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38239 }
38240
38241 if (is_mulwiden)
38242 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38243 }
38244
38245 *total = (cost->mult_init[MODE_INDEX (mode)]
38246 + nbits * cost->mult_bit
38247 + rtx_cost (op0, outer_code, opno, speed)
38248 + rtx_cost (op1, outer_code, opno, speed));
38249
38250 return true;
38251 }
38252
38253 case DIV:
38254 case UDIV:
38255 case MOD:
38256 case UMOD:
38257 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38258 /* ??? SSE cost should be used here. */
38259 *total = cost->fdiv;
38260 else if (X87_FLOAT_MODE_P (mode))
38261 *total = cost->fdiv;
38262 else if (FLOAT_MODE_P (mode))
38263 /* ??? SSE vector cost should be used here. */
38264 *total = cost->fdiv;
38265 else
38266 *total = cost->divide[MODE_INDEX (mode)];
38267 return false;
38268
38269 case PLUS:
38270 if (GET_MODE_CLASS (mode) == MODE_INT
38271 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38272 {
38273 if (GET_CODE (XEXP (x, 0)) == PLUS
38274 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38275 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38276 && CONSTANT_P (XEXP (x, 1)))
38277 {
38278 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38279 if (val == 2 || val == 4 || val == 8)
38280 {
38281 *total = cost->lea;
38282 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38283 outer_code, opno, speed);
38284 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38285 outer_code, opno, speed);
38286 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38287 return true;
38288 }
38289 }
38290 else if (GET_CODE (XEXP (x, 0)) == MULT
38291 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38292 {
38293 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38294 if (val == 2 || val == 4 || val == 8)
38295 {
38296 *total = cost->lea;
38297 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38298 outer_code, opno, speed);
38299 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38300 return true;
38301 }
38302 }
38303 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38304 {
38305 *total = cost->lea;
38306 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38307 outer_code, opno, speed);
38308 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38309 outer_code, opno, speed);
38310 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38311 return true;
38312 }
38313 }
38314 /* FALLTHRU */
38315
38316 case MINUS:
38317 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38318 {
38319 /* ??? SSE cost should be used here. */
38320 *total = cost->fadd;
38321 return false;
38322 }
38323 else if (X87_FLOAT_MODE_P (mode))
38324 {
38325 *total = cost->fadd;
38326 return false;
38327 }
38328 else if (FLOAT_MODE_P (mode))
38329 {
38330 /* ??? SSE vector cost should be used here. */
38331 *total = cost->fadd;
38332 return false;
38333 }
38334 /* FALLTHRU */
38335
38336 case AND:
38337 case IOR:
38338 case XOR:
38339 if (GET_MODE_CLASS (mode) == MODE_INT
38340 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38341 {
38342 *total = (cost->add * 2
38343 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38344 << (GET_MODE (XEXP (x, 0)) != DImode))
38345 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38346 << (GET_MODE (XEXP (x, 1)) != DImode)));
38347 return true;
38348 }
38349 /* FALLTHRU */
38350
38351 case NEG:
38352 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38353 {
38354 /* ??? SSE cost should be used here. */
38355 *total = cost->fchs;
38356 return false;
38357 }
38358 else if (X87_FLOAT_MODE_P (mode))
38359 {
38360 *total = cost->fchs;
38361 return false;
38362 }
38363 else if (FLOAT_MODE_P (mode))
38364 {
38365 /* ??? SSE vector cost should be used here. */
38366 *total = cost->fchs;
38367 return false;
38368 }
38369 /* FALLTHRU */
38370
38371 case NOT:
38372 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38373 {
38374 /* ??? Should be SSE vector operation cost. */
38375 /* At least for published AMD latencies, this really is the same
38376 as the latency for a simple fpu operation like fabs. */
38377 *total = cost->fabs;
38378 }
38379 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38380 *total = cost->add * 2;
38381 else
38382 *total = cost->add;
38383 return false;
38384
38385 case COMPARE:
38386 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38387 && XEXP (XEXP (x, 0), 1) == const1_rtx
38388 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38389 && XEXP (x, 1) == const0_rtx)
38390 {
38391 /* This kind of construct is implemented using test[bwl].
38392 Treat it as if we had an AND. */
38393 *total = (cost->add
38394 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38395 + rtx_cost (const1_rtx, outer_code, opno, speed));
38396 return true;
38397 }
38398 return false;
38399
38400 case FLOAT_EXTEND:
38401 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38402 *total = 0;
38403 return false;
38404
38405 case ABS:
38406 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38407 /* ??? SSE cost should be used here. */
38408 *total = cost->fabs;
38409 else if (X87_FLOAT_MODE_P (mode))
38410 *total = cost->fabs;
38411 else if (FLOAT_MODE_P (mode))
38412 /* ??? SSE vector cost should be used here. */
38413 *total = cost->fabs;
38414 return false;
38415
38416 case SQRT:
38417 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38418 /* ??? SSE cost should be used here. */
38419 *total = cost->fsqrt;
38420 else if (X87_FLOAT_MODE_P (mode))
38421 *total = cost->fsqrt;
38422 else if (FLOAT_MODE_P (mode))
38423 /* ??? SSE vector cost should be used here. */
38424 *total = cost->fsqrt;
38425 return false;
38426
38427 case UNSPEC:
38428 if (XINT (x, 1) == UNSPEC_TP)
38429 *total = 0;
38430 return false;
38431
38432 case VEC_SELECT:
38433 case VEC_CONCAT:
38434 case VEC_DUPLICATE:
38435 /* ??? Assume all of these vector manipulation patterns are
38436 recognizable, in which case they all pretty much have the
38437 same cost. */
38438 *total = cost->fabs;
38439 return true;
38440 case VEC_MERGE:
38441 mask = XEXP (x, 2);
38442 /* This is a masked instruction; assume the same cost
38443 as the non-masked variant. */
38444 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38445 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38446 else
38447 *total = cost->fabs;
38448 return true;
38449
38450 default:
38451 return false;
38452 }
38453 }
38454
38455 #if TARGET_MACHO
38456
38457 static int current_machopic_label_num;
38458
38459 /* Given a symbol name and its associated stub, write out the
38460 definition of the stub. */
38461
38462 void
38463 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38464 {
38465 unsigned int length;
38466 char *binder_name, *symbol_name, lazy_ptr_name[32];
38467 int label = ++current_machopic_label_num;
38468
38469 /* For 64-bit we shouldn't get here. */
38470 gcc_assert (!TARGET_64BIT);
38471
38472 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38473 symb = targetm.strip_name_encoding (symb);
38474
38475 length = strlen (stub);
38476 binder_name = XALLOCAVEC (char, length + 32);
38477 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38478
38479 length = strlen (symb);
38480 symbol_name = XALLOCAVEC (char, length + 32);
38481 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38482
38483 sprintf (lazy_ptr_name, "L%d$lz", label);
38484
38485 if (MACHOPIC_ATT_STUB)
38486 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38487 else if (MACHOPIC_PURE)
38488 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38489 else
38490 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38491
38492 fprintf (file, "%s:\n", stub);
38493 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38494
38495 if (MACHOPIC_ATT_STUB)
38496 {
38497 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38498 }
38499 else if (MACHOPIC_PURE)
38500 {
38501 /* PIC stub. */
38502 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38503 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38504 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38505 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38506 label, lazy_ptr_name, label);
38507 fprintf (file, "\tjmp\t*%%ecx\n");
38508 }
38509 else
38510 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38511
38512 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38513 it needs no stub-binding-helper. */
38514 if (MACHOPIC_ATT_STUB)
38515 return;
38516
38517 fprintf (file, "%s:\n", binder_name);
38518
38519 if (MACHOPIC_PURE)
38520 {
38521 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38522 fprintf (file, "\tpushl\t%%ecx\n");
38523 }
38524 else
38525 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38526
38527 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38528
38529 /* N.B. Keep the correspondence of these
38530 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38531 old-pic/new-pic/non-pic stubs; altering this will break
38532 compatibility with existing dylibs. */
38533 if (MACHOPIC_PURE)
38534 {
38535 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38536 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38537 }
38538 else
38539 /* 16-byte -mdynamic-no-pic stub. */
38540 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38541
38542 fprintf (file, "%s:\n", lazy_ptr_name);
38543 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38544 fprintf (file, ASM_LONG "%s\n", binder_name);
38545 }
38546 #endif /* TARGET_MACHO */
38547
38548 /* Order the registers for register allocator. */
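/* Caller-saved general purpose registers are handed out first so that short
   lived values do not force saves and restores of callee-saved registers;
   x87 registers are preferred over SSE registers only when SSE math is not
   in use. */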
38549
38550 void
38551 x86_order_regs_for_local_alloc (void)
38552 {
38553 int pos = 0;
38554 int i;
38555
38556 /* First allocate the local general purpose registers. */
38557 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38558 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38559 reg_alloc_order [pos++] = i;
38560
38561 /* Global general purpose registers. */
38562 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38563 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38564 reg_alloc_order [pos++] = i;
38565
38566 /* x87 registers come first in case we are doing FP math
38567 using them. */
38568 if (!TARGET_SSE_MATH)
38569 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38570 reg_alloc_order [pos++] = i;
38571
38572 /* SSE registers. */
38573 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38574 reg_alloc_order [pos++] = i;
38575 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38576 reg_alloc_order [pos++] = i;
38577
38578 /* Extended REX SSE registers. */
38579 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38580 reg_alloc_order [pos++] = i;
38581
38582 /* Mask registers. */
38583 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38584 reg_alloc_order [pos++] = i;
38585
38586 /* x87 registers. */
38587 if (TARGET_SSE_MATH)
38588 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38589 reg_alloc_order [pos++] = i;
38590
38591 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38592 reg_alloc_order [pos++] = i;
38593
38594 /* Initialize the rest of the array, as we do not allocate some registers
38595 at all. */
38596 while (pos < FIRST_PSEUDO_REGISTER)
38597 reg_alloc_order [pos++] = 0;
38598 }
38599
38600 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38601 in struct attribute_spec.handler. */
38602 static tree
38603 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38604 tree args,
38605 int,
38606 bool *no_add_attrs)
38607 {
38608 if (TREE_CODE (*node) != FUNCTION_TYPE
38609 && TREE_CODE (*node) != METHOD_TYPE
38610 && TREE_CODE (*node) != FIELD_DECL
38611 && TREE_CODE (*node) != TYPE_DECL)
38612 {
38613 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38614 name);
38615 *no_add_attrs = true;
38616 return NULL_TREE;
38617 }
38618 if (TARGET_64BIT)
38619 {
38620 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38621 name);
38622 *no_add_attrs = true;
38623 return NULL_TREE;
38624 }
38625 if (is_attribute_p ("callee_pop_aggregate_return", name))
38626 {
38627 tree cst;
38628
38629 cst = TREE_VALUE (args);
38630 if (TREE_CODE (cst) != INTEGER_CST)
38631 {
38632 warning (OPT_Wattributes,
38633 "%qE attribute requires an integer constant argument",
38634 name);
38635 *no_add_attrs = true;
38636 }
38637 else if (compare_tree_int (cst, 0) != 0
38638 && compare_tree_int (cst, 1) != 0)
38639 {
38640 warning (OPT_Wattributes,
38641 "argument to %qE attribute is neither zero, nor one",
38642 name);
38643 *no_add_attrs = true;
38644 }
38645
38646 return NULL_TREE;
38647 }
38648
38649 return NULL_TREE;
38650 }
38651
38652 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38653 struct attribute_spec.handler. */
38654 static tree
38655 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38656 bool *no_add_attrs)
38657 {
38658 if (TREE_CODE (*node) != FUNCTION_TYPE
38659 && TREE_CODE (*node) != METHOD_TYPE
38660 && TREE_CODE (*node) != FIELD_DECL
38661 && TREE_CODE (*node) != TYPE_DECL)
38662 {
38663 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38664 name);
38665 *no_add_attrs = true;
38666 return NULL_TREE;
38667 }
38668
38669 /* Can combine regparm with all attributes but fastcall. */
38670 if (is_attribute_p ("ms_abi", name))
38671 {
38672 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38673 {
38674 error ("ms_abi and sysv_abi attributes are not compatible");
38675 }
38676
38677 return NULL_TREE;
38678 }
38679 else if (is_attribute_p ("sysv_abi", name))
38680 {
38681 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38682 {
38683 error ("ms_abi and sysv_abi attributes are not compatible");
38684 }
38685
38686 return NULL_TREE;
38687 }
38688
38689 return NULL_TREE;
38690 }
38691
38692 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38693 struct attribute_spec.handler. */
38694 static tree
38695 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38696 bool *no_add_attrs)
38697 {
38698 tree *type = NULL;
38699 if (DECL_P (*node))
38700 {
38701 if (TREE_CODE (*node) == TYPE_DECL)
38702 type = &TREE_TYPE (*node);
38703 }
38704 else
38705 type = node;
38706
38707 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38708 {
38709 warning (OPT_Wattributes, "%qE attribute ignored",
38710 name);
38711 *no_add_attrs = true;
38712 }
38713
38714 else if ((is_attribute_p ("ms_struct", name)
38715 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38716 || ((is_attribute_p ("gcc_struct", name)
38717 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38718 {
38719 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38720 name);
38721 *no_add_attrs = true;
38722 }
38723
38724 return NULL_TREE;
38725 }
38726
38727 static tree
38728 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38729 bool *no_add_attrs)
38730 {
38731 if (TREE_CODE (*node) != FUNCTION_DECL)
38732 {
38733 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38734 name);
38735 *no_add_attrs = true;
38736 }
38737 return NULL_TREE;
38738 }
38739
38740 static bool
38741 ix86_ms_bitfield_layout_p (const_tree record_type)
38742 {
38743 return ((TARGET_MS_BITFIELD_LAYOUT
38744 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38745 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38746 }
38747
38748 /* Returns an expression indicating where the this parameter is
38749 located on entry to the FUNCTION. */
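
   For 64-bit code this is simply the first (or, for aggregate returns, the
   second) integer argument register of the selected ABI; for 32-bit code it
   depends on regparm/fastcall/thiscall and may fall back to a stack slot.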
38750
38751 static rtx
38752 x86_this_parameter (tree function)
38753 {
38754 tree type = TREE_TYPE (function);
38755 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38756 int nregs;
38757
38758 if (TARGET_64BIT)
38759 {
38760 const int *parm_regs;
38761
38762 if (ix86_function_type_abi (type) == MS_ABI)
38763 parm_regs = x86_64_ms_abi_int_parameter_registers;
38764 else
38765 parm_regs = x86_64_int_parameter_registers;
38766 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38767 }
38768
38769 nregs = ix86_function_regparm (type, function);
38770
38771 if (nregs > 0 && !stdarg_p (type))
38772 {
38773 int regno;
38774 unsigned int ccvt = ix86_get_callcvt (type);
38775
38776 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38777 regno = aggr ? DX_REG : CX_REG;
38778 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38779 {
38780 regno = CX_REG;
38781 if (aggr)
38782 return gen_rtx_MEM (SImode,
38783 plus_constant (Pmode, stack_pointer_rtx, 4));
38784 }
38785 else
38786 {
38787 regno = AX_REG;
38788 if (aggr)
38789 {
38790 regno = DX_REG;
38791 if (nregs == 1)
38792 return gen_rtx_MEM (SImode,
38793 plus_constant (Pmode,
38794 stack_pointer_rtx, 4));
38795 }
38796 }
38797 return gen_rtx_REG (SImode, regno);
38798 }
38799
38800 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38801 aggr ? 8 : 4));
38802 }
38803
38804 /* Determine whether x86_output_mi_thunk can succeed. */
38805
38806 static bool
38807 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38808 const_tree function)
38809 {
38810 /* 64-bit can handle anything. */
38811 if (TARGET_64BIT)
38812 return true;
38813
38814 /* For 32-bit, everything's fine if we have one free register. */
38815 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38816 return true;
38817
38818 /* Need a free register for vcall_offset. */
38819 if (vcall_offset)
38820 return false;
38821
38822 /* Need a free register for GOT references. */
38823 if (flag_pic && !targetm.binds_local_p (function))
38824 return false;
38825
38826 /* Otherwise ok. */
38827 return true;
38828 }
38829
38830 /* Output the assembler code for a thunk function. THUNK_DECL is the
38831 declaration for the thunk function itself, FUNCTION is the decl for
38832 the target function. DELTA is an immediate constant offset to be
38833 added to THIS. If VCALL_OFFSET is nonzero, the word at
38834 *(*this + vcall_offset) should be added to THIS. */
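
/* The emitted thunk is roughly:

     this += delta;
     if (vcall_offset)
       this += *(*this + vcall_offset);
     jump to function;

   i.e. a sibling call with no frame of its own, with THIS living in whatever
   register or stack slot the calling convention dictates. */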
38835
38836 static void
38837 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38838 HOST_WIDE_INT vcall_offset, tree function)
38839 {
38840 rtx this_param = x86_this_parameter (function);
38841 rtx this_reg, tmp, fnaddr;
38842 unsigned int tmp_regno;
38843 rtx_insn *insn;
38844
38845 if (TARGET_64BIT)
38846 tmp_regno = R10_REG;
38847 else
38848 {
38849 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38850 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38851 tmp_regno = AX_REG;
38852 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38853 tmp_regno = DX_REG;
38854 else
38855 tmp_regno = CX_REG;
38856 }
38857
38858 emit_note (NOTE_INSN_PROLOGUE_END);
38859
38860 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38861 pull it in now and let DELTA benefit. */
38862 if (REG_P (this_param))
38863 this_reg = this_param;
38864 else if (vcall_offset)
38865 {
38866 /* Put the this parameter into %eax. */
38867 this_reg = gen_rtx_REG (Pmode, AX_REG);
38868 emit_move_insn (this_reg, this_param);
38869 }
38870 else
38871 this_reg = NULL_RTX;
38872
38873 /* Adjust the this parameter by a fixed constant. */
38874 if (delta)
38875 {
38876 rtx delta_rtx = GEN_INT (delta);
38877 rtx delta_dst = this_reg ? this_reg : this_param;
38878
38879 if (TARGET_64BIT)
38880 {
38881 if (!x86_64_general_operand (delta_rtx, Pmode))
38882 {
38883 tmp = gen_rtx_REG (Pmode, tmp_regno);
38884 emit_move_insn (tmp, delta_rtx);
38885 delta_rtx = tmp;
38886 }
38887 }
38888
38889 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38890 }
38891
38892 /* Adjust the this parameter by a value stored in the vtable. */
38893 if (vcall_offset)
38894 {
38895 rtx vcall_addr, vcall_mem, this_mem;
38896
38897 tmp = gen_rtx_REG (Pmode, tmp_regno);
38898
38899 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38900 if (Pmode != ptr_mode)
38901 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38902 emit_move_insn (tmp, this_mem);
38903
38904 /* Adjust the this parameter. */
38905 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38906 if (TARGET_64BIT
38907 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38908 {
38909 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38910 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38911 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38912 }
38913
38914 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38915 if (Pmode != ptr_mode)
38916 emit_insn (gen_addsi_1_zext (this_reg,
38917 gen_rtx_REG (ptr_mode,
38918 REGNO (this_reg)),
38919 vcall_mem));
38920 else
38921 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38922 }
38923
38924 /* If necessary, drop THIS back to its stack slot. */
38925 if (this_reg && this_reg != this_param)
38926 emit_move_insn (this_param, this_reg);
38927
38928 fnaddr = XEXP (DECL_RTL (function), 0);
38929 if (TARGET_64BIT)
38930 {
38931 if (!flag_pic || targetm.binds_local_p (function)
38932 || TARGET_PECOFF)
38933 ;
38934 else
38935 {
38936 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38937 tmp = gen_rtx_CONST (Pmode, tmp);
38938 fnaddr = gen_const_mem (Pmode, tmp);
38939 }
38940 }
38941 else
38942 {
38943 if (!flag_pic || targetm.binds_local_p (function))
38944 ;
38945 #if TARGET_MACHO
38946 else if (TARGET_MACHO)
38947 {
38948 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38949 fnaddr = XEXP (fnaddr, 0);
38950 }
38951 #endif /* TARGET_MACHO */
38952 else
38953 {
38954 tmp = gen_rtx_REG (Pmode, CX_REG);
38955 output_set_got (tmp, NULL_RTX);
38956
38957 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38958 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38959 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38960 fnaddr = gen_const_mem (Pmode, fnaddr);
38961 }
38962 }
38963
38964 /* Our sibling call patterns do not allow memories, because we have no
38965 predicate that can distinguish between frame and non-frame memory.
38966 For our purposes here, we can get away with (ab)using a jump pattern,
38967 because we're going to do no optimization. */
38968 if (MEM_P (fnaddr))
38969 {
38970 if (sibcall_insn_operand (fnaddr, word_mode))
38971 {
38972 tmp = gen_rtx_CALL (VOIDmode, fnaddr, const0_rtx);
38973 tmp = emit_call_insn (tmp);
38974 SIBLING_CALL_P (tmp) = 1;
38975 }
38976 else
38977 emit_jump_insn (gen_indirect_jump (fnaddr));
38978 }
38979 else
38980 {
38981 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38982 fnaddr = legitimize_pic_address (fnaddr,
38983 gen_rtx_REG (Pmode, tmp_regno));
38984
38985 if (!sibcall_insn_operand (fnaddr, word_mode))
38986 {
38987 tmp = gen_rtx_REG (word_mode, tmp_regno);
38988 if (GET_MODE (fnaddr) != word_mode)
38989 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38990 emit_move_insn (tmp, fnaddr);
38991 fnaddr = tmp;
38992 }
38993
38994 tmp = gen_rtx_MEM (QImode, fnaddr);
38995 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38996 tmp = emit_call_insn (tmp);
38997 SIBLING_CALL_P (tmp) = 1;
38998 }
38999 emit_barrier ();
39000
39001 /* Emit just enough of rest_of_compilation to get the insns emitted.
39002 Note that use_thunk calls assemble_start_function et al. */
39003 insn = get_insns ();
39004 shorten_branches (insn);
39005 final_start_function (insn, file, 1);
39006 final (insn, file, 1);
39007 final_end_function ();
39008 }
39009
39010 static void
39011 x86_file_start (void)
39012 {
39013 default_file_start ();
39014 if (TARGET_16BIT)
39015 fputs ("\t.code16gcc\n", asm_out_file);
39016 #if TARGET_MACHO
39017 darwin_file_start ();
39018 #endif
39019 if (X86_FILE_START_VERSION_DIRECTIVE)
39020 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39021 if (X86_FILE_START_FLTUSED)
39022 fputs ("\t.global\t__fltused\n", asm_out_file);
39023 if (ix86_asm_dialect == ASM_INTEL)
39024 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39025 }
39026
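/* Compute the alignment to use for FIELD, given the alignment COMPUTED by
   the front end.  On 32-bit targets without -malign-double, DFmode/DCmode
   and integer class fields are capped at 32 bit alignment for compatibility
   with the traditional i386 layout. */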
39027 int
39028 x86_field_alignment (tree field, int computed)
39029 {
39030 enum machine_mode mode;
39031 tree type = TREE_TYPE (field);
39032
39033 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39034 return computed;
39035 mode = TYPE_MODE (strip_array_types (type));
39036 if (mode == DFmode || mode == DCmode
39037 || GET_MODE_CLASS (mode) == MODE_INT
39038 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39039 return MIN (32, computed);
39040 return computed;
39041 }
39042
39043 /* Output assembler code to FILE to increment profiler label # LABELNO
39044 for profiling a function entry. */
39045 void
39046 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39047 {
39048 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39049 : MCOUNT_NAME);
39050
39051 if (TARGET_64BIT)
39052 {
39053 #ifndef NO_PROFILE_COUNTERS
39054 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39055 #endif
39056
39057 if (!TARGET_PECOFF && flag_pic)
39058 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39059 else
39060 fprintf (file, "\tcall\t%s\n", mcount_name);
39061 }
39062 else if (flag_pic)
39063 {
39064 #ifndef NO_PROFILE_COUNTERS
39065 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39066 LPREFIX, labelno);
39067 #endif
39068 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39069 }
39070 else
39071 {
39072 #ifndef NO_PROFILE_COUNTERS
39073 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39074 LPREFIX, labelno);
39075 #endif
39076 fprintf (file, "\tcall\t%s\n", mcount_name);
39077 }
39078 }
39079
39080 /* We don't have exact information about the insn sizes, but we may assume
39081 quite safely that we are informed about all 1 byte insns and memory
39082 address sizes. This is enough to eliminate unnecessary padding in
39083 99% of cases. */
39084
39085 static int
39086 min_insn_size (rtx_insn *insn)
39087 {
39088 int l = 0, len;
39089
39090 if (!INSN_P (insn) || !active_insn_p (insn))
39091 return 0;
39092
39093 /* Discard alignments we've emitted and jump instructions. */
39094 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39095 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39096 return 0;
39097
39098 /* Important case - calls are always 5 bytes.
39099 It is common to have many calls in a row. */
39100 if (CALL_P (insn)
39101 && symbolic_reference_mentioned_p (PATTERN (insn))
39102 && !SIBLING_CALL_P (insn))
39103 return 5;
39104 len = get_attr_length (insn);
39105 if (len <= 1)
39106 return 1;
39107
39108 /* For normal instructions we rely on get_attr_length being exact,
39109 with a few exceptions. */
39110 if (!JUMP_P (insn))
39111 {
39112 enum attr_type type = get_attr_type (insn);
39113
39114 switch (type)
39115 {
39116 case TYPE_MULTI:
39117 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39118 || asm_noperands (PATTERN (insn)) >= 0)
39119 return 0;
39120 break;
39121 case TYPE_OTHER:
39122 case TYPE_FCMP:
39123 break;
39124 default:
39125 /* Otherwise trust get_attr_length. */
39126 return len;
39127 }
39128
39129 l = get_attr_length_address (insn);
39130 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39131 l = 4;
39132 }
39133 if (l)
39134 return 1+l;
39135 else
39136 return 2;
39137 }
39138
39139 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39140
39141 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39142 window. */
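
/* The pass below keeps a sliding window over the insn stream and, whenever a
   fourth jump could land in the same 16 byte window, emits a "pad" insn so
   that the assembler inserts padding (via ASM_OUTPUT_MAX_SKIP_PAD) to break
   the window up. */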
39143
39144 static void
39145 ix86_avoid_jump_mispredicts (void)
39146 {
39147 rtx_insn *insn, *start = get_insns ();
39148 int nbytes = 0, njumps = 0;
39149 int isjump = 0;
39150
39151 /* Look for all minimal intervals of instructions containing 4 jumps.
39152 The intervals are bounded by START and INSN. NBYTES is the total
39153 size of instructions in the interval including INSN and not including
39154 START. When NBYTES is smaller than 16 bytes, it is possible
39155 that the ends of START and INSN land in the same 16 byte page.
39156
39157 The smallest offset in the page at which INSN can start is the case where
39158 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39159 We add a p2align to the 16 byte window with max skip 15 - NBYTES + sizeof (INSN).
39160
39161 Don't consider an asm goto as a jump; while it can contain a jump, it doesn't
39162 have to: control transfer to the label(s) can be performed through other
39163 means, and we also estimate the minimum length of all asm stmts as 0. */
39164 for (insn = start; insn; insn = NEXT_INSN (insn))
39165 {
39166 int min_size;
39167
39168 if (LABEL_P (insn))
39169 {
39170 int align = label_to_alignment (insn);
39171 int max_skip = label_to_max_skip (insn);
39172
39173 if (max_skip > 15)
39174 max_skip = 15;
39175 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39176 already in the current 16 byte page, because otherwise
39177 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39178 bytes to reach 16 byte boundary. */
39179 if (align <= 0
39180 || (align <= 3 && max_skip != (1 << align) - 1))
39181 max_skip = 0;
39182 if (dump_file)
39183 fprintf (dump_file, "Label %i with max_skip %i\n",
39184 INSN_UID (insn), max_skip);
39185 if (max_skip)
39186 {
39187 while (nbytes + max_skip >= 16)
39188 {
39189 start = NEXT_INSN (start);
39190 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39191 || CALL_P (start))
39192 njumps--, isjump = 1;
39193 else
39194 isjump = 0;
39195 nbytes -= min_insn_size (start);
39196 }
39197 }
39198 continue;
39199 }
39200
39201 min_size = min_insn_size (insn);
39202 nbytes += min_size;
39203 if (dump_file)
39204 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39205 INSN_UID (insn), min_size);
39206 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39207 || CALL_P (insn))
39208 njumps++;
39209 else
39210 continue;
39211
39212 while (njumps > 3)
39213 {
39214 start = NEXT_INSN (start);
39215 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39216 || CALL_P (start))
39217 njumps--, isjump = 1;
39218 else
39219 isjump = 0;
39220 nbytes -= min_insn_size (start);
39221 }
39222 gcc_assert (njumps >= 0);
39223 if (dump_file)
39224 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39225 INSN_UID (start), INSN_UID (insn), nbytes);
39226
39227 if (njumps == 3 && isjump && nbytes < 16)
39228 {
39229 int padsize = 15 - nbytes + min_insn_size (insn);
39230
39231 if (dump_file)
39232 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39233 INSN_UID (insn), padsize);
39234 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39235 }
39236 }
39237 }
39238 #endif
39239
39240 /* AMD Athlon works faster
39241 when RET is not the destination of a conditional jump or directly preceded
39242 by another jump instruction. We avoid the penalty by inserting a NOP just
39243 before the RET instruction in such cases. */
39244 static void
39245 ix86_pad_returns (void)
39246 {
39247 edge e;
39248 edge_iterator ei;
39249
39250 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39251 {
39252 basic_block bb = e->src;
39253 rtx_insn *ret = BB_END (bb);
39254 rtx_insn *prev;
39255 bool replace = false;
39256
39257 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39258 || optimize_bb_for_size_p (bb))
39259 continue;
39260 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39261 if (active_insn_p (prev) || LABEL_P (prev))
39262 break;
39263 if (prev && LABEL_P (prev))
39264 {
39265 edge e;
39266 edge_iterator ei;
39267
39268 FOR_EACH_EDGE (e, ei, bb->preds)
39269 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39270 && !(e->flags & EDGE_FALLTHRU))
39271 {
39272 replace = true;
39273 break;
39274 }
39275 }
39276 if (!replace)
39277 {
39278 prev = prev_active_insn (ret);
39279 if (prev
39280 && ((JUMP_P (prev) && any_condjump_p (prev))
39281 || CALL_P (prev)))
39282 replace = true;
39283 /* Empty functions get branch mispredict even when
39284 the jump destination is not visible to us. */
39285 if (!prev && !optimize_function_for_size_p (cfun))
39286 replace = true;
39287 }
39288 if (replace)
39289 {
39290 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39291 delete_insn (ret);
39292 }
39293 }
39294 }
39295
39296 /* Count the minimum number of instructions in BB. Return 4 if the
39297 number of instructions >= 4. */
39298
39299 static int
39300 ix86_count_insn_bb (basic_block bb)
39301 {
39302 rtx_insn *insn;
39303 int insn_count = 0;
39304
39305 /* Count number of instructions in this block. Return 4 if the number
39306 of instructions >= 4. */
39307 FOR_BB_INSNS (bb, insn)
39308 {
39309 /* This only happens in exit blocks. */
39310 if (JUMP_P (insn)
39311 && ANY_RETURN_P (PATTERN (insn)))
39312 break;
39313
39314 if (NONDEBUG_INSN_P (insn)
39315 && GET_CODE (PATTERN (insn)) != USE
39316 && GET_CODE (PATTERN (insn)) != CLOBBER)
39317 {
39318 insn_count++;
39319 if (insn_count >= 4)
39320 return insn_count;
39321 }
39322 }
39323
39324 return insn_count;
39325 }
39326
39327
39328 /* Count the minimum number of instructions in code path in BB.
39329 Return 4 if the number of instructions >= 4. */
39330
39331 static int
39332 ix86_count_insn (basic_block bb)
39333 {
39334 edge e;
39335 edge_iterator ei;
39336 int min_prev_count;
39337
39338 /* Only bother counting instructions along paths with no
39339 more than 2 basic blocks between entry and exit. Given
39340 that BB has an edge to exit, determine if a predecessor
39341 of BB has an edge from entry. If so, compute the number
39342 of instructions in the predecessor block. If there
39343 happen to be multiple such blocks, compute the minimum. */
39344 min_prev_count = 4;
39345 FOR_EACH_EDGE (e, ei, bb->preds)
39346 {
39347 edge prev_e;
39348 edge_iterator prev_ei;
39349
39350 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39351 {
39352 min_prev_count = 0;
39353 break;
39354 }
39355 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39356 {
39357 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39358 {
39359 int count = ix86_count_insn_bb (e->src);
39360 if (count < min_prev_count)
39361 min_prev_count = count;
39362 break;
39363 }
39364 }
39365 }
39366
39367 if (min_prev_count < 4)
39368 min_prev_count += ix86_count_insn_bb (bb);
39369
39370 return min_prev_count;
39371 }
39372
39373 /* Pad short functions to 4 instructions. */
39374
39375 static void
39376 ix86_pad_short_function (void)
39377 {
39378 edge e;
39379 edge_iterator ei;
39380
39381 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39382 {
39383 rtx_insn *ret = BB_END (e->src);
39384 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39385 {
39386 int insn_count = ix86_count_insn (e->src);
39387
39388 /* Pad short function. */
39389 if (insn_count < 4)
39390 {
39391 rtx_insn *insn = ret;
39392
39393 /* Find epilogue. */
39394 while (insn
39395 && (!NOTE_P (insn)
39396 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39397 insn = PREV_INSN (insn);
39398
39399 if (!insn)
39400 insn = ret;
39401
39402 /* Two NOPs count as one instruction. */
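/* E.g. a path with only 2 real insns receives 2 * (4 - 2) = 4 NOPs. */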
39403 insn_count = 2 * (4 - insn_count);
39404 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39405 }
39406 }
39407 }
39408 }
39409
39410 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39411 the epilogue, the Windows system unwinder will apply epilogue logic and
39412 produce incorrect offsets. This can be avoided by adding a nop between
39413 the last insn that can throw and the first insn of the epilogue. */
39414
39415 static void
39416 ix86_seh_fixup_eh_fallthru (void)
39417 {
39418 edge e;
39419 edge_iterator ei;
39420
39421 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39422 {
39423 rtx_insn *insn, *next;
39424
39425 /* Find the beginning of the epilogue. */
39426 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39427 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39428 break;
39429 if (insn == NULL)
39430 continue;
39431
39432 /* We only care about preceding insns that can throw. */
39433 insn = prev_active_insn (insn);
39434 if (insn == NULL || !can_throw_internal (insn))
39435 continue;
39436
39437 /* Do not separate calls from their debug information. */
39438 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39439 if (NOTE_P (next)
39440 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39441 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39442 insn = next;
39443 else
39444 break;
39445
39446 emit_insn_after (gen_nops (const1_rtx), insn);
39447 }
39448 }
39449
39450 /* Implement machine specific optimizations. We implement padding of returns
39451 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
39452 static void
39453 ix86_reorg (void)
39454 {
39455 /* We are freeing block_for_insn in the toplev to keep compatibility
39456 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39457 compute_bb_for_insn ();
39458
39459 if (TARGET_SEH && current_function_has_exception_handlers ())
39460 ix86_seh_fixup_eh_fallthru ();
39461
39462 if (optimize && optimize_function_for_speed_p (cfun))
39463 {
39464 if (TARGET_PAD_SHORT_FUNCTION)
39465 ix86_pad_short_function ();
39466 else if (TARGET_PAD_RETURNS)
39467 ix86_pad_returns ();
39468 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39469 if (TARGET_FOUR_JUMP_LIMIT)
39470 ix86_avoid_jump_mispredicts ();
39471 #endif
39472 }
39473 }
39474
39475 /* Return nonzero when a QImode register that must be represented via a REX
39476 prefix is used. */
39477 bool
39478 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
39479 {
39480 int i;
39481 extract_insn_cached (insn);
39482 for (i = 0; i < recog_data.n_operands; i++)
39483 if (GENERAL_REG_P (recog_data.operand[i])
39484 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39485 return true;
39486 return false;
39487 }
39488
39489 /* Return nonzero when P points to a register encoded via a REX prefix.
39490 Called via for_each_rtx. */
39491 static int
39492 extended_reg_mentioned_1 (rtx *p, void *)
39493 {
39494 unsigned int regno;
39495 if (!REG_P (*p))
39496 return 0;
39497 regno = REGNO (*p);
39498 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39499 }
39500
39501 /* Return true when INSN mentions register that must be encoded using REX
39502 prefix. */
39503 bool
39504 x86_extended_reg_mentioned_p (rtx insn)
39505 {
39506 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39507 extended_reg_mentioned_1, NULL);
39508 }
39509
39510 /* If profitable, negate (without causing overflow) integer constant
39511 of mode MODE at location LOC. Return true in this case. */
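/* For example, an add of -4 has its constant negated here so the insn can be
   output as "subl $4, %eax" rather than "addl $-4, %eax"; 128 is likewise
   negated to -128, since -128 fits in a sign-extended 8 bit immediate while
   128 does not. */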
39512 bool
39513 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39514 {
39515 HOST_WIDE_INT val;
39516
39517 if (!CONST_INT_P (*loc))
39518 return false;
39519
39520 switch (mode)
39521 {
39522 case DImode:
39523 /* DImode x86_64 constants must fit in 32 bits. */
39524 gcc_assert (x86_64_immediate_operand (*loc, mode));
39525
39526 mode = SImode;
39527 break;
39528
39529 case SImode:
39530 case HImode:
39531 case QImode:
39532 break;
39533
39534 default:
39535 gcc_unreachable ();
39536 }
39537
39538 /* Avoid overflows. */
39539 if (mode_signbit_p (mode, *loc))
39540 return false;
39541
39542 val = INTVAL (*loc);
39543
39544 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
39545 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
39546 if ((val < 0 && val != -128)
39547 || val == 128)
39548 {
39549 *loc = GEN_INT (-val);
39550 return true;
39551 }
39552
39553 return false;
39554 }
39555
39556 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39557 optabs would emit if we didn't have TFmode patterns. */
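
   In C terms the emitted code is roughly:

     if ((signed) in >= 0)
       out = (FLOAT) in;
     else
       {
         in2 = (in >> 1) | (in & 1);
         out = (FLOAT) in2;
         out = out + out;
       }

   i.e. negative (large unsigned) inputs are halved, keeping the low bit as a
   sticky bit so rounding is preserved, converted, and then doubled.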
39558
39559 void
39560 x86_emit_floatuns (rtx operands[2])
39561 {
39562 rtx_code_label *neglab, *donelab;
39563 rtx i0, i1, f0, in, out;
39564 enum machine_mode mode, inmode;
39565
39566 inmode = GET_MODE (operands[1]);
39567 gcc_assert (inmode == SImode || inmode == DImode);
39568
39569 out = operands[0];
39570 in = force_reg (inmode, operands[1]);
39571 mode = GET_MODE (out);
39572 neglab = gen_label_rtx ();
39573 donelab = gen_label_rtx ();
39574 f0 = gen_reg_rtx (mode);
39575
39576 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39577
39578 expand_float (out, in, 0);
39579
39580 emit_jump_insn (gen_jump (donelab));
39581 emit_barrier ();
39582
39583 emit_label (neglab);
39584
39585 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39586 1, OPTAB_DIRECT);
39587 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39588 1, OPTAB_DIRECT);
39589 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39590
39591 expand_float (f0, i0, 0);
39592
39593 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39594
39595 emit_label (donelab);
39596 }
39597 \f
39598 /* AVX512F does support 64-byte integer vector operations,
39599 thus the longest vector we are faced with is V64QImode. */
39600 #define MAX_VECT_LEN 64
39601
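/* Describes one constant permutation that the expanders below try to match:
   TARGET receives elements selected from OP0/OP1 according to PERM, VMODE
   and NELT give the vector mode and element count, ONE_OPERAND_P is set when
   both inputs are really the same operand, and TESTING_P requests a dry run
   that only checks whether the permutation can be expanded. */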
39602 struct expand_vec_perm_d
39603 {
39604 rtx target, op0, op1;
39605 unsigned char perm[MAX_VECT_LEN];
39606 enum machine_mode vmode;
39607 unsigned char nelt;
39608 bool one_operand_p;
39609 bool testing_p;
39610 };
39611
39612 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39613 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39614 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39615
39616 /* Get a vector mode of the same size as the original but with elements
39617 twice as wide. This is only guaranteed to apply to integral vectors. */
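/* E.g. V16QImode yields V8HImode, and V8HImode yields V4SImode. */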
39618
39619 static inline enum machine_mode
39620 get_mode_wider_vector (enum machine_mode o)
39621 {
39622 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39623 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39624 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39625 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39626 return n;
39627 }
39628
39629 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39630 fill target with val via vec_duplicate. */
39631
39632 static bool
39633 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39634 {
39635 bool ok;
39636 rtx_insn *insn;
39637 rtx dup;
39638
39639 /* First attempt to recognize VAL as-is. */
39640 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39641 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39642 if (recog_memoized (insn) < 0)
39643 {
39644 rtx_insn *seq;
39645 /* If that fails, force VAL into a register. */
39646
39647 start_sequence ();
39648 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39649 seq = get_insns ();
39650 end_sequence ();
39651 if (seq)
39652 emit_insn_before (seq, insn);
39653
39654 ok = recog_memoized (insn) >= 0;
39655 gcc_assert (ok);
39656 }
39657 return true;
39658 }
39659
39660 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39661 with all elements equal to VAR. Return true if successful. */
39662
39663 static bool
39664 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39665 rtx target, rtx val)
39666 {
39667 bool ok;
39668
39669 switch (mode)
39670 {
39671 case V2SImode:
39672 case V2SFmode:
39673 if (!mmx_ok)
39674 return false;
39675 /* FALLTHRU */
39676
39677 case V4DFmode:
39678 case V4DImode:
39679 case V8SFmode:
39680 case V8SImode:
39681 case V2DFmode:
39682 case V2DImode:
39683 case V4SFmode:
39684 case V4SImode:
39685 case V16SImode:
39686 case V8DImode:
39687 case V16SFmode:
39688 case V8DFmode:
39689 return ix86_vector_duplicate_value (mode, target, val);
39690
39691 case V4HImode:
39692 if (!mmx_ok)
39693 return false;
39694 if (TARGET_SSE || TARGET_3DNOW_A)
39695 {
39696 rtx x;
39697
39698 val = gen_lowpart (SImode, val);
39699 x = gen_rtx_TRUNCATE (HImode, val);
39700 x = gen_rtx_VEC_DUPLICATE (mode, x);
39701 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39702 return true;
39703 }
39704 goto widen;
39705
39706 case V8QImode:
39707 if (!mmx_ok)
39708 return false;
39709 goto widen;
39710
39711 case V8HImode:
39712 if (TARGET_SSE2)
39713 {
39714 struct expand_vec_perm_d dperm;
39715 rtx tmp1, tmp2;
39716
39717 permute:
39718 memset (&dperm, 0, sizeof (dperm));
39719 dperm.target = target;
39720 dperm.vmode = mode;
39721 dperm.nelt = GET_MODE_NUNITS (mode);
39722 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39723 dperm.one_operand_p = true;
39724
39725 /* Extend to SImode using a paradoxical SUBREG. */
39726 tmp1 = gen_reg_rtx (SImode);
39727 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39728
39729 /* Insert the SImode value as low element of a V4SImode vector. */
39730 tmp2 = gen_reg_rtx (V4SImode);
39731 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39732 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39733
39734 ok = (expand_vec_perm_1 (&dperm)
39735 || expand_vec_perm_broadcast_1 (&dperm));
39736 gcc_assert (ok);
39737 return ok;
39738 }
39739 goto widen;
39740
39741 case V16QImode:
39742 if (TARGET_SSE2)
39743 goto permute;
39744 goto widen;
39745
39746 widen:
39747 /* Replicate the value once into the next wider mode and recurse. */
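/* E.g. to splat a QImode value B, build the HImode value (B << 8) | B
   and splat that in the corresponding wider vector mode instead. */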
39748 {
39749 enum machine_mode smode, wsmode, wvmode;
39750 rtx x;
39751
39752 smode = GET_MODE_INNER (mode);
39753 wvmode = get_mode_wider_vector (mode);
39754 wsmode = GET_MODE_INNER (wvmode);
39755
39756 val = convert_modes (wsmode, smode, val, true);
39757 x = expand_simple_binop (wsmode, ASHIFT, val,
39758 GEN_INT (GET_MODE_BITSIZE (smode)),
39759 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39760 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39761
39762 x = gen_reg_rtx (wvmode);
39763 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39764 gcc_assert (ok);
39765 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39766 return ok;
39767 }
39768
39769 case V16HImode:
39770 case V32QImode:
39771 {
39772 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39773 rtx x = gen_reg_rtx (hvmode);
39774
39775 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39776 gcc_assert (ok);
39777
39778 x = gen_rtx_VEC_CONCAT (mode, x, x);
39779 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39780 }
39781 return true;
39782
39783 default:
39784 return false;
39785 }
39786 }
39787
39788 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39789 whose ONE_VAR element is VAR, and other elements are zero. Return true
39790 if successful. */
39791
39792 static bool
39793 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39794 rtx target, rtx var, int one_var)
39795 {
39796 enum machine_mode vsimode;
39797 rtx new_target;
39798 rtx x, tmp;
39799 bool use_vector_set = false;
39800
39801 switch (mode)
39802 {
39803 case V2DImode:
39804 /* For SSE4.1, we normally use vector set. But if the second
39805 element is zero and inter-unit moves are OK, we use movq
39806 instead. */
39807 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39808 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39809 && one_var == 0));
39810 break;
39811 case V16QImode:
39812 case V4SImode:
39813 case V4SFmode:
39814 use_vector_set = TARGET_SSE4_1;
39815 break;
39816 case V8HImode:
39817 use_vector_set = TARGET_SSE2;
39818 break;
39819 case V4HImode:
39820 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39821 break;
39822 case V32QImode:
39823 case V16HImode:
39824 case V8SImode:
39825 case V8SFmode:
39826 case V4DFmode:
39827 use_vector_set = TARGET_AVX;
39828 break;
39829 case V4DImode:
39830 /* Use ix86_expand_vector_set in 64bit mode only. */
39831 use_vector_set = TARGET_AVX && TARGET_64BIT;
39832 break;
39833 default:
39834 break;
39835 }
39836
39837 if (use_vector_set)
39838 {
39839 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39840 var = force_reg (GET_MODE_INNER (mode), var);
39841 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39842 return true;
39843 }
39844
39845 switch (mode)
39846 {
39847 case V2SFmode:
39848 case V2SImode:
39849 if (!mmx_ok)
39850 return false;
39851 /* FALLTHRU */
39852
39853 case V2DFmode:
39854 case V2DImode:
39855 if (one_var != 0)
39856 return false;
39857 var = force_reg (GET_MODE_INNER (mode), var);
39858 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39859 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39860 return true;
39861
39862 case V4SFmode:
39863 case V4SImode:
39864 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39865 new_target = gen_reg_rtx (mode);
39866 else
39867 new_target = target;
39868 var = force_reg (GET_MODE_INNER (mode), var);
39869 x = gen_rtx_VEC_DUPLICATE (mode, var);
39870 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39871 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39872 if (one_var != 0)
39873 {
39874 /* We need to shuffle the value to the correct position, so
39875 create a new pseudo to store the intermediate result. */
39876
39877 /* With SSE2, we can use the integer shuffle insns. */
39878 if (mode != V4SFmode && TARGET_SSE2)
39879 {
39880 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39881 const1_rtx,
39882 GEN_INT (one_var == 1 ? 0 : 1),
39883 GEN_INT (one_var == 2 ? 0 : 1),
39884 GEN_INT (one_var == 3 ? 0 : 1)));
39885 if (target != new_target)
39886 emit_move_insn (target, new_target);
39887 return true;
39888 }
39889
39890 /* Otherwise convert the intermediate result to V4SFmode and
39891 use the SSE1 shuffle instructions. */
39892 if (mode != V4SFmode)
39893 {
39894 tmp = gen_reg_rtx (V4SFmode);
39895 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39896 }
39897 else
39898 tmp = new_target;
39899
39900 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39901 const1_rtx,
39902 GEN_INT (one_var == 1 ? 0 : 1),
39903 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39904 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39905
39906 if (mode != V4SFmode)
39907 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39908 else if (tmp != target)
39909 emit_move_insn (target, tmp);
39910 }
39911 else if (target != new_target)
39912 emit_move_insn (target, new_target);
39913 return true;
39914
39915 case V8HImode:
39916 case V16QImode:
39917 vsimode = V4SImode;
39918 goto widen;
39919 case V4HImode:
39920 case V8QImode:
39921 if (!mmx_ok)
39922 return false;
39923 vsimode = V2SImode;
39924 goto widen;
39925 widen:
39926 if (one_var != 0)
39927 return false;
39928
39929 /* Zero extend the variable element to SImode and recurse. */
39930 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39931
39932 x = gen_reg_rtx (vsimode);
39933 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39934 var, one_var))
39935 gcc_unreachable ();
39936
39937 emit_move_insn (target, gen_lowpart (mode, x));
39938 return true;
39939
39940 default:
39941 return false;
39942 }
39943 }
39944
39945 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39946 consisting of the values in VALS. It is known that all elements
39947 except ONE_VAR are constants. Return true if successful. */
39948
39949 static bool
39950 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39951 rtx target, rtx vals, int one_var)
39952 {
39953 rtx var = XVECEXP (vals, 0, one_var);
39954 enum machine_mode wmode;
39955 rtx const_vec, x;
39956
39957 const_vec = copy_rtx (vals);
39958 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39959 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39960
39961 switch (mode)
39962 {
39963 case V2DFmode:
39964 case V2DImode:
39965 case V2SFmode:
39966 case V2SImode:
39967 /* For the two element vectors, it's just as easy to use
39968 the general case. */
39969 return false;
39970
39971 case V4DImode:
39972 /* Use ix86_expand_vector_set in 64bit mode only. */
39973 if (!TARGET_64BIT)
39974 return false;
39975 case V4DFmode:
39976 case V8SFmode:
39977 case V8SImode:
39978 case V16HImode:
39979 case V32QImode:
39980 case V4SFmode:
39981 case V4SImode:
39982 case V8HImode:
39983 case V4HImode:
39984 break;
39985
39986 case V16QImode:
39987 if (TARGET_SSE4_1)
39988 break;
39989 wmode = V8HImode;
39990 goto widen;
39991 case V8QImode:
39992 wmode = V4HImode;
39993 goto widen;
39994 widen:
39995 /* There's no way to set one QImode entry easily. Combine
39996 the variable value with its adjacent constant value, and
39997 promote to an HImode set. */
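/* E.g. for ONE_VAR == 5, fold the constant from element 4 into the low
   byte, put the variable value in the high byte, and set HImode element 2
   of the widened vector. */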
39998 x = XVECEXP (vals, 0, one_var ^ 1);
39999 if (one_var & 1)
40000 {
40001 var = convert_modes (HImode, QImode, var, true);
40002 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40003 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40004 x = GEN_INT (INTVAL (x) & 0xff);
40005 }
40006 else
40007 {
40008 var = convert_modes (HImode, QImode, var, true);
40009 x = gen_int_mode (INTVAL (x) << 8, HImode);
40010 }
40011 if (x != const0_rtx)
40012 var = expand_simple_binop (HImode, IOR, var, x, var,
40013 1, OPTAB_LIB_WIDEN);
40014
40015 x = gen_reg_rtx (wmode);
40016 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40017 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40018
40019 emit_move_insn (target, gen_lowpart (mode, x));
40020 return true;
40021
40022 default:
40023 return false;
40024 }
40025
40026 emit_move_insn (target, const_vec);
40027 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40028 return true;
40029 }
40030
40031 /* A subroutine of ix86_expand_vector_init_general. Use vector
40032 concatenate to handle the most general case: all values variable,
40033 and none identical. */
40034
40035 static void
40036 ix86_expand_vector_init_concat (enum machine_mode mode,
40037 rtx target, rtx *ops, int n)
40038 {
40039 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40040 rtx first[16], second[8], third[4];
40041 rtvec v;
40042 int i, j;
40043
40044 switch (n)
40045 {
40046 case 2:
40047 switch (mode)
40048 {
40049 case V16SImode:
40050 cmode = V8SImode;
40051 break;
40052 case V16SFmode:
40053 cmode = V8SFmode;
40054 break;
40055 case V8DImode:
40056 cmode = V4DImode;
40057 break;
40058 case V8DFmode:
40059 cmode = V4DFmode;
40060 break;
40061 case V8SImode:
40062 cmode = V4SImode;
40063 break;
40064 case V8SFmode:
40065 cmode = V4SFmode;
40066 break;
40067 case V4DImode:
40068 cmode = V2DImode;
40069 break;
40070 case V4DFmode:
40071 cmode = V2DFmode;
40072 break;
40073 case V4SImode:
40074 cmode = V2SImode;
40075 break;
40076 case V4SFmode:
40077 cmode = V2SFmode;
40078 break;
40079 case V2DImode:
40080 cmode = DImode;
40081 break;
40082 case V2SImode:
40083 cmode = SImode;
40084 break;
40085 case V2DFmode:
40086 cmode = DFmode;
40087 break;
40088 case V2SFmode:
40089 cmode = SFmode;
40090 break;
40091 default:
40092 gcc_unreachable ();
40093 }
40094
40095 if (!register_operand (ops[1], cmode))
40096 ops[1] = force_reg (cmode, ops[1]);
40097 if (!register_operand (ops[0], cmode))
40098 ops[0] = force_reg (cmode, ops[0]);
40099 emit_insn (gen_rtx_SET (VOIDmode, target,
40100 gen_rtx_VEC_CONCAT (mode, ops[0],
40101 ops[1])));
40102 break;
40103
40104 case 4:
40105 switch (mode)
40106 {
40107 case V4DImode:
40108 cmode = V2DImode;
40109 break;
40110 case V4DFmode:
40111 cmode = V2DFmode;
40112 break;
40113 case V4SImode:
40114 cmode = V2SImode;
40115 break;
40116 case V4SFmode:
40117 cmode = V2SFmode;
40118 break;
40119 default:
40120 gcc_unreachable ();
40121 }
40122 goto half;
40123
40124 case 8:
40125 switch (mode)
40126 {
40127 case V8DImode:
40128 cmode = V2DImode;
40129 hmode = V4DImode;
40130 break;
40131 case V8DFmode:
40132 cmode = V2DFmode;
40133 hmode = V4DFmode;
40134 break;
40135 case V8SImode:
40136 cmode = V2SImode;
40137 hmode = V4SImode;
40138 break;
40139 case V8SFmode:
40140 cmode = V2SFmode;
40141 hmode = V4SFmode;
40142 break;
40143 default:
40144 gcc_unreachable ();
40145 }
40146 goto half;
40147
40148 case 16:
40149 switch (mode)
40150 {
40151 case V16SImode:
40152 cmode = V2SImode;
40153 hmode = V4SImode;
40154 gmode = V8SImode;
40155 break;
40156 case V16SFmode:
40157 cmode = V2SFmode;
40158 hmode = V4SFmode;
40159 gmode = V8SFmode;
40160 break;
40161 default:
40162 gcc_unreachable ();
40163 }
40164 goto half;
40165
40166 half:
40167 /* FIXME: We process inputs backward to help RA. PR 36222. */
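/* Pair up the N inputs into N/2 vectors of mode CMODE, then keep
   concatenating adjacent pairs (through HMODE and, for the 512-bit
   cases, GMODE) until a single MODE vector remains. */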
40168 i = n - 1;
40169 j = (n >> 1) - 1;
40170 for (; i > 0; i -= 2, j--)
40171 {
40172 first[j] = gen_reg_rtx (cmode);
40173 v = gen_rtvec (2, ops[i - 1], ops[i]);
40174 ix86_expand_vector_init (false, first[j],
40175 gen_rtx_PARALLEL (cmode, v));
40176 }
40177
40178 n >>= 1;
40179 if (n > 4)
40180 {
40181 gcc_assert (hmode != VOIDmode);
40182 gcc_assert (gmode != VOIDmode);
40183 for (i = j = 0; i < n; i += 2, j++)
40184 {
40185 second[j] = gen_reg_rtx (hmode);
40186 ix86_expand_vector_init_concat (hmode, second [j],
40187 &first [i], 2);
40188 }
40189 n >>= 1;
40190 for (i = j = 0; i < n; i += 2, j++)
40191 {
40192 third[j] = gen_reg_rtx (gmode);
40193 ix86_expand_vector_init_concat (gmode, third[j],
40194 &second[i], 2);
40195 }
40196 n >>= 1;
40197 ix86_expand_vector_init_concat (mode, target, third, n);
40198 }
40199 else if (n > 2)
40200 {
40201 gcc_assert (hmode != VOIDmode);
40202 for (i = j = 0; i < n; i += 2, j++)
40203 {
40204 second[j] = gen_reg_rtx (hmode);
40205 ix86_expand_vector_init_concat (hmode, second [j],
40206 &first [i], 2);
40207 }
40208 n >>= 1;
40209 ix86_expand_vector_init_concat (mode, target, second, n);
40210 }
40211 else
40212 ix86_expand_vector_init_concat (mode, target, first, n);
40213 break;
40214
40215 default:
40216 gcc_unreachable ();
40217 }
40218 }
40219
40220 /* A subroutine of ix86_expand_vector_init_general. Use vector
40221 interleave to handle the most general case: all values variable,
40222 and none identical. */
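/* Roughly: for V8HImode the eight scalars are first packed pairwise into
   four vectors, which are interleaved into two wider-element vectors and
   finally into the result; V16QImode needs one more interleave level. */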
40223
40224 static void
40225 ix86_expand_vector_init_interleave (enum machine_mode mode,
40226 rtx target, rtx *ops, int n)
40227 {
40228 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40229 int i, j;
40230 rtx op0, op1;
40231 rtx (*gen_load_even) (rtx, rtx, rtx);
40232 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40233 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40234
40235 switch (mode)
40236 {
40237 case V8HImode:
40238 gen_load_even = gen_vec_setv8hi;
40239 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40240 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40241 inner_mode = HImode;
40242 first_imode = V4SImode;
40243 second_imode = V2DImode;
40244 third_imode = VOIDmode;
40245 break;
40246 case V16QImode:
40247 gen_load_even = gen_vec_setv16qi;
40248 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40249 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40250 inner_mode = QImode;
40251 first_imode = V8HImode;
40252 second_imode = V4SImode;
40253 third_imode = V2DImode;
40254 break;
40255 default:
40256 gcc_unreachable ();
40257 }
40258
40259 for (i = 0; i < n; i++)
40260 {
40261 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40262 op0 = gen_reg_rtx (SImode);
40263 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40264
40265 /* Insert the SImode value as low element of V4SImode vector. */
40266 op1 = gen_reg_rtx (V4SImode);
40267 op0 = gen_rtx_VEC_MERGE (V4SImode,
40268 gen_rtx_VEC_DUPLICATE (V4SImode,
40269 op0),
40270 CONST0_RTX (V4SImode),
40271 const1_rtx);
40272 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40273
40274 /* Cast the V4SImode vector back to a vector in the original mode. */
40275 op0 = gen_reg_rtx (mode);
40276 emit_move_insn (op0, gen_lowpart (mode, op1));
40277
40278 /* Load even elements into the second position. */
40279 emit_insn (gen_load_even (op0,
40280 force_reg (inner_mode,
40281 ops [i + i + 1]),
40282 const1_rtx));
40283
40284 /* Cast vector to FIRST_IMODE vector. */
40285 ops[i] = gen_reg_rtx (first_imode);
40286 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40287 }
40288
40289 /* Interleave low FIRST_IMODE vectors. */
40290 for (i = j = 0; i < n; i += 2, j++)
40291 {
40292 op0 = gen_reg_rtx (first_imode);
40293 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40294
40295 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40296 ops[j] = gen_reg_rtx (second_imode);
40297 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40298 }
40299
40300 /* Interleave low SECOND_IMODE vectors. */
40301 switch (second_imode)
40302 {
40303 case V4SImode:
40304 for (i = j = 0; i < n / 2; i += 2, j++)
40305 {
40306 op0 = gen_reg_rtx (second_imode);
40307 emit_insn (gen_interleave_second_low (op0, ops[i],
40308 ops[i + 1]));
40309
40310 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40311 vector. */
40312 ops[j] = gen_reg_rtx (third_imode);
40313 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40314 }
40315 second_imode = V2DImode;
40316 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40317 /* FALLTHRU */
40318
40319 case V2DImode:
40320 op0 = gen_reg_rtx (second_imode);
40321 emit_insn (gen_interleave_second_low (op0, ops[0],
40322 ops[1]));
40323
40324 /* Cast the SECOND_IMODE vector back to a vector in the original
40325 mode. */
40326 emit_insn (gen_rtx_SET (VOIDmode, target,
40327 gen_lowpart (mode, op0)));
40328 break;
40329
40330 default:
40331 gcc_unreachable ();
40332 }
40333 }
40334
40335 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40336 all values variable, and none identical. */
40337
40338 static void
40339 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40340 rtx target, rtx vals)
40341 {
40342 rtx ops[64], op0, op1;
40343 enum machine_mode half_mode = VOIDmode;
40344 int n, i;
40345
40346 switch (mode)
40347 {
40348 case V2SFmode:
40349 case V2SImode:
40350 if (!mmx_ok && !TARGET_SSE)
40351 break;
40352 /* FALLTHRU */
40353
40354 case V16SImode:
40355 case V16SFmode:
40356 case V8DFmode:
40357 case V8DImode:
40358 case V8SFmode:
40359 case V8SImode:
40360 case V4DFmode:
40361 case V4DImode:
40362 case V4SFmode:
40363 case V4SImode:
40364 case V2DFmode:
40365 case V2DImode:
40366 n = GET_MODE_NUNITS (mode);
40367 for (i = 0; i < n; i++)
40368 ops[i] = XVECEXP (vals, 0, i);
40369 ix86_expand_vector_init_concat (mode, target, ops, n);
40370 return;
40371
40372 case V32QImode:
40373 half_mode = V16QImode;
40374 goto half;
40375
40376 case V16HImode:
40377 half_mode = V8HImode;
40378 goto half;
40379
40380 half:
40381 n = GET_MODE_NUNITS (mode);
40382 for (i = 0; i < n; i++)
40383 ops[i] = XVECEXP (vals, 0, i);
40384 op0 = gen_reg_rtx (half_mode);
40385 op1 = gen_reg_rtx (half_mode);
40386 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40387 n >> 2);
40388 ix86_expand_vector_init_interleave (half_mode, op1,
40389 &ops [n >> 1], n >> 2);
40390 emit_insn (gen_rtx_SET (VOIDmode, target,
40391 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40392 return;
40393
40394 case V16QImode:
40395 if (!TARGET_SSE4_1)
40396 break;
40397 /* FALLTHRU */
40398
40399 case V8HImode:
40400 if (!TARGET_SSE2)
40401 break;
40402
40403 /* Don't use ix86_expand_vector_init_interleave if we can't
40404 move from GPR to SSE register directly. */
40405 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40406 break;
40407
40408 n = GET_MODE_NUNITS (mode);
40409 for (i = 0; i < n; i++)
40410 ops[i] = XVECEXP (vals, 0, i);
40411 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40412 return;
40413
40414 case V4HImode:
40415 case V8QImode:
40416 break;
40417
40418 default:
40419 gcc_unreachable ();
40420 }
40421
40422 {
40423 int i, j, n_elts, n_words, n_elt_per_word;
40424 enum machine_mode inner_mode;
40425 rtx words[4], shift;
40426
40427 inner_mode = GET_MODE_INNER (mode);
40428 n_elts = GET_MODE_NUNITS (mode);
40429 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40430 n_elt_per_word = n_elts / n_words;
40431 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40432
40433 for (i = 0; i < n_words; ++i)
40434 {
40435 rtx word = NULL_RTX;
40436
40437 for (j = 0; j < n_elt_per_word; ++j)
40438 {
40439 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40440 elt = convert_modes (word_mode, inner_mode, elt, true);
40441
40442 if (j == 0)
40443 word = elt;
40444 else
40445 {
40446 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40447 word, 1, OPTAB_LIB_WIDEN);
40448 word = expand_simple_binop (word_mode, IOR, word, elt,
40449 word, 1, OPTAB_LIB_WIDEN);
40450 }
40451 }
40452
40453 words[i] = word;
40454 }
40455
40456 if (n_words == 1)
40457 emit_move_insn (target, gen_lowpart (mode, words[0]));
40458 else if (n_words == 2)
40459 {
40460 rtx tmp = gen_reg_rtx (mode);
40461 emit_clobber (tmp);
40462 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40463 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40464 emit_move_insn (target, tmp);
40465 }
40466 else if (n_words == 4)
40467 {
40468 rtx tmp = gen_reg_rtx (V4SImode);
40469 gcc_assert (word_mode == SImode);
40470 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40471 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40472 emit_move_insn (target, gen_lowpart (mode, tmp));
40473 }
40474 else
40475 gcc_unreachable ();
40476 }
40477 }
40478
40479 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40480 instructions unless MMX_OK is true. */
40481
40482 void
40483 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40484 {
40485 enum machine_mode mode = GET_MODE (target);
40486 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40487 int n_elts = GET_MODE_NUNITS (mode);
40488 int n_var = 0, one_var = -1;
40489 bool all_same = true, all_const_zero = true;
40490 int i;
40491 rtx x;
40492
40493 for (i = 0; i < n_elts; ++i)
40494 {
40495 x = XVECEXP (vals, 0, i);
40496 if (!(CONST_INT_P (x)
40497 || GET_CODE (x) == CONST_DOUBLE
40498 || GET_CODE (x) == CONST_FIXED))
40499 n_var++, one_var = i;
40500 else if (x != CONST0_RTX (inner_mode))
40501 all_const_zero = false;
40502 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40503 all_same = false;
40504 }
40505
40506 /* Constants are best loaded from the constant pool. */
40507 if (n_var == 0)
40508 {
40509 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40510 return;
40511 }
40512
40513 /* If all values are identical, broadcast the value. */
40514 if (all_same
40515 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40516 XVECEXP (vals, 0, 0)))
40517 return;
40518
40519 /* Values where only one field is non-constant are best loaded from
40520 the pool and overwritten via move later. */
40521 if (n_var == 1)
40522 {
40523 if (all_const_zero
40524 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40525 XVECEXP (vals, 0, one_var),
40526 one_var))
40527 return;
40528
40529 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40530 return;
40531 }
40532
40533 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40534 }
40535
40536 void
40537 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40538 {
40539 enum machine_mode mode = GET_MODE (target);
40540 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40541 enum machine_mode half_mode;
40542 bool use_vec_merge = false;
40543 rtx tmp;
40544 static rtx (*gen_extract[6][2]) (rtx, rtx)
40545 = {
40546 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40547 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40548 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40549 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40550 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40551 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40552 };
40553 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40554 = {
40555 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40556 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40557 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40558 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40559 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40560 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40561 };
40562 int i, j, n;
40563
40564 switch (mode)
40565 {
40566 case V2SFmode:
40567 case V2SImode:
40568 if (mmx_ok)
40569 {
40570 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40571 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40572 if (elt == 0)
40573 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40574 else
40575 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40576 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40577 return;
40578 }
40579 break;
40580
40581 case V2DImode:
40582 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40583 if (use_vec_merge)
40584 break;
40585
40586 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40587 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40588 if (elt == 0)
40589 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40590 else
40591 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40592 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40593 return;
40594
40595 case V2DFmode:
40596 {
40597 rtx op0, op1;
40598
40599 /* For the two element vectors, we implement a VEC_CONCAT with
40600 the extraction of the other element. */
40601
40602 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40603 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40604
40605 if (elt == 0)
40606 op0 = val, op1 = tmp;
40607 else
40608 op0 = tmp, op1 = val;
40609
40610 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40611 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40612 }
40613 return;
40614
40615 case V4SFmode:
40616 use_vec_merge = TARGET_SSE4_1;
40617 if (use_vec_merge)
40618 break;
40619
40620 switch (elt)
40621 {
40622 case 0:
40623 use_vec_merge = true;
40624 break;
40625
40626 case 1:
40627 /* tmp = target = A B C D */
40628 tmp = copy_to_reg (target);
40629 /* target = A A B B */
40630 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40631 /* target = X A B B */
40632 ix86_expand_vector_set (false, target, val, 0);
40633 /* target = A X C D */
40634 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40635 const1_rtx, const0_rtx,
40636 GEN_INT (2+4), GEN_INT (3+4)));
40637 return;
40638
40639 case 2:
40640 /* tmp = target = A B C D */
40641 tmp = copy_to_reg (target);
40642 /* tmp = X B C D */
40643 ix86_expand_vector_set (false, tmp, val, 0);
40644 /* target = A B X D */
40645 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40646 const0_rtx, const1_rtx,
40647 GEN_INT (0+4), GEN_INT (3+4)));
40648 return;
40649
40650 case 3:
40651 /* tmp = target = A B C D */
40652 tmp = copy_to_reg (target);
40653 /* tmp = X B C D */
40654 ix86_expand_vector_set (false, tmp, val, 0);
40655 /* target = A B C X */
40656 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40657 const0_rtx, const1_rtx,
40658 GEN_INT (2+4), GEN_INT (0+4)));
40659 return;
40660
40661 default:
40662 gcc_unreachable ();
40663 }
40664 break;
40665
40666 case V4SImode:
40667 use_vec_merge = TARGET_SSE4_1;
40668 if (use_vec_merge)
40669 break;
40670
40671 /* Element 0 handled by vec_merge below. */
40672 if (elt == 0)
40673 {
40674 use_vec_merge = true;
40675 break;
40676 }
40677
40678 if (TARGET_SSE2)
40679 {
40680 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40681 store into element 0, then shuffle them back. */
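/* E.g. for ELT == 2 the permutation is {2, 1, 0, 3}; applying it twice
   is the identity, so the same pshufd brings the target lane to
   position 0 and moves it back after the store. */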
40682
40683 rtx order[4];
40684
40685 order[0] = GEN_INT (elt);
40686 order[1] = const1_rtx;
40687 order[2] = const2_rtx;
40688 order[3] = GEN_INT (3);
40689 order[elt] = const0_rtx;
40690
40691 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40692 order[1], order[2], order[3]));
40693
40694 ix86_expand_vector_set (false, target, val, 0);
40695
40696 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40697 order[1], order[2], order[3]));
40698 }
40699 else
40700 {
40701 /* For SSE1, we have to reuse the V4SF code. */
40702 rtx t = gen_reg_rtx (V4SFmode);
40703 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40704 emit_move_insn (target, gen_lowpart (mode, t));
40705 }
40706 return;
40707
40708 case V8HImode:
40709 use_vec_merge = TARGET_SSE2;
40710 break;
40711 case V4HImode:
40712 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40713 break;
40714
40715 case V16QImode:
40716 use_vec_merge = TARGET_SSE4_1;
40717 break;
40718
40719 case V8QImode:
40720 break;
40721
40722 case V32QImode:
40723 half_mode = V16QImode;
40724 j = 0;
40725 n = 16;
40726 goto half;
40727
40728 case V16HImode:
40729 half_mode = V8HImode;
40730 j = 1;
40731 n = 8;
40732 goto half;
40733
40734 case V8SImode:
40735 half_mode = V4SImode;
40736 j = 2;
40737 n = 4;
40738 goto half;
40739
40740 case V4DImode:
40741 half_mode = V2DImode;
40742 j = 3;
40743 n = 2;
40744 goto half;
40745
40746 case V8SFmode:
40747 half_mode = V4SFmode;
40748 j = 4;
40749 n = 4;
40750 goto half;
40751
40752 case V4DFmode:
40753 half_mode = V2DFmode;
40754 j = 5;
40755 n = 2;
40756 goto half;
40757
40758 half:
40759 /* Compute offset. */
40760 i = elt / n;
40761 elt %= n;
40762
40763 gcc_assert (i <= 1);
40764
40765 /* Extract the half. */
40766 tmp = gen_reg_rtx (half_mode);
40767 emit_insn (gen_extract[j][i] (tmp, target));
40768
40769 /* Put val in tmp at elt. */
40770 ix86_expand_vector_set (false, tmp, val, elt);
40771
40772 /* Put it back. */
40773 emit_insn (gen_insert[j][i] (target, target, tmp));
40774 return;
40775
40776 default:
40777 break;
40778 }
40779
40780 if (use_vec_merge)
40781 {
40782 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40783 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40784 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40785 }
40786 else
40787 {
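/* No suitable instruction pattern: go through memory. Spill the vector
   to the stack, overwrite the selected element there, and reload. */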
40788 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40789
40790 emit_move_insn (mem, target);
40791
40792 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40793 emit_move_insn (tmp, val);
40794
40795 emit_move_insn (target, mem);
40796 }
40797 }
40798
40799 void
40800 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40801 {
40802 enum machine_mode mode = GET_MODE (vec);
40803 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40804 bool use_vec_extr = false;
40805 rtx tmp;
40806
40807 switch (mode)
40808 {
40809 case V2SImode:
40810 case V2SFmode:
40811 if (!mmx_ok)
40812 break;
40813 /* FALLTHRU */
40814
40815 case V2DFmode:
40816 case V2DImode:
40817 use_vec_extr = true;
40818 break;
40819
40820 case V4SFmode:
40821 use_vec_extr = TARGET_SSE4_1;
40822 if (use_vec_extr)
40823 break;
40824
40825 switch (elt)
40826 {
40827 case 0:
40828 tmp = vec;
40829 break;
40830
40831 case 1:
40832 case 3:
40833 tmp = gen_reg_rtx (mode);
40834 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40835 GEN_INT (elt), GEN_INT (elt),
40836 GEN_INT (elt+4), GEN_INT (elt+4)));
40837 break;
40838
40839 case 2:
40840 tmp = gen_reg_rtx (mode);
40841 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40842 break;
40843
40844 default:
40845 gcc_unreachable ();
40846 }
40847 vec = tmp;
40848 use_vec_extr = true;
40849 elt = 0;
40850 break;
40851
40852 case V4SImode:
40853 use_vec_extr = TARGET_SSE4_1;
40854 if (use_vec_extr)
40855 break;
40856
40857 if (TARGET_SSE2)
40858 {
40859 switch (elt)
40860 {
40861 case 0:
40862 tmp = vec;
40863 break;
40864
40865 case 1:
40866 case 3:
40867 tmp = gen_reg_rtx (mode);
40868 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40869 GEN_INT (elt), GEN_INT (elt),
40870 GEN_INT (elt), GEN_INT (elt)));
40871 break;
40872
40873 case 2:
40874 tmp = gen_reg_rtx (mode);
40875 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40876 break;
40877
40878 default:
40879 gcc_unreachable ();
40880 }
40881 vec = tmp;
40882 use_vec_extr = true;
40883 elt = 0;
40884 }
40885 else
40886 {
40887 /* For SSE1, we have to reuse the V4SF code. */
40888 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40889 gen_lowpart (V4SFmode, vec), elt);
40890 return;
40891 }
40892 break;
40893
40894 case V8HImode:
40895 use_vec_extr = TARGET_SSE2;
40896 break;
40897 case V4HImode:
40898 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40899 break;
40900
40901 case V16QImode:
40902 use_vec_extr = TARGET_SSE4_1;
40903 break;
40904
40905 case V8SFmode:
40906 if (TARGET_AVX)
40907 {
40908 tmp = gen_reg_rtx (V4SFmode);
40909 if (elt < 4)
40910 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40911 else
40912 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40913 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40914 return;
40915 }
40916 break;
40917
40918 case V4DFmode:
40919 if (TARGET_AVX)
40920 {
40921 tmp = gen_reg_rtx (V2DFmode);
40922 if (elt < 2)
40923 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40924 else
40925 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40926 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40927 return;
40928 }
40929 break;
40930
40931 case V32QImode:
40932 if (TARGET_AVX)
40933 {
40934 tmp = gen_reg_rtx (V16QImode);
40935 if (elt < 16)
40936 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40937 else
40938 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40939 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40940 return;
40941 }
40942 break;
40943
40944 case V16HImode:
40945 if (TARGET_AVX)
40946 {
40947 tmp = gen_reg_rtx (V8HImode);
40948 if (elt < 8)
40949 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40950 else
40951 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40952 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40953 return;
40954 }
40955 break;
40956
40957 case V8SImode:
40958 if (TARGET_AVX)
40959 {
40960 tmp = gen_reg_rtx (V4SImode);
40961 if (elt < 4)
40962 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40963 else
40964 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40965 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40966 return;
40967 }
40968 break;
40969
40970 case V4DImode:
40971 if (TARGET_AVX)
40972 {
40973 tmp = gen_reg_rtx (V2DImode);
40974 if (elt < 2)
40975 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40976 else
40977 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40978 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40979 return;
40980 }
40981 break;
40982
40983 case V32HImode:
40984 if (TARGET_AVX512BW)
40985 {
40986 tmp = gen_reg_rtx (V16HImode);
40987 if (elt < 16)
40988 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
40989 else
40990 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
40991 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40992 return;
40993 }
40994 break;
40995
40996 case V64QImode:
40997 if (TARGET_AVX512BW)
40998 {
40999 tmp = gen_reg_rtx (V32QImode);
41000 if (elt < 32)
41001 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
41002 else
41003 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
41004 ix86_expand_vector_extract (false, target, tmp, elt & 31);
41005 return;
41006 }
41007 break;
41008
41009 case V16SFmode:
41010 tmp = gen_reg_rtx (V8SFmode);
41011 if (elt < 8)
41012 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41013 else
41014 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41015 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41016 return;
41017
41018 case V8DFmode:
41019 tmp = gen_reg_rtx (V4DFmode);
41020 if (elt < 4)
41021 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41022 else
41023 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41024 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41025 return;
41026
41027 case V16SImode:
41028 tmp = gen_reg_rtx (V8SImode);
41029 if (elt < 8)
41030 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41031 else
41032 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41033 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41034 return;
41035
41036 case V8DImode:
41037 tmp = gen_reg_rtx (V4DImode);
41038 if (elt < 4)
41039 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41040 else
41041 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41042 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41043 return;
41044
41045 case V8QImode:
41046 /* ??? Could extract the appropriate HImode element and shift. */
41047 default:
41048 break;
41049 }
41050
41051 if (use_vec_extr)
41052 {
41053 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41054 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41055
41056 /* Let the rtl optimizers know about the zero extension performed. */
41057 if (inner_mode == QImode || inner_mode == HImode)
41058 {
41059 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41060 target = gen_lowpart (SImode, target);
41061 }
41062
41063 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41064 }
41065 else
41066 {
41067 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41068
41069 emit_move_insn (mem, vec);
41070
41071 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41072 emit_move_insn (target, tmp);
41073 }
41074 }
41075
41076 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41077 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41078 The upper bits of DEST are undefined, though they shouldn't cause
41079 exceptions (some bits from src or all zeros are ok). */
41080
41081 static void
41082 emit_reduc_half (rtx dest, rtx src, int i)
41083 {
41084 rtx tem, d = dest;
41085 switch (GET_MODE (src))
41086 {
41087 case V4SFmode:
41088 if (i == 128)
41089 tem = gen_sse_movhlps (dest, src, src);
41090 else
41091 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41092 GEN_INT (1 + 4), GEN_INT (1 + 4));
41093 break;
41094 case V2DFmode:
41095 tem = gen_vec_interleave_highv2df (dest, src, src);
41096 break;
41097 case V16QImode:
41098 case V8HImode:
41099 case V4SImode:
41100 case V2DImode:
41101 d = gen_reg_rtx (V1TImode);
41102 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41103 GEN_INT (i / 2));
41104 break;
41105 case V8SFmode:
41106 if (i == 256)
41107 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41108 else
41109 tem = gen_avx_shufps256 (dest, src, src,
41110 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41111 break;
41112 case V4DFmode:
41113 if (i == 256)
41114 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41115 else
41116 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41117 break;
41118 case V32QImode:
41119 case V16HImode:
41120 case V8SImode:
41121 case V4DImode:
41122 if (i == 256)
41123 {
41124 if (GET_MODE (dest) != V4DImode)
41125 d = gen_reg_rtx (V4DImode);
41126 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41127 gen_lowpart (V4DImode, src),
41128 const1_rtx);
41129 }
41130 else
41131 {
41132 d = gen_reg_rtx (V2TImode);
41133 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41134 GEN_INT (i / 2));
41135 }
41136 break;
41137 case V16SImode:
41138 case V16SFmode:
41139 case V8DImode:
41140 case V8DFmode:
41141 if (i > 128)
41142 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41143 gen_lowpart (V16SImode, src),
41144 gen_lowpart (V16SImode, src),
41145 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41146 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41147 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41148 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41149 GEN_INT (0xC), GEN_INT (0xD),
41150 GEN_INT (0xE), GEN_INT (0xF),
41151 GEN_INT (0x10), GEN_INT (0x11),
41152 GEN_INT (0x12), GEN_INT (0x13),
41153 GEN_INT (0x14), GEN_INT (0x15),
41154 GEN_INT (0x16), GEN_INT (0x17));
41155 else
41156 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41157 gen_lowpart (V16SImode, src),
41158 GEN_INT (i == 128 ? 0x2 : 0x1),
41159 GEN_INT (0x3),
41160 GEN_INT (0x3),
41161 GEN_INT (0x3),
41162 GEN_INT (i == 128 ? 0x6 : 0x5),
41163 GEN_INT (0x7),
41164 GEN_INT (0x7),
41165 GEN_INT (0x7),
41166 GEN_INT (i == 128 ? 0xA : 0x9),
41167 GEN_INT (0xB),
41168 GEN_INT (0xB),
41169 GEN_INT (0xB),
41170 GEN_INT (i == 128 ? 0xE : 0xD),
41171 GEN_INT (0xF),
41172 GEN_INT (0xF),
41173 GEN_INT (0xF));
41174 break;
41175 default:
41176 gcc_unreachable ();
41177 }
41178 emit_insn (tem);
41179 if (d != dest)
41180 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41181 }
41182
41183 /* Expand a vector reduction. FN is the binary pattern to reduce;
41184 DEST is the destination; IN is the input vector. */
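/* E.g. a V4SFmode reduction takes two rounds: first the high 64-bit half
   is combined with the low half, then the two surviving scalars are
   combined, leaving the result in element 0 of DEST. */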
41185
41186 void
41187 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41188 {
41189 rtx half, dst, vec = in;
41190 enum machine_mode mode = GET_MODE (in);
41191 int i;
41192
41193 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41194 if (TARGET_SSE4_1
41195 && mode == V8HImode
41196 && fn == gen_uminv8hi3)
41197 {
41198 emit_insn (gen_sse4_1_phminposuw (dest, in));
41199 return;
41200 }
41201
41202 for (i = GET_MODE_BITSIZE (mode);
41203 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41204 i >>= 1)
41205 {
41206 half = gen_reg_rtx (mode);
41207 emit_reduc_half (half, vec, i);
41208 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41209 dst = dest;
41210 else
41211 dst = gen_reg_rtx (mode);
41212 emit_insn (fn (dst, half, vec));
41213 vec = dst;
41214 }
41215 }
41216 \f
41217 /* Target hook for scalar_mode_supported_p. */
41218 static bool
41219 ix86_scalar_mode_supported_p (enum machine_mode mode)
41220 {
41221 if (DECIMAL_FLOAT_MODE_P (mode))
41222 return default_decimal_float_supported_p ();
41223 else if (mode == TFmode)
41224 return true;
41225 else
41226 return default_scalar_mode_supported_p (mode);
41227 }
41228
41229 /* Implements target hook vector_mode_supported_p. */
41230 static bool
41231 ix86_vector_mode_supported_p (enum machine_mode mode)
41232 {
41233 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41234 return true;
41235 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41236 return true;
41237 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41238 return true;
41239 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41240 return true;
41241 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41242 return true;
41243 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41244 return true;
41245 return false;
41246 }
41247
41248 /* Implement target hook libgcc_floating_mode_supported_p. */
41249 static bool
41250 ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
41251 {
41252 switch (mode)
41253 {
41254 case SFmode:
41255 case DFmode:
41256 case XFmode:
41257 return true;
41258
41259 case TFmode:
41260 #ifdef IX86_NO_LIBGCC_TFMODE
41261 return false;
41262 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
41263 return TARGET_LONG_DOUBLE_128;
41264 #else
41265 return true;
41266 #endif
41267
41268 default:
41269 return false;
41270 }
41271 }
41272
41273 /* Target hook for c_mode_for_suffix. */
41274 static enum machine_mode
41275 ix86_c_mode_for_suffix (char suffix)
41276 {
41277 if (suffix == 'q')
41278 return TFmode;
41279 if (suffix == 'w')
41280 return XFmode;
41281
41282 return VOIDmode;
41283 }
41284
41285 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41286
41287 We do this in the new i386 backend to maintain source compatibility
41288 with the old cc0-based compiler. */
41289
41290 static tree
41291 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41292 {
41293 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41294 clobbers);
41295 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41296 clobbers);
41297 return clobbers;
41298 }
41299
41300 /* Implements the target hook targetm.encode_section_info. */
41301
41302 static void ATTRIBUTE_UNUSED
41303 ix86_encode_section_info (tree decl, rtx rtl, int first)
41304 {
41305 default_encode_section_info (decl, rtl, first);
41306
41307 if (TREE_CODE (decl) == VAR_DECL
41308 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41309 && ix86_in_large_data_p (decl))
41310 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41311 }
41312
41313 /* Worker function for REVERSE_CONDITION. */
41314
41315 enum rtx_code
41316 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41317 {
41318 return (mode != CCFPmode && mode != CCFPUmode
41319 ? reverse_condition (code)
41320 : reverse_condition_maybe_unordered (code));
41321 }
41322
41323 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41324 to OPERANDS[0]. */
41325
41326 const char *
41327 output_387_reg_move (rtx insn, rtx *operands)
41328 {
41329 if (REG_P (operands[0]))
41330 {
41331 if (REG_P (operands[1])
41332 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41333 {
41334 if (REGNO (operands[0]) == FIRST_STACK_REG)
41335 return output_387_ffreep (operands, 0);
41336 return "fstp\t%y0";
41337 }
41338 if (STACK_TOP_P (operands[0]))
41339 return "fld%Z1\t%y1";
41340 return "fst\t%y0";
41341 }
41342 else if (MEM_P (operands[0]))
41343 {
41344 gcc_assert (REG_P (operands[1]));
41345 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41346 return "fstp%Z0\t%y0";
41347 else
41348 {
41349 /* There is no non-popping store to memory for XFmode.
41350 So if we need one, follow the store with a load. */
41351 if (GET_MODE (operands[0]) == XFmode)
41352 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41353 else
41354 return "fst%Z0\t%y0";
41355 }
41356 }
41357 else
41358 gcc_unreachable();
41359 }
41360
41361 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41362 FP status register is set. */
41363
41364 void
41365 ix86_emit_fp_unordered_jump (rtx label)
41366 {
41367 rtx reg = gen_reg_rtx (HImode);
41368 rtx temp;
41369
41370 emit_insn (gen_x86_fnstsw_1 (reg));
41371
41372 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41373 {
41374 emit_insn (gen_x86_sahf_1 (reg));
41375
41376 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41377 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41378 }
41379 else
41380 {
41381 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41382
41383 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41384 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41385 }
41386
41387 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41388 gen_rtx_LABEL_REF (VOIDmode, label),
41389 pc_rtx);
41390 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41391
41392 emit_jump_insn (temp);
41393 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41394 }
41395
41396 /* Output code to perform a log1p XFmode calculation. */
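/* The x87 fyl2xp1 instruction is only specified for |x| < 1 - sqrt(2)/2
   (about 0.2928932), hence the branch below: small arguments use fyl2xp1
   directly, larger ones fall back to fyl2x on 1 + op1. Multiplying by
   fldln2 converts the base-2 logarithm into the natural logarithm. */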
41397
41398 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41399 {
41400 rtx_code_label *label1 = gen_label_rtx ();
41401 rtx_code_label *label2 = gen_label_rtx ();
41402
41403 rtx tmp = gen_reg_rtx (XFmode);
41404 rtx tmp2 = gen_reg_rtx (XFmode);
41405 rtx test;
41406
41407 emit_insn (gen_absxf2 (tmp, op1));
41408 test = gen_rtx_GE (VOIDmode, tmp,
41409 CONST_DOUBLE_FROM_REAL_VALUE (
41410 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41411 XFmode));
41412 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41413
41414 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41415 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41416 emit_jump (label2);
41417
41418 emit_label (label1);
41419 emit_move_insn (tmp, CONST1_RTX (XFmode));
41420 emit_insn (gen_addxf3 (tmp, op1, tmp));
41421 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41422 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41423
41424 emit_label (label2);
41425 }
41426
41427 /* Emit code for round calculation. */
41428 void ix86_emit_i387_round (rtx op0, rtx op1)
41429 {
41430 enum machine_mode inmode = GET_MODE (op1);
41431 enum machine_mode outmode = GET_MODE (op0);
41432 rtx e1, e2, res, tmp, tmp1, half;
41433 rtx scratch = gen_reg_rtx (HImode);
41434 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41435 rtx_code_label *jump_label = gen_label_rtx ();
41436 rtx insn;
41437 rtx (*gen_abs) (rtx, rtx);
41438 rtx (*gen_neg) (rtx, rtx);
41439
41440 switch (inmode)
41441 {
41442 case SFmode:
41443 gen_abs = gen_abssf2;
41444 break;
41445 case DFmode:
41446 gen_abs = gen_absdf2;
41447 break;
41448 case XFmode:
41449 gen_abs = gen_absxf2;
41450 break;
41451 default:
41452 gcc_unreachable ();
41453 }
41454
41455 switch (outmode)
41456 {
41457 case SFmode:
41458 gen_neg = gen_negsf2;
41459 break;
41460 case DFmode:
41461 gen_neg = gen_negdf2;
41462 break;
41463 case XFmode:
41464 gen_neg = gen_negxf2;
41465 break;
41466 case HImode:
41467 gen_neg = gen_neghi2;
41468 break;
41469 case SImode:
41470 gen_neg = gen_negsi2;
41471 break;
41472 case DImode:
41473 gen_neg = gen_negdi2;
41474 break;
41475 default:
41476 gcc_unreachable ();
41477 }
41478
41479 e1 = gen_reg_rtx (inmode);
41480 e2 = gen_reg_rtx (inmode);
41481 res = gen_reg_rtx (outmode);
41482
41483 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41484
41485 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
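/* The sign is recorded up front with fxam (its C1 bit is tested via the
   0x02 mask below), the rounding is done on fabs(a), and the result is
   negated afterwards if the original value was negative. */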
41486
41487 /* scratch = fxam(op1) */
41488 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41489 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41490 UNSPEC_FXAM)));
41491 /* e1 = fabs(op1) */
41492 emit_insn (gen_abs (e1, op1));
41493
41494 /* e2 = e1 + 0.5 */
41495 half = force_reg (inmode, half);
41496 emit_insn (gen_rtx_SET (VOIDmode, e2,
41497 gen_rtx_PLUS (inmode, e1, half)));
41498
41499 /* res = floor(e2) */
41500 if (inmode != XFmode)
41501 {
41502 tmp1 = gen_reg_rtx (XFmode);
41503
41504 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41505 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41506 }
41507 else
41508 tmp1 = e2;
41509
41510 switch (outmode)
41511 {
41512 case SFmode:
41513 case DFmode:
41514 {
41515 rtx tmp0 = gen_reg_rtx (XFmode);
41516
41517 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41518
41519 emit_insn (gen_rtx_SET (VOIDmode, res,
41520 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41521 UNSPEC_TRUNC_NOOP)));
41522 }
41523 break;
41524 case XFmode:
41525 emit_insn (gen_frndintxf2_floor (res, tmp1));
41526 break;
41527 case HImode:
41528 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41529 break;
41530 case SImode:
41531 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41532 break;
41533 case DImode:
41534 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41535 break;
41536 default:
41537 gcc_unreachable ();
41538 }
41539
41540 /* flags = signbit(a) */
41541 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41542
41543 /* if (flags) then res = -res */
41544 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41545 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41546 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41547 pc_rtx);
41548 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41549 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41550 JUMP_LABEL (insn) = jump_label;
41551
41552 emit_insn (gen_neg (res, res));
41553
41554 emit_label (jump_label);
41555 LABEL_NUSES (jump_label) = 1;
41556
41557 emit_move_insn (op0, res);
41558 }
41559
41560 /* Output code to perform a Newton-Raphson approximation of a single precision
41561 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41562
41563 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41564 {
41565 rtx x0, x1, e0, e1;
41566
41567 x0 = gen_reg_rtx (mode);
41568 e0 = gen_reg_rtx (mode);
41569 e1 = gen_reg_rtx (mode);
41570 x1 = gen_reg_rtx (mode);
41571
41572 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
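/* This is one Newton-Raphson refinement x1 = x0 * (2 - b * x0) of the
   hardware reciprocal estimate x0 = rcp(b), rewritten to use only
   additions and multiplications. */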
41573
41574 b = force_reg (mode, b);
41575
41576 /* x0 = rcp(b) estimate */
41577 if (mode == V16SFmode || mode == V8DFmode)
41578 emit_insn (gen_rtx_SET (VOIDmode, x0,
41579 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41580 UNSPEC_RCP14)));
41581 else
41582 emit_insn (gen_rtx_SET (VOIDmode, x0,
41583 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41584 UNSPEC_RCP)));
41585
41586 /* e0 = x0 * b */
41587 emit_insn (gen_rtx_SET (VOIDmode, e0,
41588 gen_rtx_MULT (mode, x0, b)));
41589
41590 /* e0 = x0 * e0 */
41591 emit_insn (gen_rtx_SET (VOIDmode, e0,
41592 gen_rtx_MULT (mode, x0, e0)));
41593
41594 /* e1 = x0 + x0 */
41595 emit_insn (gen_rtx_SET (VOIDmode, e1,
41596 gen_rtx_PLUS (mode, x0, x0)));
41597
41598 /* x1 = e1 - e0 */
41599 emit_insn (gen_rtx_SET (VOIDmode, x1,
41600 gen_rtx_MINUS (mode, e1, e0)));
41601
41602 /* res = a * x1 */
41603 emit_insn (gen_rtx_SET (VOIDmode, res,
41604 gen_rtx_MULT (mode, a, x1)));
41605 }
41606
41607 /* Output code to perform a Newton-Raphson approximation of a
41608 single precision floating point [reciprocal] square root. */
41609
41610 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41611 bool recip)
41612 {
41613 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41614 REAL_VALUE_TYPE r;
41615 int unspec;
41616
41617 x0 = gen_reg_rtx (mode);
41618 e0 = gen_reg_rtx (mode);
41619 e1 = gen_reg_rtx (mode);
41620 e2 = gen_reg_rtx (mode);
41621 e3 = gen_reg_rtx (mode);
41622
41623 real_from_integer (&r, VOIDmode, -3, SIGNED);
41624 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41625
41626 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41627 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41628 unspec = UNSPEC_RSQRT;
41629
41630 if (VECTOR_MODE_P (mode))
41631 {
41632 mthree = ix86_build_const_vector (mode, true, mthree);
41633 mhalf = ix86_build_const_vector (mode, true, mhalf);
41634 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41635 if (GET_MODE_SIZE (mode) == 64)
41636 unspec = UNSPEC_RSQRT14;
41637 }
41638
41639 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41640 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
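/* Both are one Newton-Raphson step x1 = 0.5 * x0 * (3 - a * x0 * x0) on
   the estimate x0 = rsqrtss(a); the sqrt form folds in the final multiply
   by a. The constants are negated so the same multiply/add sequence
   below serves both variants. */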
41641
41642 a = force_reg (mode, a);
41643
41644 /* x0 = rsqrt(a) estimate */
41645 emit_insn (gen_rtx_SET (VOIDmode, x0,
41646 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41647 unspec)));
41648
41649 /* If a == 0.0, mask out the infinite rsqrt estimate to prevent a NaN result for sqrt(0.0). */
41650 if (!recip)
41651 {
41652 rtx zero, mask;
41653
41654 zero = gen_reg_rtx (mode);
41655 mask = gen_reg_rtx (mode);
41656
41657 zero = force_reg (mode, CONST0_RTX(mode));
41658
41659 /* Handle masked compare. */
41660 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41661 {
41662 mask = gen_reg_rtx (HImode);
41663 /* Imm value 0x4 corresponds to not-equal comparison. */
41664 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41665 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41666 }
41667 else
41668 {
41669 emit_insn (gen_rtx_SET (VOIDmode, mask,
41670 gen_rtx_NE (mode, zero, a)));
41671
41672 emit_insn (gen_rtx_SET (VOIDmode, x0,
41673 gen_rtx_AND (mode, x0, mask)));
41674 }
41675 }
41676
41677 /* e0 = x0 * a */
41678 emit_insn (gen_rtx_SET (VOIDmode, e0,
41679 gen_rtx_MULT (mode, x0, a)));
41680 /* e1 = e0 * x0 */
41681 emit_insn (gen_rtx_SET (VOIDmode, e1,
41682 gen_rtx_MULT (mode, e0, x0)));
41683
41684 /* e2 = e1 - 3. */
41685 mthree = force_reg (mode, mthree);
41686 emit_insn (gen_rtx_SET (VOIDmode, e2,
41687 gen_rtx_PLUS (mode, e1, mthree)));
41688
41689 mhalf = force_reg (mode, mhalf);
41690 if (recip)
41691 /* e3 = -.5 * x0 */
41692 emit_insn (gen_rtx_SET (VOIDmode, e3,
41693 gen_rtx_MULT (mode, x0, mhalf)));
41694 else
41695 /* e3 = -.5 * e0 */
41696 emit_insn (gen_rtx_SET (VOIDmode, e3,
41697 gen_rtx_MULT (mode, e0, mhalf)));
41698 /* ret = e2 * e3 */
41699 emit_insn (gen_rtx_SET (VOIDmode, res,
41700 gen_rtx_MULT (mode, e2, e3)));
41701 }
41702
41703 #ifdef TARGET_SOLARIS
41704 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41705
41706 static void
41707 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41708 tree decl)
41709 {
41710 /* With Binutils 2.15, the "@unwind" marker must be specified on
41711 every occurrence of the ".eh_frame" section, not just the first
41712 one. */
41713 if (TARGET_64BIT
41714 && strcmp (name, ".eh_frame") == 0)
41715 {
41716 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41717 flags & SECTION_WRITE ? "aw" : "a");
41718 return;
41719 }
41720
41721 #ifndef USE_GAS
41722 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41723 {
41724 solaris_elf_asm_comdat_section (name, flags, decl);
41725 return;
41726 }
41727 #endif
41728
41729 default_elf_asm_named_section (name, flags, decl);
41730 }
41731 #endif /* TARGET_SOLARIS */
41732
41733 /* Return the mangling of TYPE if it is an extended fundamental type. */
41734
41735 static const char *
41736 ix86_mangle_type (const_tree type)
41737 {
41738 type = TYPE_MAIN_VARIANT (type);
41739
41740 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41741 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41742 return NULL;
41743
41744 switch (TYPE_MODE (type))
41745 {
41746 case TFmode:
41747 /* __float128 is "g". */
41748 return "g";
41749 case XFmode:
41750 /* "long double" or __float80 is "e". */
41751 return "e";
41752 default:
41753 return NULL;
41754 }
41755 }
41756
41757 /* For 32-bit code we can save PIC register setup by using
41758 __stack_chk_fail_local hidden function instead of calling
41759 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41760 register, so it is better to call __stack_chk_fail directly. */
41761
41762 static tree ATTRIBUTE_UNUSED
41763 ix86_stack_protect_fail (void)
41764 {
41765 return TARGET_64BIT
41766 ? default_external_stack_protect_fail ()
41767 : default_hidden_stack_protect_fail ();
41768 }
41769
41770 /* Select a format to encode pointers in exception handling data. CODE
41771 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41772 true if the symbol may be affected by dynamic relocations.
41773
41774 ??? All x86 object file formats are capable of representing this.
41775 After all, the relocation needed is the same as for the call insn.
41776 Whether or not a particular assembler allows us to enter such, I
41777 guess we'll have to see. */
41778 int
41779 asm_preferred_eh_data_format (int code, int global)
41780 {
41781 if (flag_pic)
41782 {
41783 int type = DW_EH_PE_sdata8;
41784 if (!TARGET_64BIT
41785 || ix86_cmodel == CM_SMALL_PIC
41786 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41787 type = DW_EH_PE_sdata4;
41788 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41789 }
41790 if (ix86_cmodel == CM_SMALL
41791 || (ix86_cmodel == CM_MEDIUM && code))
41792 return DW_EH_PE_udata4;
41793 return DW_EH_PE_absptr;
41794 }
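/* For example, 64-bit -fpic code with the small PIC model encodes code
   labels as DW_EH_PE_pcrel | DW_EH_PE_sdata4, and additionally sets
   DW_EH_PE_indirect for global symbols that may be affected by dynamic
   relocations, while non-PIC small-model code uses plain DW_EH_PE_udata4.  */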
41795 \f
41796 /* Expand copysign: combine the sign bit of SIGN with the non-negative
41797 value ABS_VALUE and store the result in RESULT. If MASK is non-null, it
41798 shall be a mask that can be used to mask out the sign bit. */
41799 static void
41800 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41801 {
41802 enum machine_mode mode = GET_MODE (sign);
41803 rtx sgn = gen_reg_rtx (mode);
41804 if (mask == NULL_RTX)
41805 {
41806 enum machine_mode vmode;
41807
41808 if (mode == SFmode)
41809 vmode = V4SFmode;
41810 else if (mode == DFmode)
41811 vmode = V2DFmode;
41812 else
41813 vmode = mode;
41814
41815 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41816 if (!VECTOR_MODE_P (mode))
41817 {
41818 /* We need to generate a scalar mode mask in this case. */
41819 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41820 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41821 mask = gen_reg_rtx (mode);
41822 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41823 }
41824 }
41825 else
41826 mask = gen_rtx_NOT (mode, mask);
41827 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41828 gen_rtx_AND (mode, mask, sign)));
41829 emit_insn (gen_rtx_SET (VOIDmode, result,
41830 gen_rtx_IOR (mode, abs_value, sgn)));
41831 }
41832
41833 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41834 mask for masking out the sign-bit is stored in *SMASK, if that is
41835 non-null. */
41836 static rtx
41837 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41838 {
41839 enum machine_mode vmode, mode = GET_MODE (op0);
41840 rtx xa, mask;
41841
41842 xa = gen_reg_rtx (mode);
41843 if (mode == SFmode)
41844 vmode = V4SFmode;
41845 else if (mode == DFmode)
41846 vmode = V2DFmode;
41847 else
41848 vmode = mode;
41849 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41850 if (!VECTOR_MODE_P (mode))
41851 {
41852 /* We need to generate a scalar mode mask in this case. */
41853 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41854 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41855 mask = gen_reg_rtx (mode);
41856 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41857 }
41858 emit_insn (gen_rtx_SET (VOIDmode, xa,
41859 gen_rtx_AND (mode, op0, mask)));
41860
41861 if (smask)
41862 *smask = mask;
41863
41864 return xa;
41865 }
41866
41867 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41868 swapping the operands if SWAP_OPERANDS is true. The expanded
41869 code is a forward jump to a newly created label in case the
41870 comparison is true. The generated label rtx is returned. */
41871 static rtx_code_label *
41872 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41873 bool swap_operands)
41874 {
41875 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41876 rtx_code_label *label;
41877 rtx tmp;
41878
41879 if (swap_operands)
41880 {
41881 tmp = op0;
41882 op0 = op1;
41883 op1 = tmp;
41884 }
41885
41886 label = gen_label_rtx ();
41887 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41888 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41889 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41890 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41891 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41892 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41893 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41894 JUMP_LABEL (tmp) = label;
41895
41896 return label;
41897 }
41898
41899 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41900 using comparison code CODE. Operands are swapped for the comparison if
41901 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41902 static rtx
41903 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41904 bool swap_operands)
41905 {
41906 rtx (*insn)(rtx, rtx, rtx, rtx);
41907 enum machine_mode mode = GET_MODE (op0);
41908 rtx mask = gen_reg_rtx (mode);
41909
41910 if (swap_operands)
41911 {
41912 rtx tmp = op0;
41913 op0 = op1;
41914 op1 = tmp;
41915 }
41916
41917 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41918
41919 emit_insn (insn (mask, op0, op1,
41920 gen_rtx_fmt_ee (code, mode, op0, op1)));
41921 return mask;
41922 }
41923
41924 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41925 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41926 static rtx
41927 ix86_gen_TWO52 (enum machine_mode mode)
41928 {
41929 REAL_VALUE_TYPE TWO52r;
41930 rtx TWO52;
41931
41932 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41933 TWO52 = const_double_from_real_value (TWO52r, mode);
41934 TWO52 = force_reg (mode, TWO52);
41935
41936 return TWO52;
41937 }
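/* The rounding sequences below rely on the classic 2**52 trick: for
   0 <= xa < 2**52 (2**23 for SFmode), the spacing of representable values
   around xa + 2**52 is exactly 1.0, so the addition rounds xa to an integer
   in the current rounding mode and the following subtraction of 2**52 is
   exact.  E.g. 3.7 + 2**52 rounds to 2**52 + 4, and subtracting 2**52
   leaves 4.0 == rint (3.7).  */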
41938
41939 /* Expand SSE sequence for computing lround from OP1 storing
41940 into OP0. */
41941 void
41942 ix86_expand_lround (rtx op0, rtx op1)
41943 {
41944 /* C code for the stuff we're doing below:
41945 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41946 return (long)tmp;
41947 */
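/* Using nextafter (0.5, 0.0) instead of 0.5 matters for arguments just
   below one half: with x == nextafter (0.5, 0.0), x + 0.5 rounds up to
   exactly 1.0 and would truncate to 1, whereas x + nextafter (0.5, 0.0)
   stays below 1.0 and truncates to the correct 0.  ix86_expand_round and
   ix86_expand_round_sse4 use the same predecessor-of-0.5 constant.  */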
41948 enum machine_mode mode = GET_MODE (op1);
41949 const struct real_format *fmt;
41950 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41951 rtx adj;
41952
41953 /* load nextafter (0.5, 0.0) */
41954 fmt = REAL_MODE_FORMAT (mode);
41955 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41956 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41957
41958 /* adj = copysign (0.5, op1) */
41959 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41960 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41961
41962 /* adj = op1 + adj */
41963 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41964
41965 /* op0 = (imode)adj */
41966 expand_fix (op0, adj, 0);
41967 }
41968
41969 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
41970 DO_FLOOR) from OP1 storing into OP0. */
41971 void
41972 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41973 {
41974 /* C code for the stuff we're doing below (for do_floor):
41975 xi = (long)op1;
41976 xi -= (double)xi > op1 ? 1 : 0;
41977 return xi;
41978 */
41979 enum machine_mode fmode = GET_MODE (op1);
41980 enum machine_mode imode = GET_MODE (op0);
41981 rtx ireg, freg, tmp;
41982 rtx_code_label *label;
41983
41984 /* reg = (long)op1 */
41985 ireg = gen_reg_rtx (imode);
41986 expand_fix (ireg, op1, 0);
41987
41988 /* freg = (double)reg */
41989 freg = gen_reg_rtx (fmode);
41990 expand_float (freg, ireg, 0);
41991
41992 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41993 label = ix86_expand_sse_compare_and_jump (UNLE,
41994 freg, op1, !do_floor);
41995 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41996 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41997 emit_move_insn (ireg, tmp);
41998
41999 emit_label (label);
42000 LABEL_NUSES (label) = 1;
42001
42002 emit_move_insn (op0, ireg);
42003 }
42004
42005 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42006 result in OPERAND0. */
42007 void
42008 ix86_expand_rint (rtx operand0, rtx operand1)
42009 {
42010 /* C code for the stuff we're doing below:
42011 xa = fabs (operand1);
42012 if (!isless (xa, 2**52))
42013 return operand1;
42014 xa = xa + 2**52 - 2**52;
42015 return copysign (xa, operand1);
42016 */
42017 enum machine_mode mode = GET_MODE (operand0);
42018 rtx res, xa, TWO52, mask;
42019 rtx_code_label *label;
42020
42021 res = gen_reg_rtx (mode);
42022 emit_move_insn (res, operand1);
42023
42024 /* xa = abs (operand1) */
42025 xa = ix86_expand_sse_fabs (res, &mask);
42026
42027 /* if (!isless (xa, TWO52)) goto label; */
42028 TWO52 = ix86_gen_TWO52 (mode);
42029 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42030
42031 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42032 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42033
42034 ix86_sse_copysign_to_positive (res, xa, res, mask);
42035
42036 emit_label (label);
42037 LABEL_NUSES (label) = 1;
42038
42039 emit_move_insn (operand0, res);
42040 }
42041
42042 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42043 into OPERAND0. */
42044 void
42045 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42046 {
42047 /* C code for the stuff we expand below.
42048 double xa = fabs (x), x2;
42049 if (!isless (xa, TWO52))
42050 return x;
42051 xa = xa + TWO52 - TWO52;
42052 x2 = copysign (xa, x);
42053 Compensate. Floor:
42054 if (x2 > x)
42055 x2 -= 1;
42056 Compensate. Ceil:
42057 if (x2 < x)
42058 x2 -= -1;
42059 return x2;
42060 */
42061 enum machine_mode mode = GET_MODE (operand0);
42062 rtx xa, TWO52, tmp, one, res, mask;
42063 rtx_code_label *label;
42064
42065 TWO52 = ix86_gen_TWO52 (mode);
42066
42067 /* Temporary for holding the result, initialized to the input
42068 operand to ease control flow. */
42069 res = gen_reg_rtx (mode);
42070 emit_move_insn (res, operand1);
42071
42072 /* xa = abs (operand1) */
42073 xa = ix86_expand_sse_fabs (res, &mask);
42074
42075 /* if (!isless (xa, TWO52)) goto label; */
42076 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42077
42078 /* xa = xa + TWO52 - TWO52; */
42079 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42080 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42081
42082 /* xa = copysign (xa, operand1) */
42083 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42084
42085 /* generate 1.0 or -1.0 */
42086 one = force_reg (mode,
42087 const_double_from_real_value (do_floor
42088 ? dconst1 : dconstm1, mode));
42089
42090 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42091 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42092 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42093 gen_rtx_AND (mode, one, tmp)));
42094 /* We always need to subtract here to preserve signed zero. */
42095 tmp = expand_simple_binop (mode, MINUS,
42096 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42097 emit_move_insn (res, tmp);
42098
42099 emit_label (label);
42100 LABEL_NUSES (label) = 1;
42101
42102 emit_move_insn (operand0, res);
42103 }
42104
42105 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42106 into OPERAND0. */
42107 void
42108 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42109 {
42110 /* C code for the stuff we expand below.
42111 double xa = fabs (x), x2;
42112 if (!isless (xa, TWO52))
42113 return x;
42114 x2 = (double)(long)x;
42115 Compensate. Floor:
42116 if (x2 > x)
42117 x2 -= 1;
42118 Compensate. Ceil:
42119 if (x2 < x)
42120 x2 += 1;
42121 if (HONOR_SIGNED_ZEROS (mode))
42122 return copysign (x2, x);
42123 return x2;
42124 */
42125 enum machine_mode mode = GET_MODE (operand0);
42126 rtx xa, xi, TWO52, tmp, one, res, mask;
42127 rtx_code_label *label;
42128
42129 TWO52 = ix86_gen_TWO52 (mode);
42130
42131 /* Temporary for holding the result, initialized to the input
42132 operand to ease control flow. */
42133 res = gen_reg_rtx (mode);
42134 emit_move_insn (res, operand1);
42135
42136 /* xa = abs (operand1) */
42137 xa = ix86_expand_sse_fabs (res, &mask);
42138
42139 /* if (!isless (xa, TWO52)) goto label; */
42140 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42141
42142 /* xa = (double)(long)x */
42143 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42144 expand_fix (xi, res, 0);
42145 expand_float (xa, xi, 0);
42146
42147 /* generate 1.0 */
42148 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42149
42150 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
42151 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42152 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42153 gen_rtx_AND (mode, one, tmp)));
42154 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42155 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42156 emit_move_insn (res, tmp);
42157
42158 if (HONOR_SIGNED_ZEROS (mode))
42159 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42160
42161 emit_label (label);
42162 LABEL_NUSES (label) = 1;
42163
42164 emit_move_insn (operand0, res);
42165 }
42166
42167 /* Expand SSE sequence for computing round from OPERAND1 storing
42168 into OPERAND0. Sequence that works without relying on DImode truncation
42169 via cvttsd2siq, which is only available on 64-bit targets. */
42170 void
42171 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42172 {
42173 /* C code for the stuff we expand below.
42174 double xa = fabs (x), xa2, x2;
42175 if (!isless (xa, TWO52))
42176 return x;
42177 Using the absolute value and copying back sign makes
42178 -0.0 -> -0.0 correct.
42179 xa2 = xa + TWO52 - TWO52;
42180 Compensate.
42181 dxa = xa2 - xa;
42182 if (dxa <= -0.5)
42183 xa2 += 1;
42184 else if (dxa > 0.5)
42185 xa2 -= 1;
42186 x2 = copysign (xa2, x);
42187 return x2;
42188 */
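/* Worked example, x = 2.5: xa2 = 2.5 + 2**52 - 2**52 == 2.0 (ties to
   even), dxa = -0.5, so the dxa <= -0.5 compensation adds 1 and we return
   copysign (3.0, x) == 3.0; halfway cases round away from zero as round ()
   requires, even though the 2**52 trick alone rounds to even.  */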
42189 enum machine_mode mode = GET_MODE (operand0);
42190 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42191 rtx_code_label *label;
42192
42193 TWO52 = ix86_gen_TWO52 (mode);
42194
42195 /* Temporary for holding the result, initialized to the input
42196 operand to ease control flow. */
42197 res = gen_reg_rtx (mode);
42198 emit_move_insn (res, operand1);
42199
42200 /* xa = abs (operand1) */
42201 xa = ix86_expand_sse_fabs (res, &mask);
42202
42203 /* if (!isless (xa, TWO52)) goto label; */
42204 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42205
42206 /* xa2 = xa + TWO52 - TWO52; */
42207 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42208 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42209
42210 /* dxa = xa2 - xa; */
42211 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42212
42213 /* generate 0.5, 1.0 and -0.5 */
42214 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42215 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42216 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42217 0, OPTAB_DIRECT);
42218
42219 /* Compensate. */
42220 tmp = gen_reg_rtx (mode);
42221 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42222 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42223 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42224 gen_rtx_AND (mode, one, tmp)));
42225 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42226 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42227 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42228 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42229 gen_rtx_AND (mode, one, tmp)));
42230 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42231
42232 /* res = copysign (xa2, operand1) */
42233 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42234
42235 emit_label (label);
42236 LABEL_NUSES (label) = 1;
42237
42238 emit_move_insn (operand0, res);
42239 }
42240
42241 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42242 into OPERAND0. */
42243 void
42244 ix86_expand_trunc (rtx operand0, rtx operand1)
42245 {
42246 /* C code for SSE variant we expand below.
42247 double xa = fabs (x), x2;
42248 if (!isless (xa, TWO52))
42249 return x;
42250 x2 = (double)(long)x;
42251 if (HONOR_SIGNED_ZEROS (mode))
42252 return copysign (x2, x);
42253 return x2;
42254 */
42255 enum machine_mode mode = GET_MODE (operand0);
42256 rtx xa, xi, TWO52, res, mask;
42257 rtx_code_label *label;
42258
42259 TWO52 = ix86_gen_TWO52 (mode);
42260
42261 /* Temporary for holding the result, initialized to the input
42262 operand to ease control flow. */
42263 res = gen_reg_rtx (mode);
42264 emit_move_insn (res, operand1);
42265
42266 /* xa = abs (operand1) */
42267 xa = ix86_expand_sse_fabs (res, &mask);
42268
42269 /* if (!isless (xa, TWO52)) goto label; */
42270 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42271
42272 /* x = (double)(long)x */
42273 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42274 expand_fix (xi, res, 0);
42275 expand_float (res, xi, 0);
42276
42277 if (HONOR_SIGNED_ZEROS (mode))
42278 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42279
42280 emit_label (label);
42281 LABEL_NUSES (label) = 1;
42282
42283 emit_move_insn (operand0, res);
42284 }
42285
42286 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
42287 OPERAND0, without relying on 64-bit-only DImode truncation (cvttsd2siq). */
42288 void
42289 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42290 {
42291 enum machine_mode mode = GET_MODE (operand0);
42292 rtx xa, mask, TWO52, one, res, smask, tmp;
42293 rtx_code_label *label;
42294
42295 /* C code for SSE variant we expand below.
42296 double xa = fabs (x), xa2, x2;
42297 if (!isless (xa, TWO52))
42298 return x;
42299 xa2 = xa + TWO52 - TWO52;
42300 Compensate:
42301 if (xa2 > xa)
42302 xa2 -= 1.0;
42303 x2 = copysign (xa2, x);
42304 return x2;
42305 */
42306
42307 TWO52 = ix86_gen_TWO52 (mode);
42308
42309 /* Temporary for holding the result, initialized to the input
42310 operand to ease control flow. */
42311 res = gen_reg_rtx (mode);
42312 emit_move_insn (res, operand1);
42313
42314 /* xa = abs (operand1) */
42315 xa = ix86_expand_sse_fabs (res, &smask);
42316
42317 /* if (!isless (xa, TWO52)) goto label; */
42318 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42319
42320 /* res = xa + TWO52 - TWO52; */
42321 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42322 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42323 emit_move_insn (res, tmp);
42324
42325 /* generate 1.0 */
42326 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42327
42328 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42329 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42330 emit_insn (gen_rtx_SET (VOIDmode, mask,
42331 gen_rtx_AND (mode, mask, one)));
42332 tmp = expand_simple_binop (mode, MINUS,
42333 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42334 emit_move_insn (res, tmp);
42335
42336 /* res = copysign (res, operand1) */
42337 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42338
42339 emit_label (label);
42340 LABEL_NUSES (label) = 1;
42341
42342 emit_move_insn (operand0, res);
42343 }
42344
42345 /* Expand SSE sequence for computing round from OPERAND1 storing
42346 into OPERAND0. */
42347 void
42348 ix86_expand_round (rtx operand0, rtx operand1)
42349 {
42350 /* C code for the stuff we're doing below:
42351 double xa = fabs (x);
42352 if (!isless (xa, TWO52))
42353 return x;
42354 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42355 return copysign (xa, x);
42356 */
42357 enum machine_mode mode = GET_MODE (operand0);
42358 rtx res, TWO52, xa, xi, half, mask;
42359 rtx_code_label *label;
42360 const struct real_format *fmt;
42361 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42362
42363 /* Temporary for holding the result, initialized to the input
42364 operand to ease control flow. */
42365 res = gen_reg_rtx (mode);
42366 emit_move_insn (res, operand1);
42367
42368 TWO52 = ix86_gen_TWO52 (mode);
42369 xa = ix86_expand_sse_fabs (res, &mask);
42370 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42371
42372 /* load nextafter (0.5, 0.0) */
42373 fmt = REAL_MODE_FORMAT (mode);
42374 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42375 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42376
42377 /* xa = xa + 0.5 */
42378 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42379 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42380
42381 /* xa = (double)(int64_t)xa */
42382 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42383 expand_fix (xi, xa, 0);
42384 expand_float (xa, xi, 0);
42385
42386 /* res = copysign (xa, operand1) */
42387 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42388
42389 emit_label (label);
42390 LABEL_NUSES (label) = 1;
42391
42392 emit_move_insn (operand0, res);
42393 }
42394
42395 /* Expand SSE sequence for computing round
42396 from OP1 storing into OP0 using sse4 round insn. */
42397 void
42398 ix86_expand_round_sse4 (rtx op0, rtx op1)
42399 {
42400 enum machine_mode mode = GET_MODE (op0);
42401 rtx e1, e2, res, half;
42402 const struct real_format *fmt;
42403 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42404 rtx (*gen_copysign) (rtx, rtx, rtx);
42405 rtx (*gen_round) (rtx, rtx, rtx);
42406
42407 switch (mode)
42408 {
42409 case SFmode:
42410 gen_copysign = gen_copysignsf3;
42411 gen_round = gen_sse4_1_roundsf2;
42412 break;
42413 case DFmode:
42414 gen_copysign = gen_copysigndf3;
42415 gen_round = gen_sse4_1_rounddf2;
42416 break;
42417 default:
42418 gcc_unreachable ();
42419 }
42420
42421 /* round (a) = trunc (a + copysign (0.5, a)) */
42422
42423 /* load nextafter (0.5, 0.0) */
42424 fmt = REAL_MODE_FORMAT (mode);
42425 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42426 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42427 half = const_double_from_real_value (pred_half, mode);
42428
42429 /* e1 = copysign (0.5, op1) */
42430 e1 = gen_reg_rtx (mode);
42431 emit_insn (gen_copysign (e1, half, op1));
42432
42433 /* e2 = op1 + e1 */
42434 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42435
42436 /* res = trunc (e2) */
42437 res = gen_reg_rtx (mode);
42438 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42439
42440 emit_move_insn (op0, res);
42441 }
42442 \f
42443
42444 /* Table of valid machine attributes. */
42445 static const struct attribute_spec ix86_attribute_table[] =
42446 {
42447 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42448 affects_type_identity } */
42449 /* Stdcall attribute says callee is responsible for popping arguments
42450 if they are not variable. */
42451 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42452 true },
42453 /* Fastcall attribute says callee is responsible for popping arguments
42454 if they are not variable. */
42455 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42456 true },
42457 /* Thiscall attribute says callee is responsible for popping arguments
42458 if they are not variable. */
42459 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42460 true },
42461 /* Cdecl attribute says the callee is a normal C declaration */
42462 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42463 true },
42464 /* Regparm attribute specifies how many integer arguments are to be
42465 passed in registers. */
42466 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42467 true },
42468 /* Sseregparm attribute says we are using x86_64 calling conventions
42469 for FP arguments. */
42470 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42471 true },
42472 /* The transactional memory builtins are implicitly regparm or fastcall
42473 depending on the ABI. Override the generic do-nothing attribute that
42474 these builtins were declared with. */
42475 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42476 true },
42477 /* force_align_arg_pointer says this function realigns the stack at entry. */
42478 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42479 false, true, true, ix86_handle_cconv_attribute, false },
42480 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42481 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42482 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42483 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42484 false },
42485 #endif
42486 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42487 false },
42488 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42489 false },
42490 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42491 SUBTARGET_ATTRIBUTE_TABLE,
42492 #endif
42493 /* ms_abi and sysv_abi calling convention function attributes. */
42494 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42495 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42496 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42497 false },
42498 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42499 ix86_handle_callee_pop_aggregate_return, true },
42500 /* End element. */
42501 { NULL, 0, 0, false, false, false, NULL, false }
42502 };
42503
42504 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42505 static int
42506 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42507 tree vectype, int)
42508 {
42509 unsigned elements;
42510
42511 switch (type_of_cost)
42512 {
42513 case scalar_stmt:
42514 return ix86_cost->scalar_stmt_cost;
42515
42516 case scalar_load:
42517 return ix86_cost->scalar_load_cost;
42518
42519 case scalar_store:
42520 return ix86_cost->scalar_store_cost;
42521
42522 case vector_stmt:
42523 return ix86_cost->vec_stmt_cost;
42524
42525 case vector_load:
42526 return ix86_cost->vec_align_load_cost;
42527
42528 case vector_store:
42529 return ix86_cost->vec_store_cost;
42530
42531 case vec_to_scalar:
42532 return ix86_cost->vec_to_scalar_cost;
42533
42534 case scalar_to_vec:
42535 return ix86_cost->scalar_to_vec_cost;
42536
42537 case unaligned_load:
42538 case unaligned_store:
42539 return ix86_cost->vec_unalign_load_cost;
42540
42541 case cond_branch_taken:
42542 return ix86_cost->cond_taken_branch_cost;
42543
42544 case cond_branch_not_taken:
42545 return ix86_cost->cond_not_taken_branch_cost;
42546
42547 case vec_perm:
42548 case vec_promote_demote:
42549 return ix86_cost->vec_stmt_cost;
42550
42551 case vec_construct:
42552 elements = TYPE_VECTOR_SUBPARTS (vectype);
42553 return elements / 2 + 1;
42554
42555 default:
42556 gcc_unreachable ();
42557 }
42558 }
42559
42560 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42561 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42562 insn every time. */
42563
42564 static GTY(()) rtx_insn *vselect_insn;
42565
42566 /* Initialize vselect_insn. */
42567
42568 static void
42569 init_vselect_insn (void)
42570 {
42571 unsigned i;
42572 rtx x;
42573
42574 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42575 for (i = 0; i < MAX_VECT_LEN; ++i)
42576 XVECEXP (x, 0, i) = const0_rtx;
42577 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42578 const0_rtx), x);
42579 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42580 start_sequence ();
42581 vselect_insn = emit_insn (x);
42582 end_sequence ();
42583 }
42584
42585 /* Construct (set target (vec_select op0 (parallel perm))) and
42586 return true if that's a valid instruction in the active ISA. */
42587
42588 static bool
42589 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42590 unsigned nelt, bool testing_p)
42591 {
42592 unsigned int i;
42593 rtx x, save_vconcat;
42594 int icode;
42595
42596 if (vselect_insn == NULL_RTX)
42597 init_vselect_insn ();
42598
42599 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42600 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42601 for (i = 0; i < nelt; ++i)
42602 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42603 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42604 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42605 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42606 SET_DEST (PATTERN (vselect_insn)) = target;
42607 icode = recog_memoized (vselect_insn);
42608
42609 if (icode >= 0 && !testing_p)
42610 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42611
42612 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42613 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42614 INSN_CODE (vselect_insn) = -1;
42615
42616 return icode >= 0;
42617 }
42618
42619 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42620
42621 static bool
42622 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42623 const unsigned char *perm, unsigned nelt,
42624 bool testing_p)
42625 {
42626 enum machine_mode v2mode;
42627 rtx x;
42628 bool ok;
42629
42630 if (vselect_insn == NULL_RTX)
42631 init_vselect_insn ();
42632
42633 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42634 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42635 PUT_MODE (x, v2mode);
42636 XEXP (x, 0) = op0;
42637 XEXP (x, 1) = op1;
42638 ok = expand_vselect (target, x, perm, nelt, testing_p);
42639 XEXP (x, 0) = const0_rtx;
42640 XEXP (x, 1) = const0_rtx;
42641 return ok;
42642 }
42643
42644 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42645 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42646
42647 static bool
42648 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42649 {
42650 enum machine_mode vmode = d->vmode;
42651 unsigned i, mask, nelt = d->nelt;
42652 rtx target, op0, op1, x;
42653 rtx rperm[32], vperm;
42654
42655 if (d->one_operand_p)
42656 return false;
42657 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42658 ;
42659 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42660 ;
42661 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42662 ;
42663 else
42664 return false;
42665
42666 /* This is a blend, not a permute. Elements must stay in their
42667 respective lanes. */
42668 for (i = 0; i < nelt; ++i)
42669 {
42670 unsigned e = d->perm[i];
42671 if (!(e == i || e == i + nelt))
42672 return false;
42673 }
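/* For example, in V4SFmode the permutation {0, 5, 2, 7} is a blend: it
   keeps elements 0 and 2 of op0 and takes elements 1 and 3 from op1, so
   the code below builds the immediate mask 0b1010 for blendps.  */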
42674
42675 if (d->testing_p)
42676 return true;
42677
42678 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42679 decision should be extracted elsewhere, so that we only try that
42680 sequence once all budget==3 options have been tried. */
42681 target = d->target;
42682 op0 = d->op0;
42683 op1 = d->op1;
42684 mask = 0;
42685
42686 switch (vmode)
42687 {
42688 case V4DFmode:
42689 case V8SFmode:
42690 case V2DFmode:
42691 case V4SFmode:
42692 case V8HImode:
42693 case V8SImode:
42694 for (i = 0; i < nelt; ++i)
42695 mask |= (d->perm[i] >= nelt) << i;
42696 break;
42697
42698 case V2DImode:
42699 for (i = 0; i < 2; ++i)
42700 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42701 vmode = V8HImode;
42702 goto do_subreg;
42703
42704 case V4SImode:
42705 for (i = 0; i < 4; ++i)
42706 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42707 vmode = V8HImode;
42708 goto do_subreg;
42709
42710 case V16QImode:
42711 /* See if bytes move in pairs so we can use pblendw with
42712 an immediate argument, rather than pblendvb with a vector
42713 argument. */
42714 for (i = 0; i < 16; i += 2)
42715 if (d->perm[i] + 1 != d->perm[i + 1])
42716 {
42717 use_pblendvb:
42718 for (i = 0; i < nelt; ++i)
42719 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42720
42721 finish_pblendvb:
42722 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42723 vperm = force_reg (vmode, vperm);
42724
42725 if (GET_MODE_SIZE (vmode) == 16)
42726 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42727 else
42728 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42729 if (target != d->target)
42730 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42731 return true;
42732 }
42733
42734 for (i = 0; i < 8; ++i)
42735 mask |= (d->perm[i * 2] >= 16) << i;
42736 vmode = V8HImode;
42737 /* FALLTHRU */
42738
42739 do_subreg:
42740 target = gen_reg_rtx (vmode);
42741 op0 = gen_lowpart (vmode, op0);
42742 op1 = gen_lowpart (vmode, op1);
42743 break;
42744
42745 case V32QImode:
42746 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42747 for (i = 0; i < 32; i += 2)
42748 if (d->perm[i] + 1 != d->perm[i + 1])
42749 goto use_pblendvb;
42750 /* See if bytes move in quadruplets. If yes, vpblendd
42751 with immediate can be used. */
42752 for (i = 0; i < 32; i += 4)
42753 if (d->perm[i] + 2 != d->perm[i + 2])
42754 break;
42755 if (i < 32)
42756 {
42757 /* See if bytes move the same in both lanes. If yes,
42758 vpblendw with immediate can be used. */
42759 for (i = 0; i < 16; i += 2)
42760 if (d->perm[i] + 16 != d->perm[i + 16])
42761 goto use_pblendvb;
42762
42763 /* Use vpblendw. */
42764 for (i = 0; i < 16; ++i)
42765 mask |= (d->perm[i * 2] >= 32) << i;
42766 vmode = V16HImode;
42767 goto do_subreg;
42768 }
42769
42770 /* Use vpblendd. */
42771 for (i = 0; i < 8; ++i)
42772 mask |= (d->perm[i * 4] >= 32) << i;
42773 vmode = V8SImode;
42774 goto do_subreg;
42775
42776 case V16HImode:
42777 /* See if words move in pairs. If yes, vpblendd can be used. */
42778 for (i = 0; i < 16; i += 2)
42779 if (d->perm[i] + 1 != d->perm[i + 1])
42780 break;
42781 if (i < 16)
42782 {
42783 /* See if words move the same in both lanes. If not,
42784 vpblendvb must be used. */
42785 for (i = 0; i < 8; i++)
42786 if (d->perm[i] + 8 != d->perm[i + 8])
42787 {
42788 /* Use vpblendvb. */
42789 for (i = 0; i < 32; ++i)
42790 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42791
42792 vmode = V32QImode;
42793 nelt = 32;
42794 target = gen_reg_rtx (vmode);
42795 op0 = gen_lowpart (vmode, op0);
42796 op1 = gen_lowpart (vmode, op1);
42797 goto finish_pblendvb;
42798 }
42799
42800 /* Use vpblendw. */
42801 for (i = 0; i < 16; ++i)
42802 mask |= (d->perm[i] >= 16) << i;
42803 break;
42804 }
42805
42806 /* Use vpblendd. */
42807 for (i = 0; i < 8; ++i)
42808 mask |= (d->perm[i * 2] >= 16) << i;
42809 vmode = V8SImode;
42810 goto do_subreg;
42811
42812 case V4DImode:
42813 /* Use vpblendd. */
42814 for (i = 0; i < 4; ++i)
42815 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42816 vmode = V8SImode;
42817 goto do_subreg;
42818
42819 default:
42820 gcc_unreachable ();
42821 }
42822
42823 /* This matches five different patterns with the different modes. */
42824 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42825 x = gen_rtx_SET (VOIDmode, target, x);
42826 emit_insn (x);
42827 if (target != d->target)
42828 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42829
42830 return true;
42831 }
42832
42833 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42834 in terms of the variable form of vpermilps.
42835
42836 Note that we will have already failed the immediate input vpermilps,
42837 which requires that the high and low part shuffle be identical; the
42838 variable form doesn't require that. */
42839
42840 static bool
42841 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42842 {
42843 rtx rperm[8], vperm;
42844 unsigned i;
42845
42846 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42847 return false;
42848
42849 /* We can only permute within the 128-bit lane. */
42850 for (i = 0; i < 8; ++i)
42851 {
42852 unsigned e = d->perm[i];
42853 if (i < 4 ? e >= 4 : e < 4)
42854 return false;
42855 }
42856
42857 if (d->testing_p)
42858 return true;
42859
42860 for (i = 0; i < 8; ++i)
42861 {
42862 unsigned e = d->perm[i];
42863
42864 /* Within each 128-bit lane, the elements of op0 are numbered
42865 from 0 and the elements of op1 are numbered from 4. */
42866 if (e >= 8 + 4)
42867 e -= 8;
42868 else if (e >= 4)
42869 e -= 4;
42870
42871 rperm[i] = GEN_INT (e);
42872 }
42873
42874 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42875 vperm = force_reg (V8SImode, vperm);
42876 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42877
42878 return true;
42879 }
42880
42881 /* Return true if permutation D can be performed as VMODE permutation
42882 instead. */
42883
42884 static bool
42885 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42886 {
42887 unsigned int i, j, chunk;
42888
42889 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42890 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42891 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42892 return false;
42893
42894 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42895 return true;
42896
42897 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42898 for (i = 0; i < d->nelt; i += chunk)
42899 if (d->perm[i] & (chunk - 1))
42900 return false;
42901 else
42902 for (j = 1; j < chunk; ++j)
42903 if (d->perm[i] + j != d->perm[i + j])
42904 return false;
42905
42906 return true;
42907 }
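/* For instance, a V16QImode permutation that moves whole aligned groups
   of four consecutive bytes, such as {4 5 6 7 0 1 2 3 12 13 14 15 8 9 10 11},
   passes the check above and can be performed as the V4SImode permutation
   {1, 0, 3, 2} instead.  */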
42908
42909 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42910 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42911
42912 static bool
42913 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42914 {
42915 unsigned i, nelt, eltsz, mask;
42916 unsigned char perm[32];
42917 enum machine_mode vmode = V16QImode;
42918 rtx rperm[32], vperm, target, op0, op1;
42919
42920 nelt = d->nelt;
42921
42922 if (!d->one_operand_p)
42923 {
42924 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42925 {
42926 if (TARGET_AVX2
42927 && valid_perm_using_mode_p (V2TImode, d))
42928 {
42929 if (d->testing_p)
42930 return true;
42931
42932 /* Use vperm2i128 insn. The pattern uses
42933 V4DImode instead of V2TImode. */
42934 target = d->target;
42935 if (d->vmode != V4DImode)
42936 target = gen_reg_rtx (V4DImode);
42937 op0 = gen_lowpart (V4DImode, d->op0);
42938 op1 = gen_lowpart (V4DImode, d->op1);
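/* The vperm2i128 immediate selects a 128-bit half for each result lane:
   values 0/1 pick the low/high half of the first source and 2/3 the
   low/high half of the second source; bits 1:0 control the low result
   lane and bits 5:4 the high result lane.  */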
42939 rperm[0]
42940 = GEN_INT ((d->perm[0] / (nelt / 2))
42941 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
42942 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42943 if (target != d->target)
42944 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42945 return true;
42946 }
42947 return false;
42948 }
42949 }
42950 else
42951 {
42952 if (GET_MODE_SIZE (d->vmode) == 16)
42953 {
42954 if (!TARGET_SSSE3)
42955 return false;
42956 }
42957 else if (GET_MODE_SIZE (d->vmode) == 32)
42958 {
42959 if (!TARGET_AVX2)
42960 return false;
42961
42962 /* V4DImode should be already handled through
42963 expand_vselect by vpermq instruction. */
42964 gcc_assert (d->vmode != V4DImode);
42965
42966 vmode = V32QImode;
42967 if (d->vmode == V8SImode
42968 || d->vmode == V16HImode
42969 || d->vmode == V32QImode)
42970 {
42971 /* First see if vpermq can be used for
42972 V8SImode/V16HImode/V32QImode. */
42973 if (valid_perm_using_mode_p (V4DImode, d))
42974 {
42975 for (i = 0; i < 4; i++)
42976 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42977 if (d->testing_p)
42978 return true;
42979 target = gen_reg_rtx (V4DImode);
42980 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42981 perm, 4, false))
42982 {
42983 emit_move_insn (d->target,
42984 gen_lowpart (d->vmode, target));
42985 return true;
42986 }
42987 return false;
42988 }
42989
42990 /* Next see if vpermd can be used. */
42991 if (valid_perm_using_mode_p (V8SImode, d))
42992 vmode = V8SImode;
42993 }
42994 /* Or if vpermps can be used. */
42995 else if (d->vmode == V8SFmode)
42996 vmode = V8SImode;
42997
42998 if (vmode == V32QImode)
42999 {
43000 /* vpshufb only works within 128-bit lanes; it is not
43001 possible to shuffle bytes between the lanes. */
43002 for (i = 0; i < nelt; ++i)
43003 if ((d->perm[i] ^ i) & (nelt / 2))
43004 return false;
43005 }
43006 }
43007 else
43008 return false;
43009 }
43010
43011 if (d->testing_p)
43012 return true;
43013
43014 if (vmode == V8SImode)
43015 for (i = 0; i < 8; ++i)
43016 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43017 else
43018 {
43019 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43020 if (!d->one_operand_p)
43021 mask = 2 * nelt - 1;
43022 else if (vmode == V16QImode)
43023 mask = nelt - 1;
43024 else
43025 mask = nelt / 2 - 1;
43026
43027 for (i = 0; i < nelt; ++i)
43028 {
43029 unsigned j, e = d->perm[i] & mask;
43030 for (j = 0; j < eltsz; ++j)
43031 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43032 }
43033 }
43034
43035 vperm = gen_rtx_CONST_VECTOR (vmode,
43036 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43037 vperm = force_reg (vmode, vperm);
43038
43039 target = d->target;
43040 if (d->vmode != vmode)
43041 target = gen_reg_rtx (vmode);
43042 op0 = gen_lowpart (vmode, d->op0);
43043 if (d->one_operand_p)
43044 {
43045 if (vmode == V16QImode)
43046 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43047 else if (vmode == V32QImode)
43048 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43049 else if (vmode == V8SFmode)
43050 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43051 else
43052 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43053 }
43054 else
43055 {
43056 op1 = gen_lowpart (vmode, d->op1);
43057 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43058 }
43059 if (target != d->target)
43060 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43061
43062 return true;
43063 }
43064
43065 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43066 in a single instruction. */
43067
43068 static bool
43069 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43070 {
43071 unsigned i, nelt = d->nelt;
43072 unsigned char perm2[MAX_VECT_LEN];
43073
43074 /* Check plain VEC_SELECT first, because AVX has instructions that could
43075 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43076 input where SEL+CONCAT may not. */
43077 if (d->one_operand_p)
43078 {
43079 int mask = nelt - 1;
43080 bool identity_perm = true;
43081 bool broadcast_perm = true;
43082
43083 for (i = 0; i < nelt; i++)
43084 {
43085 perm2[i] = d->perm[i] & mask;
43086 if (perm2[i] != i)
43087 identity_perm = false;
43088 if (perm2[i])
43089 broadcast_perm = false;
43090 }
43091
43092 if (identity_perm)
43093 {
43094 if (!d->testing_p)
43095 emit_move_insn (d->target, d->op0);
43096 return true;
43097 }
43098 else if (broadcast_perm && TARGET_AVX2)
43099 {
43100 /* Use vpbroadcast{b,w,d}. */
43101 rtx (*gen) (rtx, rtx) = NULL;
43102 switch (d->vmode)
43103 {
43104 case V32QImode:
43105 gen = gen_avx2_pbroadcastv32qi_1;
43106 break;
43107 case V16HImode:
43108 gen = gen_avx2_pbroadcastv16hi_1;
43109 break;
43110 case V8SImode:
43111 gen = gen_avx2_pbroadcastv8si_1;
43112 break;
43113 case V16QImode:
43114 gen = gen_avx2_pbroadcastv16qi;
43115 break;
43116 case V8HImode:
43117 gen = gen_avx2_pbroadcastv8hi;
43118 break;
43119 case V8SFmode:
43120 gen = gen_avx2_vec_dupv8sf_1;
43121 break;
43122 /* For other modes, prefer the other shuffles this function creates. */
43123 default: break;
43124 }
43125 if (gen != NULL)
43126 {
43127 if (!d->testing_p)
43128 emit_insn (gen (d->target, d->op0));
43129 return true;
43130 }
43131 }
43132
43133 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43134 return true;
43135
43136 /* There are plenty of patterns in sse.md that are written for
43137 SEL+CONCAT and are not replicated for a single op. Perhaps
43138 that should be changed, to avoid the nastiness here. */
43139
43140 /* Recognize interleave style patterns, which means incrementing
43141 every other permutation operand. */
43142 for (i = 0; i < nelt; i += 2)
43143 {
43144 perm2[i] = d->perm[i] & mask;
43145 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43146 }
43147 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43148 d->testing_p))
43149 return true;
43150
43151 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43152 if (nelt >= 4)
43153 {
43154 for (i = 0; i < nelt; i += 4)
43155 {
43156 perm2[i + 0] = d->perm[i + 0] & mask;
43157 perm2[i + 1] = d->perm[i + 1] & mask;
43158 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43159 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43160 }
43161
43162 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43163 d->testing_p))
43164 return true;
43165 }
43166 }
43167
43168 /* Finally, try the fully general two operand permute. */
43169 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43170 d->testing_p))
43171 return true;
43172
43173 /* Recognize interleave style patterns with reversed operands. */
43174 if (!d->one_operand_p)
43175 {
43176 for (i = 0; i < nelt; ++i)
43177 {
43178 unsigned e = d->perm[i];
43179 if (e >= nelt)
43180 e -= nelt;
43181 else
43182 e += nelt;
43183 perm2[i] = e;
43184 }
43185
43186 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43187 d->testing_p))
43188 return true;
43189 }
43190
43191 /* Try the SSE4.1 blend variable merge instructions. */
43192 if (expand_vec_perm_blend (d))
43193 return true;
43194
43195 /* Try one of the AVX vpermil variable permutations. */
43196 if (expand_vec_perm_vpermil (d))
43197 return true;
43198
43199 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43200 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43201 if (expand_vec_perm_pshufb (d))
43202 return true;
43203
43204 /* Try the AVX512F vpermi2 instructions. */
43205 rtx vec[64];
43206 enum machine_mode mode = d->vmode;
43207 if (mode == V8DFmode)
43208 mode = V8DImode;
43209 else if (mode == V16SFmode)
43210 mode = V16SImode;
43211 for (i = 0; i < nelt; ++i)
43212 vec[i] = GEN_INT (d->perm[i]);
43213 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43214 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43215 return true;
43216
43217 return false;
43218 }
43219
43220 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43221 in terms of a pair of pshuflw + pshufhw instructions. */
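/* For example, the V8HImode permutation {2 0 3 1 7 5 6 4} keeps the low
   four words within the low half and the high four within the high half,
   so a pshuflw reorders words 0-3 and a following pshufhw reorders
   words 4-7.  */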
43222
43223 static bool
43224 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43225 {
43226 unsigned char perm2[MAX_VECT_LEN];
43227 unsigned i;
43228 bool ok;
43229
43230 if (d->vmode != V8HImode || !d->one_operand_p)
43231 return false;
43232
43233 /* The two permutations only operate in 64-bit lanes. */
43234 for (i = 0; i < 4; ++i)
43235 if (d->perm[i] >= 4)
43236 return false;
43237 for (i = 4; i < 8; ++i)
43238 if (d->perm[i] < 4)
43239 return false;
43240
43241 if (d->testing_p)
43242 return true;
43243
43244 /* Emit the pshuflw. */
43245 memcpy (perm2, d->perm, 4);
43246 for (i = 4; i < 8; ++i)
43247 perm2[i] = i;
43248 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43249 gcc_assert (ok);
43250
43251 /* Emit the pshufhw. */
43252 memcpy (perm2 + 4, d->perm + 4, 4);
43253 for (i = 0; i < 4; ++i)
43254 perm2[i] = i;
43255 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43256 gcc_assert (ok);
43257
43258 return true;
43259 }
43260
43261 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43262 the permutation using the SSSE3 palignr instruction. This succeeds
43263 when all of the elements in PERM fit within one vector and we merely
43264 need to shift them down so that a single vector permutation has a
43265 chance to succeed. */
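/* For example, a V16QImode permutation whose indices are all in 3..18
   (op0 bytes 3-15 and op1 bytes 0-2) has min == 3, so a palignr by 3
   bytes gathers every wanted byte into one vector, and the remaining
   single-operand shuffle of that vector is handled by expand_vec_perm_1.  */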
43266
43267 static bool
43268 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43269 {
43270 unsigned i, nelt = d->nelt;
43271 unsigned min, max;
43272 bool in_order, ok;
43273 rtx shift, target;
43274 struct expand_vec_perm_d dcopy;
43275
43276 /* Even with AVX, palignr only operates on 128-bit vectors. */
43277 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43278 return false;
43279
43280 min = nelt, max = 0;
43281 for (i = 0; i < nelt; ++i)
43282 {
43283 unsigned e = d->perm[i];
43284 if (e < min)
43285 min = e;
43286 if (e > max)
43287 max = e;
43288 }
43289 if (min == 0 || max - min >= nelt)
43290 return false;
43291
43292 /* Given that we have SSSE3, we know we'll be able to implement the
43293 single operand permutation after the palignr with pshufb. */
43294 if (d->testing_p)
43295 return true;
43296
43297 dcopy = *d;
43298 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43299 target = gen_reg_rtx (TImode);
43300 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43301 gen_lowpart (TImode, d->op0), shift));
43302
43303 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43304 dcopy.one_operand_p = true;
43305
43306 in_order = true;
43307 for (i = 0; i < nelt; ++i)
43308 {
43309 unsigned e = dcopy.perm[i] - min;
43310 if (e != i)
43311 in_order = false;
43312 dcopy.perm[i] = e;
43313 }
43314
43315 /* Test for the degenerate case where the alignment by itself
43316 produces the desired permutation. */
43317 if (in_order)
43318 {
43319 emit_move_insn (d->target, dcopy.op0);
43320 return true;
43321 }
43322
43323 ok = expand_vec_perm_1 (&dcopy);
43324 gcc_assert (ok);
43325
43326 return ok;
43327 }
43328
43329 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43330 the permutation using the SSE4.1 pblendv instruction. Potentially
43331 reduces the permutation from 2 pshufb plus an or to 1 pshufb plus a pblendv. */
43332
43333 static bool
43334 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43335 {
43336 unsigned i, which, nelt = d->nelt;
43337 struct expand_vec_perm_d dcopy, dcopy1;
43338 enum machine_mode vmode = d->vmode;
43339 bool ok;
43340
43341 /* Use the same checks as in expand_vec_perm_blend, but skipping
43342 AVX and AVX2 as they require more than 2 instructions. */
43343 if (d->one_operand_p)
43344 return false;
43345 if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43346 ;
43347 else
43348 return false;
43349
43350 /* Figure out which permutation elements do not stay in their
43351 respective lanes. */
43352 for (i = 0, which = 0; i < nelt; ++i)
43353 {
43354 unsigned e = d->perm[i];
43355 if (e != i)
43356 which |= (e < nelt ? 1 : 2);
43357 }
43358 /* We can pblend the part where elements do not stay in their
43359 respective lanes only when these elements all come from the
43360 same operand.
43361 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their respective
43362 lanes, but both are >= 8 (i.e. from the second operand).
43363 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their
43364 respective lanes, and 8 >= 8 but 2 is not. */
43365 if (which != 1 && which != 2)
43366 return false;
43367 if (d->testing_p)
43368 return true;
43369
43370 /* First we apply a one-operand permutation to the part whose
43371 elements do not stay in their respective lanes. */
43372 dcopy = *d;
43373 if (which == 2)
43374 dcopy.op0 = dcopy.op1 = d->op1;
43375 else
43376 dcopy.op0 = dcopy.op1 = d->op0;
43377 dcopy.one_operand_p = true;
43378
43379 for (i = 0; i < nelt; ++i)
43380 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43381
43382 ok = expand_vec_perm_1 (&dcopy);
43383 gcc_assert (ok);
43384
43385 /* Next we put permuted elements into their positions. */
43386 dcopy1 = *d;
43387 if (which == 2)
43388 dcopy1.op1 = dcopy.target;
43389 else
43390 dcopy1.op0 = dcopy.target;
43391
43392 for (i = 0; i < nelt; ++i)
43393 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43394
43395 ok = expand_vec_perm_blend (&dcopy1);
43396 gcc_assert (ok);
43397
43398 return true;
43399 }
43400
43401 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43402
43403 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43404 a two vector permutation into a single vector permutation by using
43405 an interleave operation to merge the vectors. */
43406
43407 static bool
43408 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43409 {
43410 struct expand_vec_perm_d dremap, dfinal;
43411 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43412 unsigned HOST_WIDE_INT contents;
43413 unsigned char remap[2 * MAX_VECT_LEN];
43414 rtx_insn *seq;
43415 bool ok, same_halves = false;
43416
43417 if (GET_MODE_SIZE (d->vmode) == 16)
43418 {
43419 if (d->one_operand_p)
43420 return false;
43421 }
43422 else if (GET_MODE_SIZE (d->vmode) == 32)
43423 {
43424 if (!TARGET_AVX)
43425 return false;
43426 /* For 32-byte modes allow even d->one_operand_p.
43427 The lack of cross-lane shuffling in some instructions
43428 might prevent a single insn shuffle. */
43429 dfinal = *d;
43430 dfinal.testing_p = true;
43431 /* If expand_vec_perm_interleave3 can expand this into
43432 a 3 insn sequence, give up and let it be expanded as
43433 a 3 insn sequence. While that is one insn longer,
43434 it doesn't need a memory operand, and in the common
43435 case where the interleave low and interleave high
43436 permutations with the same operands are adjacent, only
43437 4 insns are needed for both after CSE. */
43438 if (expand_vec_perm_interleave3 (&dfinal))
43439 return false;
43440 }
43441 else
43442 return false;
43443
43444 /* Examine from whence the elements come. */
43445 contents = 0;
43446 for (i = 0; i < nelt; ++i)
43447 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43448
43449 memset (remap, 0xff, sizeof (remap));
43450 dremap = *d;
43451
43452 if (GET_MODE_SIZE (d->vmode) == 16)
43453 {
43454 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43455
43456 /* Split the two input vectors into 4 halves. */
43457 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43458 h2 = h1 << nelt2;
43459 h3 = h2 << nelt2;
43460 h4 = h3 << nelt2;
43461
43462 /* If the elements come only from the low halves, use interleave low, and
43463 similarly interleave high for the high halves. If the elements are from
43464 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
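/* E.g. for V8HImode, when every d->perm[i] is in {0..3, 8..11} (the low
   halves of both operands), contents is covered by h1 | h3, punpcklwd
   interleaves exactly those eight words, and the dfinal shuffle then only
   has to reorder a single vector.  */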
43465 if ((contents & (h1 | h3)) == contents)
43466 {
43467 /* punpckl* */
43468 for (i = 0; i < nelt2; ++i)
43469 {
43470 remap[i] = i * 2;
43471 remap[i + nelt] = i * 2 + 1;
43472 dremap.perm[i * 2] = i;
43473 dremap.perm[i * 2 + 1] = i + nelt;
43474 }
43475 if (!TARGET_SSE2 && d->vmode == V4SImode)
43476 dremap.vmode = V4SFmode;
43477 }
43478 else if ((contents & (h2 | h4)) == contents)
43479 {
43480 /* punpckh* */
43481 for (i = 0; i < nelt2; ++i)
43482 {
43483 remap[i + nelt2] = i * 2;
43484 remap[i + nelt + nelt2] = i * 2 + 1;
43485 dremap.perm[i * 2] = i + nelt2;
43486 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43487 }
43488 if (!TARGET_SSE2 && d->vmode == V4SImode)
43489 dremap.vmode = V4SFmode;
43490 }
43491 else if ((contents & (h1 | h4)) == contents)
43492 {
43493 /* shufps */
43494 for (i = 0; i < nelt2; ++i)
43495 {
43496 remap[i] = i;
43497 remap[i + nelt + nelt2] = i + nelt2;
43498 dremap.perm[i] = i;
43499 dremap.perm[i + nelt2] = i + nelt + nelt2;
43500 }
43501 if (nelt != 4)
43502 {
43503 /* shufpd */
43504 dremap.vmode = V2DImode;
43505 dremap.nelt = 2;
43506 dremap.perm[0] = 0;
43507 dremap.perm[1] = 3;
43508 }
43509 }
43510 else if ((contents & (h2 | h3)) == contents)
43511 {
43512 /* shufps */
43513 for (i = 0; i < nelt2; ++i)
43514 {
43515 remap[i + nelt2] = i;
43516 remap[i + nelt] = i + nelt2;
43517 dremap.perm[i] = i + nelt2;
43518 dremap.perm[i + nelt2] = i + nelt;
43519 }
43520 if (nelt != 4)
43521 {
43522 /* shufpd */
43523 dremap.vmode = V2DImode;
43524 dremap.nelt = 2;
43525 dremap.perm[0] = 1;
43526 dremap.perm[1] = 2;
43527 }
43528 }
43529 else
43530 return false;
43531 }
43532 else
43533 {
43534 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43535 unsigned HOST_WIDE_INT q[8];
43536 unsigned int nonzero_halves[4];
43537
43538 /* Split the two input vectors into 8 quarters. */
43539 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43540 for (i = 1; i < 8; ++i)
43541 q[i] = q[0] << (nelt4 * i);
43542 for (i = 0; i < 4; ++i)
43543 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43544 {
43545 nonzero_halves[nzcnt] = i;
43546 ++nzcnt;
43547 }
43548
43549 if (nzcnt == 1)
43550 {
43551 gcc_assert (d->one_operand_p);
43552 nonzero_halves[1] = nonzero_halves[0];
43553 same_halves = true;
43554 }
43555 else if (d->one_operand_p)
43556 {
43557 gcc_assert (nonzero_halves[0] == 0);
43558 gcc_assert (nonzero_halves[1] == 1);
43559 }
43560
43561 if (nzcnt <= 2)
43562 {
43563 if (d->perm[0] / nelt2 == nonzero_halves[1])
43564 {
43565 /* Attempt to increase the likelihood that dfinal
43566 shuffle will be intra-lane. */
43567 char tmph = nonzero_halves[0];
43568 nonzero_halves[0] = nonzero_halves[1];
43569 nonzero_halves[1] = tmph;
43570 }
43571
43572 /* vperm2f128 or vperm2i128. */
43573 for (i = 0; i < nelt2; ++i)
43574 {
43575 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43576 remap[i + nonzero_halves[0] * nelt2] = i;
43577 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43578 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43579 }
43580
43581 if (d->vmode != V8SFmode
43582 && d->vmode != V4DFmode
43583 && d->vmode != V8SImode)
43584 {
43585 dremap.vmode = V8SImode;
43586 dremap.nelt = 8;
43587 for (i = 0; i < 4; ++i)
43588 {
43589 dremap.perm[i] = i + nonzero_halves[0] * 4;
43590 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43591 }
43592 }
43593 }
43594 else if (d->one_operand_p)
43595 return false;
43596 else if (TARGET_AVX2
43597 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43598 {
43599 /* vpunpckl* */
43600 for (i = 0; i < nelt4; ++i)
43601 {
43602 remap[i] = i * 2;
43603 remap[i + nelt] = i * 2 + 1;
43604 remap[i + nelt2] = i * 2 + nelt2;
43605 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43606 dremap.perm[i * 2] = i;
43607 dremap.perm[i * 2 + 1] = i + nelt;
43608 dremap.perm[i * 2 + nelt2] = i + nelt2;
43609 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43610 }
43611 }
43612 else if (TARGET_AVX2
43613 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43614 {
43615 /* vpunpckh* */
43616 for (i = 0; i < nelt4; ++i)
43617 {
43618 remap[i + nelt4] = i * 2;
43619 remap[i + nelt + nelt4] = i * 2 + 1;
43620 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43621 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43622 dremap.perm[i * 2] = i + nelt4;
43623 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43624 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43625 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43626 }
43627 }
43628 else
43629 return false;
43630 }
43631
43632 /* Use the remapping array set up above to move the elements from their
43633 swizzled locations into their final destinations. */
43634 dfinal = *d;
43635 for (i = 0; i < nelt; ++i)
43636 {
43637 unsigned e = remap[d->perm[i]];
43638 gcc_assert (e < nelt);
43639 /* If same_halves is true, both halves of the remapped vector are the
43640 same. Avoid cross-lane accesses if possible. */
43641 if (same_halves && i >= nelt2)
43642 {
43643 gcc_assert (e < nelt2);
43644 dfinal.perm[i] = e + nelt2;
43645 }
43646 else
43647 dfinal.perm[i] = e;
43648 }
43649 if (!d->testing_p)
43650 {
43651 dremap.target = gen_reg_rtx (dremap.vmode);
43652 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43653 }
43654 dfinal.op1 = dfinal.op0;
43655 dfinal.one_operand_p = true;
43656
43657 /* Test if the final remap can be done with a single insn. For V4SFmode or
43658 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43659 start_sequence ();
43660 ok = expand_vec_perm_1 (&dfinal);
43661 seq = get_insns ();
43662 end_sequence ();
43663
43664 if (!ok)
43665 return false;
43666
43667 if (d->testing_p)
43668 return true;
43669
43670 if (dremap.vmode != dfinal.vmode)
43671 {
43672 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43673 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43674 }
43675
43676 ok = expand_vec_perm_1 (&dremap);
43677 gcc_assert (ok);
43678
43679 emit_insn (seq);
43680 return true;
43681 }
43682
43683 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43684 a single vector cross-lane permutation into vpermq followed
43685 by any of the single insn permutations. */
43686
43687 static bool
43688 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43689 {
43690 struct expand_vec_perm_d dremap, dfinal;
43691 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43692 unsigned contents[2];
43693 bool ok;
43694
43695 if (!(TARGET_AVX2
43696 && (d->vmode == V32QImode || d->vmode == V16HImode)
43697 && d->one_operand_p))
43698 return false;
43699
43700 contents[0] = 0;
43701 contents[1] = 0;
43702 for (i = 0; i < nelt2; ++i)
43703 {
43704 contents[0] |= 1u << (d->perm[i] / nelt4);
43705 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43706 }
43707
43708 for (i = 0; i < 2; ++i)
43709 {
43710 unsigned int cnt = 0;
43711 for (j = 0; j < 4; ++j)
43712 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43713 return false;
43714 }
43715
43716 if (d->testing_p)
43717 return true;
43718
43719 dremap = *d;
43720 dremap.vmode = V4DImode;
43721 dremap.nelt = 4;
43722 dremap.target = gen_reg_rtx (V4DImode);
43723 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43724 dremap.op1 = dremap.op0;
43725 dremap.one_operand_p = true;
43726 for (i = 0; i < 2; ++i)
43727 {
43728 unsigned int cnt = 0;
43729 for (j = 0; j < 4; ++j)
43730 if ((contents[i] & (1u << j)) != 0)
43731 dremap.perm[2 * i + cnt++] = j;
43732 for (; cnt < 2; ++cnt)
43733 dremap.perm[2 * i + cnt] = 0;
43734 }
43735
43736 dfinal = *d;
43737 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43738 dfinal.op1 = dfinal.op0;
43739 dfinal.one_operand_p = true;
43740 for (i = 0, j = 0; i < nelt; ++i)
43741 {
43742 if (i == nelt2)
43743 j = 2;
43744 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43745 if ((d->perm[i] / nelt4) == dremap.perm[j])
43746 ;
43747 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43748 dfinal.perm[i] |= nelt4;
43749 else
43750 gcc_unreachable ();
43751 }
43752
43753 ok = expand_vec_perm_1 (&dremap);
43754 gcc_assert (ok);
43755
43756 ok = expand_vec_perm_1 (&dfinal);
43757 gcc_assert (ok);
43758
43759 return true;
43760 }
43761
43762 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43763 a vector permutation using two instructions, vperm2f128 resp.
43764 vperm2i128 followed by any single in-lane permutation. */
43765
43766 static bool
43767 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43768 {
43769 struct expand_vec_perm_d dfirst, dsecond;
43770 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43771 bool ok;
43772
43773 if (!TARGET_AVX
43774 || GET_MODE_SIZE (d->vmode) != 32
43775 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43776 return false;
43777
43778 dsecond = *d;
43779 dsecond.one_operand_p = false;
43780 dsecond.testing_p = true;
43781
43782 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43783 immediate. For perm < 16 the second permutation uses
43784 d->op0 as first operand, for perm >= 16 it uses d->op1
43785 as first operand. The second operand is the result of
43786 vperm2[fi]128. */
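 /* For illustration: bits 0-1 of PERM pick the source for the low
    128-bit lane of the vperm2[fi]128 result and bits 2-3 pick the
    source for the high lane, where lanes 0-1 come from d->op0 and
    lanes 2-3 from d->op1.  E.g. for V4DFmode with op0 = { 0 1 2 3 }
    and op1 = { 4 5 6 7 }, perm == 13 gives the immediate
    ((13 << 2) | 13) & 0x33 == 0x31 and the vperm2f128 result
    { 2 3 6 7 }; as 13 < 16, the second shuffle then combines d->op0
    with that result.  */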
43787 for (perm = 0; perm < 32; perm++)
43788 {
43789 /* Ignore permutations which do not move anything cross-lane. */
43790 if (perm < 16)
43791 {
43792 /* The second shuffle for e.g. V4DFmode has
43793 0123 and ABCD operands.
43794 Ignore AB23, as 23 is already in the second lane
43795 of the first operand. */
43796 if ((perm & 0xc) == (1 << 2)) continue;
43797 /* And 01CD, as 01 is in the first lane of the first
43798 operand. */
43799 if ((perm & 3) == 0) continue;
43800 /* And 4567, as then the vperm2[fi]128 doesn't change
43801 anything on the original 4567 second operand. */
43802 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43803 }
43804 else
43805 {
43806 /* The second shuffle for e.g. V4DFmode has
43807 4567 and ABCD operands.
43808 Ignore AB67, as 67 is already in the second lane
43809 of the first operand. */
43810 if ((perm & 0xc) == (3 << 2)) continue;
43811 /* And 45CD, as 45 is in the first lane of the first
43812 operand. */
43813 if ((perm & 3) == 2) continue;
43814 /* And 0123, as then the vperm2[fi]128 doesn't change
43815 anything on the original 0123 first operand. */
43816 if ((perm & 0xf) == (1 << 2)) continue;
43817 }
43818
43819 for (i = 0; i < nelt; i++)
43820 {
43821 j = d->perm[i] / nelt2;
43822 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43823 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43824 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43825 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43826 else
43827 break;
43828 }
43829
43830 if (i == nelt)
43831 {
43832 start_sequence ();
43833 ok = expand_vec_perm_1 (&dsecond);
43834 end_sequence ();
43835 }
43836 else
43837 ok = false;
43838
43839 if (ok)
43840 {
43841 if (d->testing_p)
43842 return true;
43843
43844 /* Found a usable second shuffle. dfirst will be
43845 vperm2f128 on d->op0 and d->op1. */
43846 dsecond.testing_p = false;
43847 dfirst = *d;
43848 dfirst.target = gen_reg_rtx (d->vmode);
43849 for (i = 0; i < nelt; i++)
43850 dfirst.perm[i] = (i & (nelt2 - 1))
43851 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43852
43853 ok = expand_vec_perm_1 (&dfirst);
43854 gcc_assert (ok);
43855
43856 /* And dsecond is some single insn shuffle, taking
43857 d->op0 and result of vperm2f128 (if perm < 16) or
43858 d->op1 and result of vperm2f128 (otherwise). */
43859 dsecond.op1 = dfirst.target;
43860 if (perm >= 16)
43861 dsecond.op0 = dfirst.op1;
43862
43863 ok = expand_vec_perm_1 (&dsecond);
43864 gcc_assert (ok);
43865
43866 return true;
43867 }
43868
43869 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43870 if (d->one_operand_p)
43871 return false;
43872 }
43873
43874 return false;
43875 }
43876
43877 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43878 a two vector permutation using 2 intra-lane interleave insns
43879 and cross-lane shuffle for 32-byte vectors. */
43880
43881 static bool
43882 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43883 {
43884 unsigned i, nelt;
43885 rtx (*gen) (rtx, rtx, rtx);
43886
43887 if (d->one_operand_p)
43888 return false;
43889 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43890 ;
43891 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43892 ;
43893 else
43894 return false;
43895
43896 nelt = d->nelt;
43897 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43898 return false;
43899 for (i = 0; i < nelt; i += 2)
43900 if (d->perm[i] != d->perm[0] + i / 2
43901 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43902 return false;
43903
43904 if (d->testing_p)
43905 return true;
43906
43907 switch (d->vmode)
43908 {
43909 case V32QImode:
43910 if (d->perm[0])
43911 gen = gen_vec_interleave_highv32qi;
43912 else
43913 gen = gen_vec_interleave_lowv32qi;
43914 break;
43915 case V16HImode:
43916 if (d->perm[0])
43917 gen = gen_vec_interleave_highv16hi;
43918 else
43919 gen = gen_vec_interleave_lowv16hi;
43920 break;
43921 case V8SImode:
43922 if (d->perm[0])
43923 gen = gen_vec_interleave_highv8si;
43924 else
43925 gen = gen_vec_interleave_lowv8si;
43926 break;
43927 case V4DImode:
43928 if (d->perm[0])
43929 gen = gen_vec_interleave_highv4di;
43930 else
43931 gen = gen_vec_interleave_lowv4di;
43932 break;
43933 case V8SFmode:
43934 if (d->perm[0])
43935 gen = gen_vec_interleave_highv8sf;
43936 else
43937 gen = gen_vec_interleave_lowv8sf;
43938 break;
43939 case V4DFmode:
43940 if (d->perm[0])
43941 gen = gen_vec_interleave_highv4df;
43942 else
43943 gen = gen_vec_interleave_lowv4df;
43944 break;
43945 default:
43946 gcc_unreachable ();
43947 }
43948
43949 emit_insn (gen (d->target, d->op0, d->op1));
43950 return true;
43951 }
43952
43953 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43954 a single vector permutation using a single intra-lane vector
43955 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43956 the non-swapped and swapped vectors together. */
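 /* For illustration: with a single V4DFmode operand { 0 1 2 3 } and
    d->perm = { 2 1 0 3 }, dfirst leaves every requested element in
    the lane it already occupies (here the identity { 0 1 2 3 }),
    dsecond swaps the two 128-bit lanes giving { 2 3 0 1 }, and the
    blend mask msk = 0b0101 takes elements 0 and 2 from the swapped
    copy and the rest from dfirst, producing { 2 1 0 3 }.  */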
43957
43958 static bool
43959 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43960 {
43961 struct expand_vec_perm_d dfirst, dsecond;
43962 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43963 rtx_insn *seq;
43964 bool ok;
43965 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43966
43967 if (!TARGET_AVX
43968 || TARGET_AVX2
43969 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43970 || !d->one_operand_p)
43971 return false;
43972
43973 dfirst = *d;
43974 for (i = 0; i < nelt; i++)
43975 dfirst.perm[i] = 0xff;
43976 for (i = 0, msk = 0; i < nelt; i++)
43977 {
43978 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43979 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43980 return false;
43981 dfirst.perm[j] = d->perm[i];
43982 if (j != i)
43983 msk |= (1 << i);
43984 }
43985 for (i = 0; i < nelt; i++)
43986 if (dfirst.perm[i] == 0xff)
43987 dfirst.perm[i] = i;
43988
43989 if (!d->testing_p)
43990 dfirst.target = gen_reg_rtx (dfirst.vmode);
43991
43992 start_sequence ();
43993 ok = expand_vec_perm_1 (&dfirst);
43994 seq = get_insns ();
43995 end_sequence ();
43996
43997 if (!ok)
43998 return false;
43999
44000 if (d->testing_p)
44001 return true;
44002
44003 emit_insn (seq);
44004
44005 dsecond = *d;
44006 dsecond.op0 = dfirst.target;
44007 dsecond.op1 = dfirst.target;
44008 dsecond.one_operand_p = true;
44009 dsecond.target = gen_reg_rtx (dsecond.vmode);
44010 for (i = 0; i < nelt; i++)
44011 dsecond.perm[i] = i ^ nelt2;
44012
44013 ok = expand_vec_perm_1 (&dsecond);
44014 gcc_assert (ok);
44015
44016 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44017 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44018 return true;
44019 }
44020
44021 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
44022 permutation using two vperm2f128, followed by a vshufpd insn blending
44023 the two vectors together. */
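 /* For illustration: with op0 = { 0 1 2 3 }, op1 = { 4 5 6 7 } and
    d->perm = { 1 6 3 4 }, dfirst gathers the aligned pairs holding
    perm[0] and perm[2], giving { 0 1 2 3 }, dsecond gathers the pairs
    holding perm[1] and perm[3], giving { 6 7 4 5 }, and dthird
    = { 1 4 3 6 } picks one element per lane from each result, which
    a single vshufpd can do, producing { 1 6 3 4 }.  */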
44024
44025 static bool
44026 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44027 {
44028 struct expand_vec_perm_d dfirst, dsecond, dthird;
44029 bool ok;
44030
44031 if (!TARGET_AVX || (d->vmode != V4DFmode))
44032 return false;
44033
44034 if (d->testing_p)
44035 return true;
44036
44037 dfirst = *d;
44038 dsecond = *d;
44039 dthird = *d;
44040
44041 dfirst.perm[0] = (d->perm[0] & ~1);
44042 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44043 dfirst.perm[2] = (d->perm[2] & ~1);
44044 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44045 dsecond.perm[0] = (d->perm[1] & ~1);
44046 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44047 dsecond.perm[2] = (d->perm[3] & ~1);
44048 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44049 dthird.perm[0] = (d->perm[0] % 2);
44050 dthird.perm[1] = (d->perm[1] % 2) + 4;
44051 dthird.perm[2] = (d->perm[2] % 2) + 2;
44052 dthird.perm[3] = (d->perm[3] % 2) + 6;
44053
44054 dfirst.target = gen_reg_rtx (dfirst.vmode);
44055 dsecond.target = gen_reg_rtx (dsecond.vmode);
44056 dthird.op0 = dfirst.target;
44057 dthird.op1 = dsecond.target;
44058 dthird.one_operand_p = false;
44059
44060 canonicalize_perm (&dfirst);
44061 canonicalize_perm (&dsecond);
44062
44063 ok = expand_vec_perm_1 (&dfirst)
44064 && expand_vec_perm_1 (&dsecond)
44065 && expand_vec_perm_1 (&dthird);
44066
44067 gcc_assert (ok);
44068
44069 return true;
44070 }
44071
44072 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44073 permutation with two pshufb insns and an ior. We should have already
44074 failed all two instruction sequences. */
44075
44076 static bool
44077 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44078 {
44079 rtx rperm[2][16], vperm, l, h, op, m128;
44080 unsigned int i, nelt, eltsz;
44081
44082 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44083 return false;
44084 gcc_assert (!d->one_operand_p);
44085
44086 if (d->testing_p)
44087 return true;
44088
44089 nelt = d->nelt;
44090 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44091
44092 /* Generate two permutation masks. If the required element is within
44093 the given vector it is shuffled into the proper lane. If the required
44094 element is in the other vector, force a zero into the lane by setting
44095 bit 7 in the permutation mask. */
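 /* For illustration: for a V16QImode extract-even,
    d->perm = { 0 2 ... 14 16 18 ... 30 }, so the op0 mask becomes
    { 0 2 ... 14 -128 ... -128 } and the op1 mask
    { -128 ... -128 0 2 ... 14 }; each pshufb leaves zeros where the
    other operand's bytes belong and the final por merges the two
    results.  */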
44096 m128 = GEN_INT (-128);
44097 for (i = 0; i < nelt; ++i)
44098 {
44099 unsigned j, e = d->perm[i];
44100 unsigned which = (e >= nelt);
44101 if (e >= nelt)
44102 e -= nelt;
44103
44104 for (j = 0; j < eltsz; ++j)
44105 {
44106 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44107 rperm[1-which][i*eltsz + j] = m128;
44108 }
44109 }
44110
44111 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44112 vperm = force_reg (V16QImode, vperm);
44113
44114 l = gen_reg_rtx (V16QImode);
44115 op = gen_lowpart (V16QImode, d->op0);
44116 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44117
44118 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44119 vperm = force_reg (V16QImode, vperm);
44120
44121 h = gen_reg_rtx (V16QImode);
44122 op = gen_lowpart (V16QImode, d->op1);
44123 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44124
44125 op = d->target;
44126 if (d->vmode != V16QImode)
44127 op = gen_reg_rtx (V16QImode);
44128 emit_insn (gen_iorv16qi3 (op, l, h));
44129 if (op != d->target)
44130 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44131
44132 return true;
44133 }
44134
44135 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44136 with two vpshufb insns, vpermq and vpor. We should have already failed
44137 all two or three instruction sequences. */
44138
44139 static bool
44140 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44141 {
44142 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44143 unsigned int i, nelt, eltsz;
44144
44145 if (!TARGET_AVX2
44146 || !d->one_operand_p
44147 || (d->vmode != V32QImode && d->vmode != V16HImode))
44148 return false;
44149
44150 if (d->testing_p)
44151 return true;
44152
44153 nelt = d->nelt;
44154 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44155
44156 /* Generate two permutation masks. If the required element is within
44157 the same lane, it is shuffled in. If the required element is from the
44158 other lane, force a zero by setting bit 7 in the permutation mask.
44159 In the other mask an element is non-negative whenever it is requested
44160 from the other lane; it is also placed into the other lane, so that
44161 once the two V2TImode halves of the vpshufb result are swapped it
44162 ends up in the right position. */
44163 m128 = GEN_INT (-128);
44164 for (i = 0; i < nelt; ++i)
44165 {
44166 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44167 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44168
44169 for (j = 0; j < eltsz; ++j)
44170 {
44171 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44172 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44173 }
44174 }
44175
44176 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44177 vperm = force_reg (V32QImode, vperm);
44178
44179 h = gen_reg_rtx (V32QImode);
44180 op = gen_lowpart (V32QImode, d->op0);
44181 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44182
44183 /* Swap the 128-bit lanes of h into hp. */
44184 hp = gen_reg_rtx (V4DImode);
44185 op = gen_lowpart (V4DImode, h);
44186 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44187 const1_rtx));
44188
44189 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44190 vperm = force_reg (V32QImode, vperm);
44191
44192 l = gen_reg_rtx (V32QImode);
44193 op = gen_lowpart (V32QImode, d->op0);
44194 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44195
44196 op = d->target;
44197 if (d->vmode != V32QImode)
44198 op = gen_reg_rtx (V32QImode);
44199 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44200 if (op != d->target)
44201 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44202
44203 return true;
44204 }
44205
44206 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44207 and extract-odd permutations of two V32QImode or V16HImode operands
44208 with two vpshufb insns, vpor and vpermq. We should have already
44209 failed all two or three instruction sequences. */
44210
44211 static bool
44212 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44213 {
44214 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44215 unsigned int i, nelt, eltsz;
44216
44217 if (!TARGET_AVX2
44218 || d->one_operand_p
44219 || (d->vmode != V32QImode && d->vmode != V16HImode))
44220 return false;
44221
44222 for (i = 0; i < d->nelt; ++i)
44223 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44224 return false;
44225
44226 if (d->testing_p)
44227 return true;
44228
44229 nelt = d->nelt;
44230 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44231
44232 /* Generate two permutation masks. In the first permutation mask
44233 the first quarter will contain indexes for the first half
44234 of the op0, the second quarter will contain bit 7 set, third quarter
44235 will contain indexes for the second half of the op0 and the
44236 last quarter bit 7 set. In the second permutation mask
44237 the first quarter will contain bit 7 set, the second quarter
44238 indexes for the first half of the op1, the third quarter bit 7 set
44239 and last quarter indexes for the second half of the op1.
44240 I.e. the first mask e.g. for V32QImode extract even will be:
44241 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44242 (all values masked with 0xf except for -128) and second mask
44243 for extract even will be
44244 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44245 m128 = GEN_INT (-128);
44246 for (i = 0; i < nelt; ++i)
44247 {
44248 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44249 unsigned which = d->perm[i] >= nelt;
44250 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44251
44252 for (j = 0; j < eltsz; ++j)
44253 {
44254 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44255 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44256 }
44257 }
44258
44259 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44260 vperm = force_reg (V32QImode, vperm);
44261
44262 l = gen_reg_rtx (V32QImode);
44263 op = gen_lowpart (V32QImode, d->op0);
44264 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44265
44266 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44267 vperm = force_reg (V32QImode, vperm);
44268
44269 h = gen_reg_rtx (V32QImode);
44270 op = gen_lowpart (V32QImode, d->op1);
44271 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44272
44273 ior = gen_reg_rtx (V32QImode);
44274 emit_insn (gen_iorv32qi3 (ior, l, h));
44275
44276 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44277 op = gen_reg_rtx (V4DImode);
44278 ior = gen_lowpart (V4DImode, ior);
44279 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44280 const1_rtx, GEN_INT (3)));
44281 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44282
44283 return true;
44284 }
44285
44286 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
44287 and extract-odd permutations. */
44288
44289 static bool
44290 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44291 {
44292 rtx t1, t2, t3, t4, t5;
44293
44294 switch (d->vmode)
44295 {
44296 case V4DFmode:
44297 if (d->testing_p)
44298 break;
44299 t1 = gen_reg_rtx (V4DFmode);
44300 t2 = gen_reg_rtx (V4DFmode);
44301
44302 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44303 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44304 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44305
44306 /* Now an unpck[lh]pd will produce the result required. */
44307 if (odd)
44308 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44309 else
44310 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44311 emit_insn (t3);
44312 break;
44313
44314 case V8SFmode:
44315 {
44316 int mask = odd ? 0xdd : 0x88;
44317
44318 if (d->testing_p)
44319 break;
44320 t1 = gen_reg_rtx (V8SFmode);
44321 t2 = gen_reg_rtx (V8SFmode);
44322 t3 = gen_reg_rtx (V8SFmode);
44323
44324 /* Shuffle within the 128-bit lanes to produce:
44325 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44326 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44327 GEN_INT (mask)));
44328
44329 /* Shuffle the lanes around to produce:
44330 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44331 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44332 GEN_INT (0x3)));
44333
44334 /* Shuffle within the 128-bit lanes to produce:
44335 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44336 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44337
44338 /* Shuffle within the 128-bit lanes to produce:
44339 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44340 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44341
44342 /* Shuffle the lanes around to produce:
44343 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44344 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44345 GEN_INT (0x20)));
44346 }
44347 break;
44348
44349 case V2DFmode:
44350 case V4SFmode:
44351 case V2DImode:
44352 case V4SImode:
44353 /* These are always directly implementable by expand_vec_perm_1. */
44354 gcc_unreachable ();
44355
44356 case V8HImode:
44357 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44358 return expand_vec_perm_pshufb2 (d);
44359 else
44360 {
44361 if (d->testing_p)
44362 break;
44363 /* We need 2*log2(N)-1 operations to achieve odd/even
44364 with interleave. */
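 /* For illustration, with op0 = { 0 .. 7 } and op1 = { 8 .. 15 }:
    the first low/high interleave pair yields { 0 8 1 9 2 10 3 11 }
    and { 4 12 5 13 6 14 7 15 }; interleaving those again yields
    { 0 4 8 12 1 5 9 13 } and { 2 6 10 14 3 7 11 15 }; a final low
    (resp. high) interleave of that pair produces the even elements
    { 0 2 4 6 8 10 12 14 } or the odd elements
    { 1 3 5 7 9 11 13 15 }.  */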
44365 t1 = gen_reg_rtx (V8HImode);
44366 t2 = gen_reg_rtx (V8HImode);
44367 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44368 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44369 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44370 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44371 if (odd)
44372 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44373 else
44374 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44375 emit_insn (t3);
44376 }
44377 break;
44378
44379 case V16QImode:
44380 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44381 return expand_vec_perm_pshufb2 (d);
44382 else
44383 {
44384 if (d->testing_p)
44385 break;
44386 t1 = gen_reg_rtx (V16QImode);
44387 t2 = gen_reg_rtx (V16QImode);
44388 t3 = gen_reg_rtx (V16QImode);
44389 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44390 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44391 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44392 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44393 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44394 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44395 if (odd)
44396 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44397 else
44398 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44399 emit_insn (t3);
44400 }
44401 break;
44402
44403 case V16HImode:
44404 case V32QImode:
44405 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44406
44407 case V4DImode:
44408 if (!TARGET_AVX2)
44409 {
44410 struct expand_vec_perm_d d_copy = *d;
44411 d_copy.vmode = V4DFmode;
44412 if (d->testing_p)
44413 d_copy.target = gen_lowpart (V4DFmode, d->target);
44414 else
44415 d_copy.target = gen_reg_rtx (V4DFmode);
44416 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44417 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44418 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44419 {
44420 if (!d->testing_p)
44421 emit_move_insn (d->target,
44422 gen_lowpart (V4DImode, d_copy.target));
44423 return true;
44424 }
44425 return false;
44426 }
44427
44428 if (d->testing_p)
44429 break;
44430
44431 t1 = gen_reg_rtx (V4DImode);
44432 t2 = gen_reg_rtx (V4DImode);
44433
44434 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44435 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44436 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44437
44438 /* Now an vpunpck[lh]qdq will produce the result required. */
44439 if (odd)
44440 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44441 else
44442 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44443 emit_insn (t3);
44444 break;
44445
44446 case V8SImode:
44447 if (!TARGET_AVX2)
44448 {
44449 struct expand_vec_perm_d d_copy = *d;
44450 d_copy.vmode = V8SFmode;
44451 if (d->testing_p)
44452 d_copy.target = gen_lowpart (V8SFmode, d->target);
44453 else
44454 d_copy.target = gen_reg_rtx (V8SFmode);
44455 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44456 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44457 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44458 {
44459 if (!d->testing_p)
44460 emit_move_insn (d->target,
44461 gen_lowpart (V8SImode, d_copy.target));
44462 return true;
44463 }
44464 return false;
44465 }
44466
44467 if (d->testing_p)
44468 break;
44469
44470 t1 = gen_reg_rtx (V8SImode);
44471 t2 = gen_reg_rtx (V8SImode);
44472 t3 = gen_reg_rtx (V4DImode);
44473 t4 = gen_reg_rtx (V4DImode);
44474 t5 = gen_reg_rtx (V4DImode);
44475
44476 /* Shuffle the lanes around into
44477 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44478 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44479 gen_lowpart (V4DImode, d->op1),
44480 GEN_INT (0x20)));
44481 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44482 gen_lowpart (V4DImode, d->op1),
44483 GEN_INT (0x31)));
44484
44485 /* Swap the 2nd and 3rd position in each lane into
44486 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44487 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44488 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44489 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44490 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44491
44492 /* Now an vpunpck[lh]qdq will produce
44493 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44494 if (odd)
44495 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44496 gen_lowpart (V4DImode, t2));
44497 else
44498 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44499 gen_lowpart (V4DImode, t2));
44500 emit_insn (t3);
44501 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44502 break;
44503
44504 default:
44505 gcc_unreachable ();
44506 }
44507
44508 return true;
44509 }
44510
44511 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44512 extract-even and extract-odd permutations. */
44513
44514 static bool
44515 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44516 {
44517 unsigned i, odd, nelt = d->nelt;
44518
44519 odd = d->perm[0];
44520 if (odd != 0 && odd != 1)
44521 return false;
44522
44523 for (i = 1; i < nelt; ++i)
44524 if (d->perm[i] != 2 * i + odd)
44525 return false;
44526
44527 return expand_vec_perm_even_odd_1 (d, odd);
44528 }
44529
44530 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44531 permutations. We assume that expand_vec_perm_1 has already failed. */
44532
44533 static bool
44534 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44535 {
44536 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44537 enum machine_mode vmode = d->vmode;
44538 unsigned char perm2[4];
44539 rtx op0 = d->op0, dest;
44540 bool ok;
44541
44542 switch (vmode)
44543 {
44544 case V4DFmode:
44545 case V8SFmode:
44546 /* These are special-cased in sse.md so that we can optionally
44547 use the vbroadcast instruction. They expand to two insns
44548 if the input happens to be in a register. */
44549 gcc_unreachable ();
44550
44551 case V2DFmode:
44552 case V2DImode:
44553 case V4SFmode:
44554 case V4SImode:
44555 /* These are always implementable using standard shuffle patterns. */
44556 gcc_unreachable ();
44557
44558 case V8HImode:
44559 case V16QImode:
44560 /* These can be implemented via interleave. We save one insn by
44561 stopping once we have promoted to V4SImode and then using pshufd. */
44562 if (d->testing_p)
44563 return true;
44564 do
44565 {
44566 rtx dest;
44567 rtx (*gen) (rtx, rtx, rtx)
44568 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44569 : gen_vec_interleave_lowv8hi;
44570
44571 if (elt >= nelt2)
44572 {
44573 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44574 : gen_vec_interleave_highv8hi;
44575 elt -= nelt2;
44576 }
44577 nelt2 /= 2;
44578
44579 dest = gen_reg_rtx (vmode);
44580 emit_insn (gen (dest, op0, op0));
44581 vmode = get_mode_wider_vector (vmode);
44582 op0 = gen_lowpart (vmode, dest);
44583 }
44584 while (vmode != V4SImode);
44585
44586 memset (perm2, elt, 4);
44587 dest = gen_reg_rtx (V4SImode);
44588 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44589 gcc_assert (ok);
44590 if (!d->testing_p)
44591 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44592 return true;
44593
44594 case V32QImode:
44595 case V16HImode:
44596 case V8SImode:
44597 case V4DImode:
44598 /* For AVX2 broadcasts of the first element vpbroadcast* or
44599 vpermq should be used by expand_vec_perm_1. */
44600 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44601 return false;
44602
44603 default:
44604 gcc_unreachable ();
44605 }
44606 }
44607
44608 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44609 broadcast permutations. */
44610
44611 static bool
44612 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44613 {
44614 unsigned i, elt, nelt = d->nelt;
44615
44616 if (!d->one_operand_p)
44617 return false;
44618
44619 elt = d->perm[0];
44620 for (i = 1; i < nelt; ++i)
44621 if (d->perm[i] != elt)
44622 return false;
44623
44624 return expand_vec_perm_broadcast_1 (d);
44625 }
44626
44627 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44628 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44629 all the shorter instruction sequences. */
44630
44631 static bool
44632 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44633 {
44634 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44635 unsigned int i, nelt, eltsz;
44636 bool used[4];
44637
44638 if (!TARGET_AVX2
44639 || d->one_operand_p
44640 || (d->vmode != V32QImode && d->vmode != V16HImode))
44641 return false;
44642
44643 if (d->testing_p)
44644 return true;
44645
44646 nelt = d->nelt;
44647 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44648
44649 /* Generate 4 permutation masks. If the required element is within
44650 the same lane, it is shuffled in. If the required element is from the
44651 other lane, force a zero by setting bit 7 in the permutation mask.
44652 In the other mask an element is non-negative whenever it is requested
44653 from the other lane; it is also placed into the other lane, so that
44654 once the two V2TImode halves of the vpshufb result are swapped it
44655 ends up in the right position. */
44656 m128 = GEN_INT (-128);
44657 for (i = 0; i < 32; ++i)
44658 {
44659 rperm[0][i] = m128;
44660 rperm[1][i] = m128;
44661 rperm[2][i] = m128;
44662 rperm[3][i] = m128;
44663 }
44664 used[0] = false;
44665 used[1] = false;
44666 used[2] = false;
44667 used[3] = false;
44668 for (i = 0; i < nelt; ++i)
44669 {
44670 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44671 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44672 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44673
44674 for (j = 0; j < eltsz; ++j)
44675 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44676 used[which] = true;
44677 }
44678
44679 for (i = 0; i < 2; ++i)
44680 {
44681 if (!used[2 * i + 1])
44682 {
44683 h[i] = NULL_RTX;
44684 continue;
44685 }
44686 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44687 gen_rtvec_v (32, rperm[2 * i + 1]));
44688 vperm = force_reg (V32QImode, vperm);
44689 h[i] = gen_reg_rtx (V32QImode);
44690 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44691 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44692 }
44693
44694 /* Swap the 128-bit lanes of h[X]. */
44695 for (i = 0; i < 2; ++i)
44696 {
44697 if (h[i] == NULL_RTX)
44698 continue;
44699 op = gen_reg_rtx (V4DImode);
44700 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44701 const2_rtx, GEN_INT (3), const0_rtx,
44702 const1_rtx));
44703 h[i] = gen_lowpart (V32QImode, op);
44704 }
44705
44706 for (i = 0; i < 2; ++i)
44707 {
44708 if (!used[2 * i])
44709 {
44710 l[i] = NULL_RTX;
44711 continue;
44712 }
44713 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44714 vperm = force_reg (V32QImode, vperm);
44715 l[i] = gen_reg_rtx (V32QImode);
44716 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44717 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44718 }
44719
44720 for (i = 0; i < 2; ++i)
44721 {
44722 if (h[i] && l[i])
44723 {
44724 op = gen_reg_rtx (V32QImode);
44725 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44726 l[i] = op;
44727 }
44728 else if (h[i])
44729 l[i] = h[i];
44730 }
44731
44732 gcc_assert (l[0] && l[1]);
44733 op = d->target;
44734 if (d->vmode != V32QImode)
44735 op = gen_reg_rtx (V32QImode);
44736 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44737 if (op != d->target)
44738 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44739 return true;
44740 }
44741
44742 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44743 With all of the interface bits taken care of, perform the expansion
44744 in D and return true on success. */
44745
44746 static bool
44747 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44748 {
44749 /* Try a single instruction expansion. */
44750 if (expand_vec_perm_1 (d))
44751 return true;
44752
44753 /* Try sequences of two instructions. */
44754
44755 if (expand_vec_perm_pshuflw_pshufhw (d))
44756 return true;
44757
44758 if (expand_vec_perm_palignr (d))
44759 return true;
44760
44761 if (expand_vec_perm_interleave2 (d))
44762 return true;
44763
44764 if (expand_vec_perm_broadcast (d))
44765 return true;
44766
44767 if (expand_vec_perm_vpermq_perm_1 (d))
44768 return true;
44769
44770 if (expand_vec_perm_vperm2f128 (d))
44771 return true;
44772
44773 if (expand_vec_perm_pblendv (d))
44774 return true;
44775
44776 /* Try sequences of three instructions. */
44777
44778 if (expand_vec_perm_2vperm2f128_vshuf (d))
44779 return true;
44780
44781 if (expand_vec_perm_pshufb2 (d))
44782 return true;
44783
44784 if (expand_vec_perm_interleave3 (d))
44785 return true;
44786
44787 if (expand_vec_perm_vperm2f128_vblend (d))
44788 return true;
44789
44790 /* Try sequences of four instructions. */
44791
44792 if (expand_vec_perm_vpshufb2_vpermq (d))
44793 return true;
44794
44795 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44796 return true;
44797
44798 /* ??? Look for narrow permutations whose element orderings would
44799 allow the promotion to a wider mode. */
44800
44801 /* ??? Look for sequences of interleave or a wider permute that place
44802 the data into the correct lanes for a half-vector shuffle like
44803 pshuf[lh]w or vpermilps. */
44804
44805 /* ??? Look for sequences of interleave that produce the desired results.
44806 The combinatorics of punpck[lh] get pretty ugly... */
44807
44808 if (expand_vec_perm_even_odd (d))
44809 return true;
44810
44811 /* Even longer sequences. */
44812 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44813 return true;
44814
44815 return false;
44816 }
44817
44818 /* If a permutation only uses one operand, make it clear. Returns true
44819 if the permutation references both operands. */
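 /* E.g. for nelt == 4, a selector { 4 5 6 7 } references only the
    second operand (which == 2), so it is folded to { 0 1 2 3 } with
    op0 = op1 and one_operand_p left set, while a selector such as
    { 0 5 2 7 } references both operands and the function returns
    true.  */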
44820
44821 static bool
44822 canonicalize_perm (struct expand_vec_perm_d *d)
44823 {
44824 int i, which, nelt = d->nelt;
44825
44826 for (i = which = 0; i < nelt; ++i)
44827 which |= (d->perm[i] < nelt ? 1 : 2);
44828
44829 d->one_operand_p = true;
44830 switch (which)
44831 {
44832 default:
44833 gcc_unreachable();
44834
44835 case 3:
44836 if (!rtx_equal_p (d->op0, d->op1))
44837 {
44838 d->one_operand_p = false;
44839 break;
44840 }
44841 /* The elements of PERM do not suggest that only the first operand
44842 is used, but both operands are identical. Allow easier matching
44843 of the permutation by folding the permutation into the single
44844 input vector. */
44845 /* FALLTHRU */
44846
44847 case 2:
44848 for (i = 0; i < nelt; ++i)
44849 d->perm[i] &= nelt - 1;
44850 d->op0 = d->op1;
44851 break;
44852
44853 case 1:
44854 d->op1 = d->op0;
44855 break;
44856 }
44857
44858 return (which == 3);
44859 }
44860
44861 bool
44862 ix86_expand_vec_perm_const (rtx operands[4])
44863 {
44864 struct expand_vec_perm_d d;
44865 unsigned char perm[MAX_VECT_LEN];
44866 int i, nelt;
44867 bool two_args;
44868 rtx sel;
44869
44870 d.target = operands[0];
44871 d.op0 = operands[1];
44872 d.op1 = operands[2];
44873 sel = operands[3];
44874
44875 d.vmode = GET_MODE (d.target);
44876 gcc_assert (VECTOR_MODE_P (d.vmode));
44877 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44878 d.testing_p = false;
44879
44880 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44881 gcc_assert (XVECLEN (sel, 0) == nelt);
44882 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44883
44884 for (i = 0; i < nelt; ++i)
44885 {
44886 rtx e = XVECEXP (sel, 0, i);
44887 int ei = INTVAL (e) & (2 * nelt - 1);
44888 d.perm[i] = ei;
44889 perm[i] = ei;
44890 }
44891
44892 two_args = canonicalize_perm (&d);
44893
44894 if (ix86_expand_vec_perm_const_1 (&d))
44895 return true;
44896
44897 /* If the selector says both arguments are needed, but the operands are the
44898 same, the above tried to expand with one_operand_p and flattened selector.
44899 If that didn't work, retry without one_operand_p; we succeeded with that
44900 during testing. */
44901 if (two_args && d.one_operand_p)
44902 {
44903 d.one_operand_p = false;
44904 memcpy (d.perm, perm, sizeof (perm));
44905 return ix86_expand_vec_perm_const_1 (&d);
44906 }
44907
44908 return false;
44909 }
44910
44911 /* Implement targetm.vectorize.vec_perm_const_ok. */
44912
44913 static bool
44914 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44915 const unsigned char *sel)
44916 {
44917 struct expand_vec_perm_d d;
44918 unsigned int i, nelt, which;
44919 bool ret;
44920
44921 d.vmode = vmode;
44922 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44923 d.testing_p = true;
44924
44925 /* Given sufficient ISA support we can just return true here
44926 for selected vector modes. */
44927 if (d.vmode == V16SImode || d.vmode == V16SFmode
44928 || d.vmode == V8DFmode || d.vmode == V8DImode)
44929 /* All implementable with a single vpermi2 insn. */
44930 return true;
44931 if (GET_MODE_SIZE (d.vmode) == 16)
44932 {
44933 /* All implementable with a single vpperm insn. */
44934 if (TARGET_XOP)
44935 return true;
44936 /* All implementable with 2 pshufb + 1 ior. */
44937 if (TARGET_SSSE3)
44938 return true;
44939 /* All implementable with shufpd or unpck[lh]pd. */
44940 if (d.nelt == 2)
44941 return true;
44942 }
44943
44944 /* Extract the values from the vector CST into the permutation
44945 array in D. */
44946 memcpy (d.perm, sel, nelt);
44947 for (i = which = 0; i < nelt; ++i)
44948 {
44949 unsigned char e = d.perm[i];
44950 gcc_assert (e < 2 * nelt);
44951 which |= (e < nelt ? 1 : 2);
44952 }
44953
44954 /* For all elements from second vector, fold the elements to first. */
44955 if (which == 2)
44956 for (i = 0; i < nelt; ++i)
44957 d.perm[i] -= nelt;
44958
44959 /* Check whether the mask can be applied to the vector type. */
44960 d.one_operand_p = (which != 3);
44961
44962 /* Implementable with shufps or pshufd. */
44963 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44964 return true;
44965
44966 /* Otherwise we have to go through the motions and see if we can
44967 figure out how to generate the requested permutation. */
44968 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44969 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44970 if (!d.one_operand_p)
44971 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44972
44973 start_sequence ();
44974 ret = ix86_expand_vec_perm_const_1 (&d);
44975 end_sequence ();
44976
44977 return ret;
44978 }
44979
44980 void
44981 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44982 {
44983 struct expand_vec_perm_d d;
44984 unsigned i, nelt;
44985
44986 d.target = targ;
44987 d.op0 = op0;
44988 d.op1 = op1;
44989 d.vmode = GET_MODE (targ);
44990 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44991 d.one_operand_p = false;
44992 d.testing_p = false;
44993
44994 for (i = 0; i < nelt; ++i)
44995 d.perm[i] = i * 2 + odd;
44996
44997 /* We'll either be able to implement the permutation directly... */
44998 if (expand_vec_perm_1 (&d))
44999 return;
45000
45001 /* ... or we use the special-case patterns. */
45002 expand_vec_perm_even_odd_1 (&d, odd);
45003 }
45004
45005 static void
45006 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45007 {
45008 struct expand_vec_perm_d d;
45009 unsigned i, nelt, base;
45010 bool ok;
45011
45012 d.target = targ;
45013 d.op0 = op0;
45014 d.op1 = op1;
45015 d.vmode = GET_MODE (targ);
45016 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45017 d.one_operand_p = false;
45018 d.testing_p = false;
45019
45020 base = high_p ? nelt / 2 : 0;
45021 for (i = 0; i < nelt / 2; ++i)
45022 {
45023 d.perm[i * 2] = i + base;
45024 d.perm[i * 2 + 1] = i + base + nelt;
45025 }
45026
45027 /* Note that for AVX this isn't one instruction. */
45028 ok = ix86_expand_vec_perm_const_1 (&d);
45029 gcc_assert (ok);
45030 }
45031
45032
45033 /* Expand a vector operation CODE for a V*QImode in terms of the
45034 same operation on V*HImode. */
45035
45036 void
45037 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45038 {
45039 enum machine_mode qimode = GET_MODE (dest);
45040 enum machine_mode himode;
45041 rtx (*gen_il) (rtx, rtx, rtx);
45042 rtx (*gen_ih) (rtx, rtx, rtx);
45043 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45044 struct expand_vec_perm_d d;
45045 bool ok, full_interleave;
45046 bool uns_p = false;
45047 int i;
45048
45049 switch (qimode)
45050 {
45051 case V16QImode:
45052 himode = V8HImode;
45053 gen_il = gen_vec_interleave_lowv16qi;
45054 gen_ih = gen_vec_interleave_highv16qi;
45055 break;
45056 case V32QImode:
45057 himode = V16HImode;
45058 gen_il = gen_avx2_interleave_lowv32qi;
45059 gen_ih = gen_avx2_interleave_highv32qi;
45060 break;
45061 default:
45062 gcc_unreachable ();
45063 }
45064
45065 op2_l = op2_h = op2;
45066 switch (code)
45067 {
45068 case MULT:
45069 /* Unpack data such that we've got a source byte in each low byte of
45070 each word. We don't care what goes into the high byte of each word.
45071 Rather than trying to get zero in there, most convenient is to let
45072 it be a copy of the low byte. */
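 /* The junk in the high bytes is harmless because
    (a + 256*x) * (b + 256*y) == a*b (mod 256): the low byte of each
    HImode product depends only on the low bytes of its operands, and
    the permutation at the end of this function keeps exactly those
    low bytes.  */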
45073 op2_l = gen_reg_rtx (qimode);
45074 op2_h = gen_reg_rtx (qimode);
45075 emit_insn (gen_il (op2_l, op2, op2));
45076 emit_insn (gen_ih (op2_h, op2, op2));
45077 /* FALLTHRU */
45078
45079 op1_l = gen_reg_rtx (qimode);
45080 op1_h = gen_reg_rtx (qimode);
45081 emit_insn (gen_il (op1_l, op1, op1));
45082 emit_insn (gen_ih (op1_h, op1, op1));
45083 full_interleave = qimode == V16QImode;
45084 break;
45085
45086 case ASHIFT:
45087 case LSHIFTRT:
45088 uns_p = true;
45089 /* FALLTHRU */
45090 case ASHIFTRT:
45091 op1_l = gen_reg_rtx (himode);
45092 op1_h = gen_reg_rtx (himode);
45093 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45094 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45095 full_interleave = true;
45096 break;
45097 default:
45098 gcc_unreachable ();
45099 }
45100
45101 /* Perform the operation. */
45102 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45103 1, OPTAB_DIRECT);
45104 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45105 1, OPTAB_DIRECT);
45106 gcc_assert (res_l && res_h);
45107
45108 /* Merge the data back into the right place. */
45109 d.target = dest;
45110 d.op0 = gen_lowpart (qimode, res_l);
45111 d.op1 = gen_lowpart (qimode, res_h);
45112 d.vmode = qimode;
45113 d.nelt = GET_MODE_NUNITS (qimode);
45114 d.one_operand_p = false;
45115 d.testing_p = false;
45116
45117 if (full_interleave)
45118 {
45119 /* For SSE2, we used a full interleave, so the desired
45120 results are in the even elements. */
45121 for (i = 0; i < 32; ++i)
45122 d.perm[i] = i * 2;
45123 }
45124 else
45125 {
45126 /* For AVX, the interleave used above was not cross-lane, so the
45127 extraction we need is the even elements, but with the second and
45128 third quarters swapped. Happily, that is even one insn shorter than a plain even extraction. */
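 /* Concretely, for V32QImode the in-lane interleave leaves the
    products for input bytes 0-7 and 16-23 in res_l and those for
    bytes 8-15 and 24-31 in res_h, so restoring the original order
    takes the even positions of res_l's low lane, then res_h's low
    lane, then res_l's high lane, then res_h's high lane, i.e. the
    quarter-swapped even extraction built below.  */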
45129 for (i = 0; i < 32; ++i)
45130 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45131 }
45132
45133 ok = ix86_expand_vec_perm_const_1 (&d);
45134 gcc_assert (ok);
45135
45136 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45137 gen_rtx_fmt_ee (code, qimode, op1, op2));
45138 }
45139
45140 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45141 if op is CONST_VECTOR with all odd elements equal to their
45142 preceding element. */
45143
45144 static bool
45145 const_vector_equal_evenodd_p (rtx op)
45146 {
45147 enum machine_mode mode = GET_MODE (op);
45148 int i, nunits = GET_MODE_NUNITS (mode);
45149 if (GET_CODE (op) != CONST_VECTOR
45150 || nunits != CONST_VECTOR_NUNITS (op))
45151 return false;
45152 for (i = 0; i < nunits; i += 2)
45153 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45154 return false;
45155 return true;
45156 }
45157
45158 void
45159 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45160 bool uns_p, bool odd_p)
45161 {
45162 enum machine_mode mode = GET_MODE (op1);
45163 enum machine_mode wmode = GET_MODE (dest);
45164 rtx x;
45165 rtx orig_op1 = op1, orig_op2 = op2;
45166
45167 if (!nonimmediate_operand (op1, mode))
45168 op1 = force_reg (mode, op1);
45169 if (!nonimmediate_operand (op2, mode))
45170 op2 = force_reg (mode, op2);
45171
45172 /* We only play even/odd games with vectors of SImode. */
45173 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45174
45175 /* If we're looking for the odd results, shift those members down to
45176 the even slots. For some cpus this is faster than a PSHUFD. */
45177 if (odd_p)
45178 {
45179 /* For XOP use vpmacsdqh, but only for smult, as it is only
45180 signed. */
45181 if (TARGET_XOP && mode == V4SImode && !uns_p)
45182 {
45183 x = force_reg (wmode, CONST0_RTX (wmode));
45184 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45185 return;
45186 }
45187
45188 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45189 if (!const_vector_equal_evenodd_p (orig_op1))
45190 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45191 x, NULL, 1, OPTAB_DIRECT);
45192 if (!const_vector_equal_evenodd_p (orig_op2))
45193 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45194 x, NULL, 1, OPTAB_DIRECT);
45195 op1 = gen_lowpart (mode, op1);
45196 op2 = gen_lowpart (mode, op2);
45197 }
45198
45199 if (mode == V16SImode)
45200 {
45201 if (uns_p)
45202 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45203 else
45204 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45205 }
45206 else if (mode == V8SImode)
45207 {
45208 if (uns_p)
45209 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45210 else
45211 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45212 }
45213 else if (uns_p)
45214 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45215 else if (TARGET_SSE4_1)
45216 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45217 else
45218 {
45219 rtx s1, s2, t0, t1, t2;
45220
45221 /* The easiest way to implement this without PMULDQ is to go through
45222 the motions as if we are performing a full 64-bit multiply. With
45223 the exception that we need to do less shuffling of the elements. */
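 /* As a sketch of the arithmetic: interpreting the 32-bit inputs as
    signed, a = a_u - 2^32 * (a < 0) and likewise for b, so modulo 2^64
    a * b = a_u * b_u - 2^32 * ((a < 0) * b_u + (b < 0) * a_u).
    The comparisons below produce all-ones lanes for negative elements;
    multiplying those masks by the other operand and shifting left by
    32 supplies exactly the correction terms, which are then added to
    the unsigned product of the low parts.  */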
45224
45225 /* Compute the sign-extension, aka highparts, of the two operands. */
45226 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45227 op1, pc_rtx, pc_rtx);
45228 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45229 op2, pc_rtx, pc_rtx);
45230
45231 /* Multiply LO(A) * HI(B), and vice-versa. */
45232 t1 = gen_reg_rtx (wmode);
45233 t2 = gen_reg_rtx (wmode);
45234 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45235 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45236
45237 /* Multiply LO(A) * LO(B). */
45238 t0 = gen_reg_rtx (wmode);
45239 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45240
45241 /* Combine and shift the highparts into place. */
45242 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45243 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45244 1, OPTAB_DIRECT);
45245
45246 /* Combine high and low parts. */
45247 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45248 return;
45249 }
45250 emit_insn (x);
45251 }
45252
45253 void
45254 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45255 bool uns_p, bool high_p)
45256 {
45257 enum machine_mode wmode = GET_MODE (dest);
45258 enum machine_mode mode = GET_MODE (op1);
45259 rtx t1, t2, t3, t4, mask;
45260
45261 switch (mode)
45262 {
45263 case V4SImode:
45264 t1 = gen_reg_rtx (mode);
45265 t2 = gen_reg_rtx (mode);
45266 if (TARGET_XOP && !uns_p)
45267 {
45268 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45269 shuffle the elements once so that all elements are in the right
45270 place for immediate use: { A C B D }. */
45271 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45272 const1_rtx, GEN_INT (3)));
45273 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45274 const1_rtx, GEN_INT (3)));
45275 }
45276 else
45277 {
45278 /* Put the elements into place for the multiply. */
45279 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45280 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45281 high_p = false;
45282 }
45283 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45284 break;
45285
45286 case V8SImode:
45287 /* Shuffle the elements between the lanes. After this we
45288 have { A B E F | C D G H } for each operand. */
45289 t1 = gen_reg_rtx (V4DImode);
45290 t2 = gen_reg_rtx (V4DImode);
45291 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45292 const0_rtx, const2_rtx,
45293 const1_rtx, GEN_INT (3)));
45294 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45295 const0_rtx, const2_rtx,
45296 const1_rtx, GEN_INT (3)));
45297
45298 /* Shuffle the elements within the lanes. After this we
45299 have { A A B B | C C D D } or { E E F F | G G H H }. */
45300 t3 = gen_reg_rtx (V8SImode);
45301 t4 = gen_reg_rtx (V8SImode);
45302 mask = GEN_INT (high_p
45303 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45304 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45305 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45306 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45307
45308 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45309 break;
45310
45311 case V8HImode:
45312 case V16HImode:
45313 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45314 uns_p, OPTAB_DIRECT);
45315 t2 = expand_binop (mode,
45316 uns_p ? umul_highpart_optab : smul_highpart_optab,
45317 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45318 gcc_assert (t1 && t2);
45319
45320 t3 = gen_reg_rtx (mode);
45321 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45322 emit_move_insn (dest, gen_lowpart (wmode, t3));
45323 break;
45324
45325 case V16QImode:
45326 case V32QImode:
45327 t1 = gen_reg_rtx (wmode);
45328 t2 = gen_reg_rtx (wmode);
45329 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45330 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45331
45332 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45333 break;
45334
45335 default:
45336 gcc_unreachable ();
45337 }
45338 }
45339
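/* Expand OP0 = OP1 * OP2 for V4SImode using SSE2 even/odd widening
   multiplies.  */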
45340 void
45341 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45342 {
45343 rtx res_1, res_2, res_3, res_4;
45344
45345 res_1 = gen_reg_rtx (V4SImode);
45346 res_2 = gen_reg_rtx (V4SImode);
45347 res_3 = gen_reg_rtx (V2DImode);
45348 res_4 = gen_reg_rtx (V2DImode);
45349 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45350 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45351
45352 /* Move the results in element 2 down to element 1; we don't care
45353 what goes in elements 2 and 3. Then we can merge the parts
45354 back together with an interleave.
45355
45356 Note that two other sequences were tried:
45357 (1) Use interleaves at the start instead of psrldq, which allows
45358 us to use a single shufps to merge things back at the end.
45359 (2) Use shufps here to combine the two vectors, then pshufd to
45360 put the elements in the correct order.
45361 In both cases the cost of the reformatting stall was too high
45362 and the overall sequence slower. */
45363
45364 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45365 const0_rtx, const2_rtx,
45366 const0_rtx, const0_rtx));
45367 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45368 const0_rtx, const2_rtx,
45369 const0_rtx, const0_rtx));
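  /* Illustrative data flow: viewed as V4SI, res_3 = { lo(P0), hi(P0),
     lo(P2), hi(P2) } for the even products and res_4 likewise for the
     odd ones; the pshufd selectors {0,2,0,0} above move the two low
     dwords into elements 0 and 1, so the interleave below produces
     { lo(P0), lo(P1), lo(P2), lo(P3) }.  */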
45370 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45371
45372 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45373 }
45374
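/* Expand OP0 = OP1 * OP2 for a vector of 64-bit elements (V2DI, V4DI
   or V8DI), synthesizing the product from 32-bit widening multiplies.  */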
45375 void
45376 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45377 {
45378 enum machine_mode mode = GET_MODE (op0);
45379 rtx t1, t2, t3, t4, t5, t6;
45380
45381 if (TARGET_XOP && mode == V2DImode)
45382 {
45383 /* op1: A,B,C,D, op2: E,F,G,H */
45384 op1 = gen_lowpart (V4SImode, op1);
45385 op2 = gen_lowpart (V4SImode, op2);
45386
45387 t1 = gen_reg_rtx (V4SImode);
45388 t2 = gen_reg_rtx (V4SImode);
45389 t3 = gen_reg_rtx (V2DImode);
45390 t4 = gen_reg_rtx (V2DImode);
45391
45392 /* t1: B,A,D,C */
45393 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45394 GEN_INT (1),
45395 GEN_INT (0),
45396 GEN_INT (3),
45397 GEN_INT (2)));
45398
45399 /* t2: (B*E),(A*F),(D*G),(C*H) */
45400 emit_insn (gen_mulv4si3 (t2, t1, op2));
45401
45402 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45403 emit_insn (gen_xop_phadddq (t3, t2));
45404
45405 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45406 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45407
45408 /* Multiply lower parts and add all */
45409 t5 = gen_reg_rtx (V2DImode);
45410 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45411 gen_lowpart (V4SImode, op1),
45412 gen_lowpart (V4SImode, op2)));
45413 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45414
45415 }
45416 else
45417 {
45418 enum machine_mode nmode;
45419 rtx (*umul) (rtx, rtx, rtx);
45420
45421 if (mode == V2DImode)
45422 {
45423 umul = gen_vec_widen_umult_even_v4si;
45424 nmode = V4SImode;
45425 }
45426 else if (mode == V4DImode)
45427 {
45428 umul = gen_vec_widen_umult_even_v8si;
45429 nmode = V8SImode;
45430 }
45431 else if (mode == V8DImode)
45432 {
45433 umul = gen_vec_widen_umult_even_v16si;
45434 nmode = V16SImode;
45435 }
45436 else
45437 gcc_unreachable ();
45438
45439
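      /* Illustrative note: writing each element as A = Ah*2^32 + Al and
	 B = Bh*2^32 + Bl, the low 64 bits of the product are
	    A * B mod 2^64 = Al*Bl + ((Al*Bh + Ah*Bl) << 32),
	 which the three widening multiplies below compute.  */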
45440 /* Multiply low parts. */
45441 t1 = gen_reg_rtx (mode);
45442 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45443
45444 /* Shift input vectors right 32 bits so we can multiply high parts. */
45445 t6 = GEN_INT (32);
45446 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45447 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45448
45449 /* Multiply high parts by low parts. */
45450 t4 = gen_reg_rtx (mode);
45451 t5 = gen_reg_rtx (mode);
45452 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45453 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45454
45455 /* Combine and shift the highparts back. */
45456 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45457 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45458
45459 /* Combine high and low parts. */
45460 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45461 }
45462
45463 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45464 gen_rtx_MULT (mode, op1, op2));
45465 }
45466
45467 /* Calculate integer abs() using only SSE2 instructions. */
45468
45469 void
45470 ix86_expand_sse2_abs (rtx target, rtx input)
45471 {
45472 enum machine_mode mode = GET_MODE (target);
45473 rtx tmp0, tmp1, x;
45474
45475 switch (mode)
45476 {
45477 /* For 32-bit signed integer X, the best way to calculate the absolute
45478 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
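    /* For example, with X = -5 and W = 32: X >> 31 == -1,
       -1 ^ -5 == 4, and 4 - (-1) == 5.  */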
45479 case V4SImode:
45480 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45481 GEN_INT (GET_MODE_BITSIZE
45482 (GET_MODE_INNER (mode)) - 1),
45483 NULL, 0, OPTAB_DIRECT);
45484 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45485 NULL, 0, OPTAB_DIRECT);
45486 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45487 target, 0, OPTAB_DIRECT);
45488 break;
45489
45490 /* For 16-bit signed integer X, the best way to calculate the absolute
45491 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45492 case V8HImode:
45493 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45494
45495 x = expand_simple_binop (mode, SMAX, tmp0, input,
45496 target, 0, OPTAB_DIRECT);
45497 break;
45498
45499 /* For 8-bit signed integer X, the best way to calculate the absolute
45500 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45501 as SSE2 provides the PMINUB insn. */
45502 case V16QImode:
45503 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45504
45505 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45506 target, 0, OPTAB_DIRECT);
45507 break;
45508
45509 default:
45510 gcc_unreachable ();
45511 }
45512
45513 if (x != target)
45514 emit_move_insn (target, x);
45515 }
45516
45517 /* Expand an insert into a vector register through pinsr insn.
45518 Return true if successful. */
45519
45520 bool
45521 ix86_expand_pinsr (rtx *operands)
45522 {
45523 rtx dst = operands[0];
45524 rtx src = operands[3];
45525
45526 unsigned int size = INTVAL (operands[1]);
45527 unsigned int pos = INTVAL (operands[2]);
45528
45529 if (GET_CODE (dst) == SUBREG)
45530 {
45531 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45532 dst = SUBREG_REG (dst);
45533 }
45534
45535 if (GET_CODE (src) == SUBREG)
45536 src = SUBREG_REG (src);
45537
45538 switch (GET_MODE (dst))
45539 {
45540 case V16QImode:
45541 case V8HImode:
45542 case V4SImode:
45543 case V2DImode:
45544 {
45545 enum machine_mode srcmode, dstmode;
45546 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45547
45548 srcmode = mode_for_size (size, MODE_INT, 0);
45549
45550 switch (srcmode)
45551 {
45552 case QImode:
45553 if (!TARGET_SSE4_1)
45554 return false;
45555 dstmode = V16QImode;
45556 pinsr = gen_sse4_1_pinsrb;
45557 break;
45558
45559 case HImode:
45560 if (!TARGET_SSE2)
45561 return false;
45562 dstmode = V8HImode;
45563 pinsr = gen_sse2_pinsrw;
45564 break;
45565
45566 case SImode:
45567 if (!TARGET_SSE4_1)
45568 return false;
45569 dstmode = V4SImode;
45570 pinsr = gen_sse4_1_pinsrd;
45571 break;
45572
45573 case DImode:
45574 gcc_assert (TARGET_64BIT);
45575 if (!TARGET_SSE4_1)
45576 return false;
45577 dstmode = V2DImode;
45578 pinsr = gen_sse4_1_pinsrq;
45579 break;
45580
45581 default:
45582 return false;
45583 }
45584
45585 rtx d = dst;
45586 if (GET_MODE (dst) != dstmode)
45587 d = gen_reg_rtx (dstmode);
45588 src = gen_lowpart (srcmode, src);
45589
45590 pos /= size;
45591
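	/* The pinsr patterns encode the destination element as a
	   single-bit vec_merge mask, hence the 1 << POS below.  */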
45592 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45593 GEN_INT (1 << pos)));
45594 if (d != dst)
45595 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45596 return true;
45597 }
45598
45599 default:
45600 return false;
45601 }
45602 }
45603 \f
45604 /* This function returns the calling-ABI-specific va_list type node.
45605 It returns the FNDECL specific va_list type. */
45606
45607 static tree
45608 ix86_fn_abi_va_list (tree fndecl)
45609 {
45610 if (!TARGET_64BIT)
45611 return va_list_type_node;
45612 gcc_assert (fndecl != NULL_TREE);
45613
45614 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45615 return ms_va_list_type_node;
45616 else
45617 return sysv_va_list_type_node;
45618 }
45619
45620 /* Returns the canonical va_list type specified by TYPE. If there
45621 is no valid TYPE provided, it returns NULL_TREE. */
45622
45623 static tree
45624 ix86_canonical_va_list_type (tree type)
45625 {
45626 tree wtype, htype;
45627
45628 /* Resolve references and pointers to va_list type. */
45629 if (TREE_CODE (type) == MEM_REF)
45630 type = TREE_TYPE (type);
45631 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
45632 type = TREE_TYPE (type);
45633 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45634 type = TREE_TYPE (type);
45635
45636 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45637 {
45638 wtype = va_list_type_node;
45639 gcc_assert (wtype != NULL_TREE);
45640 htype = type;
45641 if (TREE_CODE (wtype) == ARRAY_TYPE)
45642 {
45643 /* If va_list is an array type, the argument may have decayed
45644 to a pointer type, e.g. by being passed to another function.
45645 In that case, unwrap both types so that we can compare the
45646 underlying records. */
45647 if (TREE_CODE (htype) == ARRAY_TYPE
45648 || POINTER_TYPE_P (htype))
45649 {
45650 wtype = TREE_TYPE (wtype);
45651 htype = TREE_TYPE (htype);
45652 }
45653 }
45654 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45655 return va_list_type_node;
45656 wtype = sysv_va_list_type_node;
45657 gcc_assert (wtype != NULL_TREE);
45658 htype = type;
45659 if (TREE_CODE (wtype) == ARRAY_TYPE)
45660 {
45661 /* If va_list is an array type, the argument may have decayed
45662 to a pointer type, e.g. by being passed to another function.
45663 In that case, unwrap both types so that we can compare the
45664 underlying records. */
45665 if (TREE_CODE (htype) == ARRAY_TYPE
45666 || POINTER_TYPE_P (htype))
45667 {
45668 wtype = TREE_TYPE (wtype);
45669 htype = TREE_TYPE (htype);
45670 }
45671 }
45672 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45673 return sysv_va_list_type_node;
45674 wtype = ms_va_list_type_node;
45675 gcc_assert (wtype != NULL_TREE);
45676 htype = type;
45677 if (TREE_CODE (wtype) == ARRAY_TYPE)
45678 {
45679 /* If va_list is an array type, the argument may have decayed
45680 to a pointer type, e.g. by being passed to another function.
45681 In that case, unwrap both types so that we can compare the
45682 underlying records. */
45683 if (TREE_CODE (htype) == ARRAY_TYPE
45684 || POINTER_TYPE_P (htype))
45685 {
45686 wtype = TREE_TYPE (wtype);
45687 htype = TREE_TYPE (htype);
45688 }
45689 }
45690 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45691 return ms_va_list_type_node;
45692 return NULL_TREE;
45693 }
45694 return std_canonical_va_list_type (type);
45695 }
45696
45697 /* Iterate through the target-specific builtin types for va_list.
45698 IDX denotes the iterator, *PTREE is set to the result type of
45699 the va_list builtin, and *PNAME to its internal type.
45700 Returns zero if there is no element for this index, otherwise
45701 IDX should be increased upon the next call.
45702 Note, do not iterate a base builtin's name like __builtin_va_list.
45703 Used from c_common_nodes_and_builtins. */
45704
45705 static int
45706 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45707 {
45708 if (TARGET_64BIT)
45709 {
45710 switch (idx)
45711 {
45712 default:
45713 break;
45714
45715 case 0:
45716 *ptree = ms_va_list_type_node;
45717 *pname = "__builtin_ms_va_list";
45718 return 1;
45719
45720 case 1:
45721 *ptree = sysv_va_list_type_node;
45722 *pname = "__builtin_sysv_va_list";
45723 return 1;
45724 }
45725 }
45726
45727 return 0;
45728 }
45729
45730 #undef TARGET_SCHED_DISPATCH
45731 #define TARGET_SCHED_DISPATCH has_dispatch
45732 #undef TARGET_SCHED_DISPATCH_DO
45733 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45734 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45735 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45736 #undef TARGET_SCHED_REORDER
45737 #define TARGET_SCHED_REORDER ix86_sched_reorder
45738 #undef TARGET_SCHED_ADJUST_PRIORITY
45739 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45740 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45741 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45742 ix86_dependencies_evaluation_hook
45743
45744 /* The size of the dispatch window is the total number of bytes of
45745 object code allowed in a window. */
45746 #define DISPATCH_WINDOW_SIZE 16
45747
45748 /* Number of dispatch windows considered for scheduling. */
45749 #define MAX_DISPATCH_WINDOWS 3
45750
45751 /* Maximum number of instructions in a window. */
45752 #define MAX_INSN 4
45753
45754 /* Maximum number of immediate operands in a window. */
45755 #define MAX_IMM 4
45756
45757 /* Maximum number of immediate bits allowed in a window. */
45758 #define MAX_IMM_SIZE 128
45759
45760 /* Maximum number of 32 bit immediates allowed in a window. */
45761 #define MAX_IMM_32 4
45762
45763 /* Maximum number of 64 bit immediates allowed in a window. */
45764 #define MAX_IMM_64 2
45765
45766 /* Maximum total of loads or prefetches allowed in a window. */
45767 #define MAX_LOAD 2
45768
45769 /* Maximum total of stores allowed in a window. */
45770 #define MAX_STORE 1
45771
45772 #undef BIG
45773 #define BIG 100
45774
45775
45776 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45777 enum dispatch_group {
45778 disp_no_group = 0,
45779 disp_load,
45780 disp_store,
45781 disp_load_store,
45782 disp_prefetch,
45783 disp_imm,
45784 disp_imm_32,
45785 disp_imm_64,
45786 disp_branch,
45787 disp_cmp,
45788 disp_jcc,
45789 disp_last
45790 };
45791
45792 /* Number of allowable groups in a dispatch window. It is an array
45793 indexed by the dispatch_group enum. 100 is used as a big number
45794 because the number of these kinds of operations does not have any
45795 effect on the dispatch window, but we need entries for them in
45796 the table. */
45797 static unsigned int num_allowable_groups[disp_last] = {
45798 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45799 };
45800
45801 char group_name[disp_last + 1][16] = {
45802 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45803 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45804 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45805 };
45806
45807 /* Instruction path. */
45808 enum insn_path {
45809 no_path = 0,
45810 path_single, /* Single micro op. */
45811 path_double, /* Double micro op. */
45812 path_multi, /* Instructions with more than 2 micro ops. */
45813 last_path
45814 };
45815
45816 /* sched_insn_info defines a window into the instructions scheduled in
45817 the basic block. It contains a pointer to the insn_info table and
45818 the instruction scheduled.
45819
45820 Windows are allocated for each basic block and are linked
45821 together. */
45822 typedef struct sched_insn_info_s {
45823 rtx insn;
45824 enum dispatch_group group;
45825 enum insn_path path;
45826 int byte_len;
45827 int imm_bytes;
45828 } sched_insn_info;
45829
45830 /* Linked list of dispatch windows. This is a two-way list of
45831 dispatch windows of a basic block. It contains information about
45832 the number of uops in the window and the total number of
45833 instructions and of bytes in the object code for this dispatch
45834 window. */
45835 typedef struct dispatch_windows_s {
45836 int num_insn; /* Number of insn in the window. */
45837 int num_uops; /* Number of uops in the window. */
45838 int window_size; /* Number of bytes in the window. */
45839 int window_num; /* Window number, either 0 or 1. */
45840 int num_imm; /* Number of immediates in the window. */
45841 int num_imm_32; /* Number of 32 bit immediates in the window. */
45842 int num_imm_64; /* Number of 64 bit immediates in the window. */
45843 int imm_size; /* Total size of immediates in the window. */
45844 int num_loads; /* Total memory loads in the window. */
45845 int num_stores; /* Total memory stores in the window. */
45846 int violation; /* Violation exists in window. */
45847 sched_insn_info *window; /* Pointer to the window. */
45848 struct dispatch_windows_s *next;
45849 struct dispatch_windows_s *prev;
45850 } dispatch_windows;
45851
45852 /* Immediate values used in an insn. */
45853 typedef struct imm_info_s
45854 {
45855 int imm;
45856 int imm32;
45857 int imm64;
45858 } imm_info;
45859
45860 static dispatch_windows *dispatch_window_list;
45861 static dispatch_windows *dispatch_window_list1;
45862
45863 /* Get dispatch group of insn. */
45864
45865 static enum dispatch_group
45866 get_mem_group (rtx_insn *insn)
45867 {
45868 enum attr_memory memory;
45869
45870 if (INSN_CODE (insn) < 0)
45871 return disp_no_group;
45872 memory = get_attr_memory (insn);
45873 if (memory == MEMORY_STORE)
45874 return disp_store;
45875
45876 if (memory == MEMORY_LOAD)
45877 return disp_load;
45878
45879 if (memory == MEMORY_BOTH)
45880 return disp_load_store;
45881
45882 return disp_no_group;
45883 }
45884
45885 /* Return true if insn is a compare instruction. */
45886
45887 static bool
45888 is_cmp (rtx_insn *insn)
45889 {
45890 enum attr_type type;
45891
45892 type = get_attr_type (insn);
45893 return (type == TYPE_TEST
45894 || type == TYPE_ICMP
45895 || type == TYPE_FCMP
45896 || GET_CODE (PATTERN (insn)) == COMPARE);
45897 }
45898
45899 /* Return true if a dispatch violation was encountered. */
45900
45901 static bool
45902 dispatch_violation (void)
45903 {
45904 if (dispatch_window_list->next)
45905 return dispatch_window_list->next->violation;
45906 return dispatch_window_list->violation;
45907 }
45908
45909 /* Return true if insn is a branch instruction. */
45910
45911 static bool
45912 is_branch (rtx insn)
45913 {
45914 return (CALL_P (insn) || JUMP_P (insn));
45915 }
45916
45917 /* Return true if insn is a prefetch instruction. */
45918
45919 static bool
45920 is_prefetch (rtx insn)
45921 {
45922 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45923 }
45924
45925 /* This function initializes a dispatch window and the list container holding a
45926 pointer to the window. */
45927
45928 static void
45929 init_window (int window_num)
45930 {
45931 int i;
45932 dispatch_windows *new_list;
45933
45934 if (window_num == 0)
45935 new_list = dispatch_window_list;
45936 else
45937 new_list = dispatch_window_list1;
45938
45939 new_list->num_insn = 0;
45940 new_list->num_uops = 0;
45941 new_list->window_size = 0;
45942 new_list->next = NULL;
45943 new_list->prev = NULL;
45944 new_list->window_num = window_num;
45945 new_list->num_imm = 0;
45946 new_list->num_imm_32 = 0;
45947 new_list->num_imm_64 = 0;
45948 new_list->imm_size = 0;
45949 new_list->num_loads = 0;
45950 new_list->num_stores = 0;
45951 new_list->violation = false;
45952
45953 for (i = 0; i < MAX_INSN; i++)
45954 {
45955 new_list->window[i].insn = NULL;
45956 new_list->window[i].group = disp_no_group;
45957 new_list->window[i].path = no_path;
45958 new_list->window[i].byte_len = 0;
45959 new_list->window[i].imm_bytes = 0;
45960 }
45961 return;
45962 }
45963
45964 /* This function allocates and initializes a dispatch window and the
45965 list container holding a pointer to the window. */
45966
45967 static dispatch_windows *
45968 allocate_window (void)
45969 {
45970 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45971 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45972
45973 return new_list;
45974 }
45975
45976 /* This routine initializes the dispatch scheduling information. It
45977 initiates building dispatch scheduler tables and constructs the
45978 first dispatch window. */
45979
45980 static void
45981 init_dispatch_sched (void)
45982 {
45983 /* Allocate a dispatch list and a window. */
45984 dispatch_window_list = allocate_window ();
45985 dispatch_window_list1 = allocate_window ();
45986 init_window (0);
45987 init_window (1);
45988 }
45989
45990 /* This function returns true if a branch is detected. End of a basic block
45991 does not have to be a branch, but here we assume only branches end a
45992 window. */
45993
45994 static bool
45995 is_end_basic_block (enum dispatch_group group)
45996 {
45997 return group == disp_branch;
45998 }
45999
46000 /* This function is called when the end of a window's processing is reached. */
46001
46002 static void
46003 process_end_window (void)
46004 {
46005 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46006 if (dispatch_window_list->next)
46007 {
46008 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46009 gcc_assert (dispatch_window_list->window_size
46010 + dispatch_window_list1->window_size <= 48);
46011 init_window (1);
46012 }
46013 init_window (0);
46014 }
46015
46016 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46017 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46018 for 48 bytes of instructions. Note that these windows are not dispatch
46019 windows whose sizes are DISPATCH_WINDOW_SIZE. */
46020
46021 static dispatch_windows *
46022 allocate_next_window (int window_num)
46023 {
46024 if (window_num == 0)
46025 {
46026 if (dispatch_window_list->next)
46027 init_window (1);
46028 init_window (0);
46029 return dispatch_window_list;
46030 }
46031
46032 dispatch_window_list->next = dispatch_window_list1;
46033 dispatch_window_list1->prev = dispatch_window_list;
46034
46035 return dispatch_window_list1;
46036 }
46037
46038 /* Subroutine of find_constant. Count the immediate operand *IN_RTX, if any, in IMM_VALUES. */
46039
46040 static int
46041 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46042 {
46043 if (*in_rtx == 0)
46044 return 0;
46045
46046 switch (GET_CODE (*in_rtx))
46047 {
46048 case CONST:
46049 case SYMBOL_REF:
46050 case CONST_INT:
46051 (imm_values->imm)++;
46052 if (x86_64_immediate_operand (*in_rtx, SImode))
46053 (imm_values->imm32)++;
46054 else
46055 (imm_values->imm64)++;
46056 break;
46057
46058 case CONST_DOUBLE:
46059 (imm_values->imm)++;
46060 (imm_values->imm64)++;
46061 break;
46062
46063 case CODE_LABEL:
46064 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46065 {
46066 (imm_values->imm)++;
46067 (imm_values->imm32)++;
46068 }
46069 break;
46070
46071 default:
46072 break;
46073 }
46074
46075 return 0;
46076 }
46077
46078 /* Compute number of immediate operands of an instruction. */
46079
46080 static void
46081 find_constant (rtx in_rtx, imm_info *imm_values)
46082 {
46083 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46084 (rtx_function) find_constant_1, (void *) imm_values);
46085 }
46086
46087 /* Return total size of immediate operands of an instruction along with number
46088 of corresponding immediate operands. It initializes its parameters to zero
46089 before calling FIND_CONSTANT.
46090 INSN is the input instruction. IMM is the total of immediates.
46091 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46092 bit immediates. */
46093
46094 static int
46095 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46096 {
46097 imm_info imm_values = {0, 0, 0};
46098
46099 find_constant (insn, &imm_values);
46100 *imm = imm_values.imm;
46101 *imm32 = imm_values.imm32;
46102 *imm64 = imm_values.imm64;
46103 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46104 }
46105
46106 /* Return true if any operand of instruction INSN is an
46107 immediate. */
46108
46109 static bool
46110 has_immediate (rtx insn)
46111 {
46112 int num_imm_operand;
46113 int num_imm32_operand;
46114 int num_imm64_operand;
46115
46116 if (insn)
46117 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46118 &num_imm64_operand);
46119 return false;
46120 }
46121
46122 /* Return the decode path (single, double or multi micro-op) for INSN. */
46123
46124 static enum insn_path
46125 get_insn_path (rtx_insn *insn)
46126 {
46127 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46128
46129 if ((int)path == 0)
46130 return path_single;
46131
46132 if ((int)path == 1)
46133 return path_double;
46134
46135 return path_multi;
46136 }
46137
46138 /* Return insn dispatch group. */
46139
46140 static enum dispatch_group
46141 get_insn_group (rtx_insn *insn)
46142 {
46143 enum dispatch_group group = get_mem_group (insn);
46144 if (group)
46145 return group;
46146
46147 if (is_branch (insn))
46148 return disp_branch;
46149
46150 if (is_cmp (insn))
46151 return disp_cmp;
46152
46153 if (has_immediate (insn))
46154 return disp_imm;
46155
46156 if (is_prefetch (insn))
46157 return disp_prefetch;
46158
46159 return disp_no_group;
46160 }
46161
46162 /* Count number of GROUP restricted instructions in a dispatch
46163 window WINDOW_LIST. */
46164
46165 static int
46166 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
46167 {
46168 enum dispatch_group group = get_insn_group (insn);
46169 int imm_size;
46170 int num_imm_operand;
46171 int num_imm32_operand;
46172 int num_imm64_operand;
46173
46174 if (group == disp_no_group)
46175 return 0;
46176
46177 if (group == disp_imm)
46178 {
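      /* Roughly, the limits checked below are: at most MAX_IMM immediate
	 operands and MAX_IMM_SIZE of immediate data per window, a 64-bit
	 immediate consumes two of the MAX_IMM_32 32-bit slots (and the
	 MAX_IMM_64 budget shrinks correspondingly with 32-bit immediates),
	 and a window already at the size limit cannot take a further
	 64-bit immediate late in the window.  */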
46179 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46180 &num_imm64_operand);
46181 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46182 || num_imm_operand + window_list->num_imm > MAX_IMM
46183 || (num_imm32_operand > 0
46184 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46185 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46186 || (num_imm64_operand > 0
46187 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46188 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46189 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46190 && num_imm64_operand > 0
46191 && ((window_list->num_imm_64 > 0
46192 && window_list->num_insn >= 2)
46193 || window_list->num_insn >= 3)))
46194 return BIG;
46195
46196 return 1;
46197 }
46198
46199 if ((group == disp_load_store
46200 && (window_list->num_loads >= MAX_LOAD
46201 || window_list->num_stores >= MAX_STORE))
46202 || ((group == disp_load
46203 || group == disp_prefetch)
46204 && window_list->num_loads >= MAX_LOAD)
46205 || (group == disp_store
46206 && window_list->num_stores >= MAX_STORE))
46207 return BIG;
46208
46209 return 1;
46210 }
46211
46212 /* This function returns true if insn satisfies dispatch rules on the
46213 last window scheduled. */
46214
46215 static bool
46216 fits_dispatch_window (rtx_insn *insn)
46217 {
46218 dispatch_windows *window_list = dispatch_window_list;
46219 dispatch_windows *window_list_next = dispatch_window_list->next;
46220 unsigned int num_restrict;
46221 enum dispatch_group group = get_insn_group (insn);
46222 enum insn_path path = get_insn_path (insn);
46223 int sum;
46224
46225 /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
46226 instructions should be given the lowest priority in the
46227 scheduling process in the Haifa scheduler to make sure they will be
46228 scheduled in the same dispatch window as the instruction that uses them. */
46229 if (group == disp_jcc || group == disp_cmp)
46230 return false;
46231
46232 /* Check nonrestricted. */
46233 if (group == disp_no_group || group == disp_branch)
46234 return true;
46235
46236 /* Get last dispatch window. */
46237 if (window_list_next)
46238 window_list = window_list_next;
46239
46240 if (window_list->window_num == 1)
46241 {
46242 sum = window_list->prev->window_size + window_list->window_size;
46243
46244 if (sum == 32
46245 || (min_insn_size (insn) + sum) >= 48)
46246 /* Window 1 is full. Go for next window. */
46247 return true;
46248 }
46249
46250 num_restrict = count_num_restricted (insn, window_list);
46251
46252 if (num_restrict > num_allowable_groups[group])
46253 return false;
46254
46255 /* See if it fits in the first window. */
46256 if (window_list->window_num == 0)
46257 {
46258 /* The first window should have only single and double path
46259 uops. */
46260 if (path == path_double
46261 && (window_list->num_uops + 2) > MAX_INSN)
46262 return false;
46263 else if (path != path_single)
46264 return false;
46265 }
46266 return true;
46267 }
46268
46269 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46270 dispatch window WINDOW_LIST. */
46271
46272 static void
46273 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
46274 {
46275 int byte_len = min_insn_size (insn);
46276 int num_insn = window_list->num_insn;
46277 int imm_size;
46278 sched_insn_info *window = window_list->window;
46279 enum dispatch_group group = get_insn_group (insn);
46280 enum insn_path path = get_insn_path (insn);
46281 int num_imm_operand;
46282 int num_imm32_operand;
46283 int num_imm64_operand;
46284
46285 if (!window_list->violation && group != disp_cmp
46286 && !fits_dispatch_window (insn))
46287 window_list->violation = true;
46288
46289 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46290 &num_imm64_operand);
46291
46292 /* Initialize window with new instruction. */
46293 window[num_insn].insn = insn;
46294 window[num_insn].byte_len = byte_len;
46295 window[num_insn].group = group;
46296 window[num_insn].path = path;
46297 window[num_insn].imm_bytes = imm_size;
46298
46299 window_list->window_size += byte_len;
46300 window_list->num_insn = num_insn + 1;
46301 window_list->num_uops = window_list->num_uops + num_uops;
46302 window_list->imm_size += imm_size;
46303 window_list->num_imm += num_imm_operand;
46304 window_list->num_imm_32 += num_imm32_operand;
46305 window_list->num_imm_64 += num_imm64_operand;
46306
46307 if (group == disp_store)
46308 window_list->num_stores += 1;
46309 else if (group == disp_load
46310 || group == disp_prefetch)
46311 window_list->num_loads += 1;
46312 else if (group == disp_load_store)
46313 {
46314 window_list->num_stores += 1;
46315 window_list->num_loads += 1;
46316 }
46317 }
46318
46319 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46320 If the total bytes of instructions or the number of instructions in
46321 the window exceeds the allowable limit, a new window is allocated. */
46322
46323 static void
46324 add_to_dispatch_window (rtx_insn *insn)
46325 {
46326 int byte_len;
46327 dispatch_windows *window_list;
46328 dispatch_windows *next_list;
46329 dispatch_windows *window0_list;
46330 enum insn_path path;
46331 enum dispatch_group insn_group;
46332 bool insn_fits;
46333 int num_insn;
46334 int num_uops;
46335 int window_num;
46336 int insn_num_uops;
46337 int sum;
46338
46339 if (INSN_CODE (insn) < 0)
46340 return;
46341
46342 byte_len = min_insn_size (insn);
46343 window_list = dispatch_window_list;
46344 next_list = window_list->next;
46345 path = get_insn_path (insn);
46346 insn_group = get_insn_group (insn);
46347
46348 /* Get the last dispatch window. */
46349 if (next_list)
46350 window_list = dispatch_window_list->next;
46351
46352 if (path == path_single)
46353 insn_num_uops = 1;
46354 else if (path == path_double)
46355 insn_num_uops = 2;
46356 else
46357 insn_num_uops = (int) path;
46358
46359 /* If the current window is full, get a new window.
46360 Window number zero is full if MAX_INSN uops are scheduled in it.
46361 Window number one is full if window zero's bytes plus window
46362 one's bytes equal 32, or if adding the bytes of the new instruction
46363 brings the total to 48 or more, or if it already has MAX_INSN
46364 instructions in it. */
46365 num_insn = window_list->num_insn;
46366 num_uops = window_list->num_uops;
46367 window_num = window_list->window_num;
46368 insn_fits = fits_dispatch_window (insn);
46369
46370 if (num_insn >= MAX_INSN
46371 || num_uops + insn_num_uops > MAX_INSN
46372 || !(insn_fits))
46373 {
46374 window_num = ~window_num & 1;
46375 window_list = allocate_next_window (window_num);
46376 }
46377
46378 if (window_num == 0)
46379 {
46380 add_insn_window (insn, window_list, insn_num_uops);
46381 if (window_list->num_insn >= MAX_INSN
46382 && insn_group == disp_branch)
46383 {
46384 process_end_window ();
46385 return;
46386 }
46387 }
46388 else if (window_num == 1)
46389 {
46390 window0_list = window_list->prev;
46391 sum = window0_list->window_size + window_list->window_size;
46392 if (sum == 32
46393 || (byte_len + sum) >= 48)
46394 {
46395 process_end_window ();
46396 window_list = dispatch_window_list;
46397 }
46398
46399 add_insn_window (insn, window_list, insn_num_uops);
46400 }
46401 else
46402 gcc_unreachable ();
46403
46404 if (is_end_basic_block (insn_group))
46405 {
46406 /* End of basic block is reached; do end-of-basic-block processing. */
46407 process_end_window ();
46408 return;
46409 }
46410 }
46411
46412 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46413
46414 DEBUG_FUNCTION static void
46415 debug_dispatch_window_file (FILE *file, int window_num)
46416 {
46417 dispatch_windows *list;
46418 int i;
46419
46420 if (window_num == 0)
46421 list = dispatch_window_list;
46422 else
46423 list = dispatch_window_list1;
46424
46425 fprintf (file, "Window #%d:\n", list->window_num);
46426 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46427 list->num_insn, list->num_uops, list->window_size);
46428 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46429 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46430
46431 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46432 list->num_stores);
46433 fprintf (file, " insn info:\n");
46434
46435 for (i = 0; i < MAX_INSN; i++)
46436 {
46437 if (!list->window[i].insn)
46438 break;
46439 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46440 i, group_name[list->window[i].group],
46441 i, (void *)list->window[i].insn,
46442 i, list->window[i].path,
46443 i, list->window[i].byte_len,
46444 i, list->window[i].imm_bytes);
46445 }
46446 }
46447
46448 /* Print to stdout a dispatch window. */
46449
46450 DEBUG_FUNCTION void
46451 debug_dispatch_window (int window_num)
46452 {
46453 debug_dispatch_window_file (stdout, window_num);
46454 }
46455
46456 /* Print INSN dispatch information to FILE. */
46457
46458 DEBUG_FUNCTION static void
46459 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
46460 {
46461 int byte_len;
46462 enum insn_path path;
46463 enum dispatch_group group;
46464 int imm_size;
46465 int num_imm_operand;
46466 int num_imm32_operand;
46467 int num_imm64_operand;
46468
46469 if (INSN_CODE (insn) < 0)
46470 return;
46471
46472 byte_len = min_insn_size (insn);
46473 path = get_insn_path (insn);
46474 group = get_insn_group (insn);
46475 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46476 &num_imm64_operand);
46477
46478 fprintf (file, " insn info:\n");
46479 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46480 group_name[group], path, byte_len);
46481 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46482 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46483 }
46484
46485 /* Print to STDERR the status of the ready list with respect to
46486 dispatch windows. */
46487
46488 DEBUG_FUNCTION void
46489 debug_ready_dispatch (void)
46490 {
46491 int i;
46492 int no_ready = number_in_ready ();
46493
46494 fprintf (stdout, "Number of ready: %d\n", no_ready);
46495
46496 for (i = 0; i < no_ready; i++)
46497 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46498 }
46499
46500 /* This routine is the driver of the dispatch scheduler. */
46501
46502 static void
46503 do_dispatch (rtx_insn *insn, int mode)
46504 {
46505 if (mode == DISPATCH_INIT)
46506 init_dispatch_sched ();
46507 else if (mode == ADD_TO_DISPATCH_WINDOW)
46508 add_to_dispatch_window (insn);
46509 }
46510
46511 /* Return TRUE if Dispatch Scheduling is supported. */
46512
46513 static bool
46514 has_dispatch (rtx_insn *insn, int action)
46515 {
46516 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46517 && flag_dispatch_scheduler)
46518 switch (action)
46519 {
46520 default:
46521 return false;
46522
46523 case IS_DISPATCH_ON:
46524 return true;
46525 break;
46526
46527 case IS_CMP:
46528 return is_cmp (insn);
46529
46530 case DISPATCH_VIOLATION:
46531 return dispatch_violation ();
46532
46533 case FITS_DISPATCH_WINDOW:
46534 return fits_dispatch_window (insn);
46535 }
46536
46537 return false;
46538 }
46539
46540 /* Implementation of reassociation_width target hook used by
46541 reassoc phase to identify parallelism level in reassociated
46542 tree. The statement's tree_code is passed in OPC. The argument
46543 type is passed in MODE.
46544
46545 Currently parallel reassociation is enabled for Atom
46546 processors only and we set reassociation width to be 2
46547 because Atom may issue up to 2 instructions per cycle.
46548
46549 Return value should be fixed if parallel reassociation is
46550 enabled for other processors. */
46551
46552 static int
46553 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46554 {
46555 int res = 1;
46556
46557 /* Vector part. */
46558 if (VECTOR_MODE_P (mode))
46559 {
46560 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46561 return 2;
46562 else
46563 return 1;
46564 }
46565
46566 /* Scalar part. */
46567 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46568 res = 2;
46569 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46570 res = 2;
46571
46572 return res;
46573 }
46574
46575 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46576 place emms and femms instructions. */
46577
46578 static enum machine_mode
46579 ix86_preferred_simd_mode (enum machine_mode mode)
46580 {
46581 if (!TARGET_SSE)
46582 return word_mode;
46583
46584 switch (mode)
46585 {
46586 case QImode:
46587 return TARGET_AVX512BW ? V64QImode :
46588 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46589 case HImode:
46590 return TARGET_AVX512BW ? V32HImode :
46591 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46592 case SImode:
46593 return TARGET_AVX512F ? V16SImode :
46594 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46595 case DImode:
46596 return TARGET_AVX512F ? V8DImode :
46597 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46598
46599 case SFmode:
46600 if (TARGET_AVX512F)
46601 return V16SFmode;
46602 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46603 return V8SFmode;
46604 else
46605 return V4SFmode;
46606
46607 case DFmode:
46608 if (!TARGET_VECTORIZE_DOUBLE)
46609 return word_mode;
46610 else if (TARGET_AVX512F)
46611 return V8DFmode;
46612 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46613 return V4DFmode;
46614 else if (TARGET_SSE2)
46615 return V2DFmode;
46616 /* FALLTHRU */
46617
46618 default:
46619 return word_mode;
46620 }
46621 }
46622
46623 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46624 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46625 256bit and 128bit vectors. */
46626
46627 static unsigned int
46628 ix86_autovectorize_vector_sizes (void)
46629 {
46630 return TARGET_AVX512F ? 64 | 32 | 16 :
46631 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46632 }
46633
46634 \f
46635
46636 /* Return class of registers which could be used for pseudo of MODE
46637 and of class RCLASS for spilling instead of memory. Return NO_REGS
46638 if it is not possible or non-profitable. */
46639 static reg_class_t
46640 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46641 {
46642 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46643 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46644 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46645 return ALL_SSE_REGS;
46646 return NO_REGS;
46647 }
46648
46649 /* Implement targetm.vectorize.init_cost. */
46650
46651 static void *
46652 ix86_init_cost (struct loop *)
46653 {
46654 unsigned *cost = XNEWVEC (unsigned, 3);
46655 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46656 return cost;
46657 }
46658
46659 /* Implement targetm.vectorize.add_stmt_cost. */
46660
46661 static unsigned
46662 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46663 struct _stmt_vec_info *stmt_info, int misalign,
46664 enum vect_cost_model_location where)
46665 {
46666 unsigned *cost = (unsigned *) data;
46667 unsigned retval = 0;
46668
46669 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46670 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46671
46672 /* Statements in an inner loop relative to the loop being
46673 vectorized are weighted more heavily. The value here is
46674 arbitrary and could potentially be improved with analysis. */
46675 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46676 count *= 50; /* FIXME. */
46677
46678 retval = (unsigned) (count * stmt_cost);
46679
46680 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46681 for Silvermont as it has an out-of-order integer pipeline and can execute
46682 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46683 if (TARGET_SILVERMONT || TARGET_INTEL)
46684 if (stmt_info && stmt_info->stmt)
46685 {
46686 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46687 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46688 retval = (retval * 17) / 10;
46689 }
46690
46691 cost[where] += retval;
46692
46693 return retval;
46694 }
46695
46696 /* Implement targetm.vectorize.finish_cost. */
46697
46698 static void
46699 ix86_finish_cost (void *data, unsigned *prologue_cost,
46700 unsigned *body_cost, unsigned *epilogue_cost)
46701 {
46702 unsigned *cost = (unsigned *) data;
46703 *prologue_cost = cost[vect_prologue];
46704 *body_cost = cost[vect_body];
46705 *epilogue_cost = cost[vect_epilogue];
46706 }
46707
46708 /* Implement targetm.vectorize.destroy_cost_data. */
46709
46710 static void
46711 ix86_destroy_cost_data (void *data)
46712 {
46713 free (data);
46714 }
46715
46716 /* Validate target specific memory model bits in VAL. */
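/* For example, MEMMODEL_ACQUIRE | IX86_HLE_ACQUIRE is accepted unchanged,
   while MEMMODEL_RELAXED | IX86_HLE_ACQUIRE is diagnosed and rewritten to
   MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE.  */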
46717
46718 static unsigned HOST_WIDE_INT
46719 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46720 {
46721 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46722 bool strong;
46723
46724 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46725 |MEMMODEL_MASK)
46726 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46727 {
46728 warning (OPT_Winvalid_memory_model,
46729 "Unknown architecture specific memory model");
46730 return MEMMODEL_SEQ_CST;
46731 }
46732 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46733 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46734 {
46735 warning (OPT_Winvalid_memory_model,
46736 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46737 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46738 }
46739 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46740 {
46741 warning (OPT_Winvalid_memory_model,
46742 "HLE_RELEASE not used with RELEASE or stronger memory model");
46743 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46744 }
46745 return val;
46746 }
46747
46748 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46749 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46750 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46751 or number of vecsize_mangle variants that should be emitted. */
46752
46753 static int
46754 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46755 struct cgraph_simd_clone *clonei,
46756 tree base_type, int num)
46757 {
46758 int ret = 1;
46759
46760 if (clonei->simdlen
46761 && (clonei->simdlen < 2
46762 || clonei->simdlen > 16
46763 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46764 {
46765 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46766 "unsupported simdlen %d", clonei->simdlen);
46767 return 0;
46768 }
46769
46770 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46771 if (TREE_CODE (ret_type) != VOID_TYPE)
46772 switch (TYPE_MODE (ret_type))
46773 {
46774 case QImode:
46775 case HImode:
46776 case SImode:
46777 case DImode:
46778 case SFmode:
46779 case DFmode:
46780 /* case SCmode: */
46781 /* case DCmode: */
46782 break;
46783 default:
46784 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46785 "unsupported return type %qT for simd\n", ret_type);
46786 return 0;
46787 }
46788
46789 tree t;
46790 int i;
46791
46792 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46793 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46794 switch (TYPE_MODE (TREE_TYPE (t)))
46795 {
46796 case QImode:
46797 case HImode:
46798 case SImode:
46799 case DImode:
46800 case SFmode:
46801 case DFmode:
46802 /* case SCmode: */
46803 /* case DCmode: */
46804 break;
46805 default:
46806 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46807 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46808 return 0;
46809 }
46810
46811 if (clonei->cilk_elemental)
46812 {
46813 /* Parse the processor clause here. If not present, default to 'b'. */
46814 clonei->vecsize_mangle = 'b';
46815 }
46816 else if (!TREE_PUBLIC (node->decl))
46817 {
46818 /* If the function isn't exported, we can pick up just one ISA
46819 for the clones. */
46820 if (TARGET_AVX2)
46821 clonei->vecsize_mangle = 'd';
46822 else if (TARGET_AVX)
46823 clonei->vecsize_mangle = 'c';
46824 else
46825 clonei->vecsize_mangle = 'b';
46826 ret = 1;
46827 }
46828 else
46829 {
46830 clonei->vecsize_mangle = "bcd"[num];
46831 ret = 3;
46832 }
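  /* The mangling letters correspond to ISA variants: 'b' is SSE2 (128-bit
     vectors), 'c' is AVX (128-bit integer, 256-bit float vectors) and 'd'
     is AVX2 (256-bit vectors); see ix86_simd_clone_adjust below.  */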
46833 switch (clonei->vecsize_mangle)
46834 {
46835 case 'b':
46836 clonei->vecsize_int = 128;
46837 clonei->vecsize_float = 128;
46838 break;
46839 case 'c':
46840 clonei->vecsize_int = 128;
46841 clonei->vecsize_float = 256;
46842 break;
46843 case 'd':
46844 clonei->vecsize_int = 256;
46845 clonei->vecsize_float = 256;
46846 break;
46847 }
46848 if (clonei->simdlen == 0)
46849 {
46850 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46851 clonei->simdlen = clonei->vecsize_int;
46852 else
46853 clonei->simdlen = clonei->vecsize_float;
46854 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
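      /* For example, a float base type with the AVX variant ('c',
	 vecsize_float == 256) yields a simdlen of 256 / 32 == 8.  */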
46855 if (clonei->simdlen > 16)
46856 clonei->simdlen = 16;
46857 }
46858 return ret;
46859 }
46860
46861 /* Add target attribute to SIMD clone NODE if needed. */
46862
46863 static void
46864 ix86_simd_clone_adjust (struct cgraph_node *node)
46865 {
46866 const char *str = NULL;
46867 gcc_assert (node->decl == cfun->decl);
46868 switch (node->simdclone->vecsize_mangle)
46869 {
46870 case 'b':
46871 if (!TARGET_SSE2)
46872 str = "sse2";
46873 break;
46874 case 'c':
46875 if (!TARGET_AVX)
46876 str = "avx";
46877 break;
46878 case 'd':
46879 if (!TARGET_AVX2)
46880 str = "avx2";
46881 break;
46882 default:
46883 gcc_unreachable ();
46884 }
46885 if (str == NULL)
46886 return;
46887 push_cfun (NULL);
46888 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46889 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46890 gcc_assert (ok);
46891 pop_cfun ();
46892 ix86_previous_fndecl = NULL_TREE;
46893 ix86_set_current_function (node->decl);
46894 }
46895
46896 /* If SIMD clone NODE can't be used in a vectorized loop
46897 in the current function, return -1, otherwise return a badness value for using it
46898 (0 if it is most desirable from the vecsize_mangle point of view, 1
46899 slightly less desirable, etc.). */
46900
46901 static int
46902 ix86_simd_clone_usable (struct cgraph_node *node)
46903 {
46904 switch (node->simdclone->vecsize_mangle)
46905 {
46906 case 'b':
46907 if (!TARGET_SSE2)
46908 return -1;
46909 if (!TARGET_AVX)
46910 return 0;
46911 return TARGET_AVX2 ? 2 : 1;
46912 case 'c':
46913 if (!TARGET_AVX)
46914 return -1;
46915 return TARGET_AVX2 ? 1 : 0;
46916 break;
46917 case 'd':
46918 if (!TARGET_AVX2)
46919 return -1;
46920 return 0;
46921 default:
46922 gcc_unreachable ();
46923 }
46924 }
46925
46926 /* This function counts the number of memory references.
46927 This value determines the unrolling factor for
46928 bdver3 and bdver4 architectures. */
46929
46930 static int
46931 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46932 {
46933 if (*x != NULL_RTX && MEM_P (*x))
46934 {
46935 enum machine_mode mode;
46936 unsigned int n_words;
46937
46938 mode = GET_MODE (*x);
46939 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46940
46941 if (n_words > 4)
46942 (*mem_count) += 2;
46943 else
46944 (*mem_count) += 1;
46945 }
46946 return 0;
46947 }
46948
46949 /* This function adjusts the unroll factor based on
46950 the hardware capabilities. For example, bdver3 has
46951 a loop buffer which makes unrolling of smaller
46952 loops less important. This function decides the
46953 unroll factor using the number of memory references
46954 (value 32 is used) as a heuristic. */
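/* For example, a loop body with 8 counted memory references is given an
   unroll factor of 32 / 8 == 4 below.  */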
46955
46956 static unsigned
46957 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46958 {
46959 basic_block *bbs;
46960 rtx_insn *insn;
46961 unsigned i;
46962 unsigned mem_count = 0;
46963
46964 if (!TARGET_ADJUST_UNROLL)
46965 return nunroll;
46966
46967 /* Count the number of memory references within the loop body. */
46968 bbs = get_loop_body (loop);
46969 for (i = 0; i < loop->num_nodes; i++)
46970 {
46971 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46972 if (NONDEBUG_INSN_P (insn))
46973 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
46974 &mem_count);
46975 }
46976 free (bbs);
46977
46978 if (mem_count && mem_count <= 32)
46979 return 32 / mem_count;
46980
46981 return nunroll;
46982 }
46983
46984
46985 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46986
46987 static bool
46988 ix86_float_exceptions_rounding_supported_p (void)
46989 {
46990 /* For x87 floating point with standard excess precision handling,
46991 there is no adddf3 pattern (since x87 floating point only has
46992 XFmode operations) so the default hook implementation gets this
46993 wrong. */
46994 return TARGET_80387 || TARGET_SSE_MATH;
46995 }
46996
46997 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46998
46999 static void
47000 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47001 {
47002 if (!TARGET_80387 && !TARGET_SSE_MATH)
47003 return;
47004 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47005 if (TARGET_80387)
47006 {
47007 tree fenv_index_type = build_index_type (size_int (6));
47008 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
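      /* fenv_type is a 7-element array of unsigned ints: the 28-byte
	 protected-mode x87 environment stored by fnstenv (control word,
	 status word, tag word and the FPU instruction/operand pointers).  */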
47009 tree fenv_var = create_tmp_var (fenv_type, NULL);
47010 mark_addressable (fenv_var);
47011 tree fenv_ptr = build_pointer_type (fenv_type);
47012 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47013 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47014 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47015 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47016 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47017 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47018 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47019 tree hold_fnclex = build_call_expr (fnclex, 0);
47020 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47021 hold_fnclex);
47022 *clear = build_call_expr (fnclex, 0);
47023 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47024 tree fnstsw_call = build_call_expr (fnstsw, 0);
47025 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
47026 sw_var, fnstsw_call);
47027 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47028 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47029 exceptions_var, exceptions_x87);
47030 *update = build2 (COMPOUND_EXPR, integer_type_node,
47031 sw_mod, update_mod);
47032 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47033 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47034 }
47035 if (TARGET_SSE_MATH)
47036 {
47037 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47038 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47039 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47040 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47041 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47042 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47043 mxcsr_orig_var, stmxcsr_hold_call);
47044 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47045 mxcsr_orig_var,
47046 build_int_cst (unsigned_type_node, 0x1f80));
47047 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47048 build_int_cst (unsigned_type_node, 0xffffffc0));
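      /* 0x1f80 sets the six exception mask bits in MXCSR (masking all
	 exceptions); the AND with 0xffffffc0 clears the six exception
	 flag bits.  */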
47049 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47050 mxcsr_mod_var, hold_mod_val);
47051 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47052 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47053 hold_assign_orig, hold_assign_mod);
47054 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47055 ldmxcsr_hold_call);
47056 if (*hold)
47057 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47058 else
47059 *hold = hold_all;
47060 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47061 if (*clear)
47062 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47063 ldmxcsr_clear_call);
47064 else
47065 *clear = ldmxcsr_clear_call;
47066 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
47067 tree exceptions_sse = fold_convert (integer_type_node,
47068 stxmcsr_update_call);
47069 if (*update)
47070 {
47071 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47072 exceptions_var, exceptions_sse);
47073 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47074 exceptions_var, exceptions_mod);
47075 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47076 exceptions_assign);
47077 }
47078 else
47079 *update = build2 (MODIFY_EXPR, integer_type_node,
47080 exceptions_var, exceptions_sse);
47081 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47082 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47083 ldmxcsr_update_call);
47084 }
47085 tree atomic_feraiseexcept
47086 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47087 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47088 1, exceptions_var);
47089 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47090 atomic_feraiseexcept_call);
47091 }
47092
47093 /* Initialize the GCC target structure. */
47094 #undef TARGET_RETURN_IN_MEMORY
47095 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47096
47097 #undef TARGET_LEGITIMIZE_ADDRESS
47098 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47099
47100 #undef TARGET_ATTRIBUTE_TABLE
47101 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47102 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47103 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47104 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47105 # undef TARGET_MERGE_DECL_ATTRIBUTES
47106 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47107 #endif
47108
47109 #undef TARGET_COMP_TYPE_ATTRIBUTES
47110 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47111
47112 #undef TARGET_INIT_BUILTINS
47113 #define TARGET_INIT_BUILTINS ix86_init_builtins
47114 #undef TARGET_BUILTIN_DECL
47115 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47116 #undef TARGET_EXPAND_BUILTIN
47117 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47118
47119 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47120 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47121 ix86_builtin_vectorized_function
47122
47123 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47124 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47125
47126 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47127 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47128
47129 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47130 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47131
47132 #undef TARGET_BUILTIN_RECIPROCAL
47133 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47134
47135 #undef TARGET_ASM_FUNCTION_EPILOGUE
47136 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47137
47138 #undef TARGET_ENCODE_SECTION_INFO
47139 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47140 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47141 #else
47142 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47143 #endif
47144
47145 #undef TARGET_ASM_OPEN_PAREN
47146 #define TARGET_ASM_OPEN_PAREN ""
47147 #undef TARGET_ASM_CLOSE_PAREN
47148 #define TARGET_ASM_CLOSE_PAREN ""
47149
47150 #undef TARGET_ASM_BYTE_OP
47151 #define TARGET_ASM_BYTE_OP ASM_BYTE
47152
47153 #undef TARGET_ASM_ALIGNED_HI_OP
47154 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47155 #undef TARGET_ASM_ALIGNED_SI_OP
47156 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47157 #ifdef ASM_QUAD
47158 #undef TARGET_ASM_ALIGNED_DI_OP
47159 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47160 #endif
47161
47162 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47163 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47164
47165 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47166 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47167
47168 #undef TARGET_ASM_UNALIGNED_HI_OP
47169 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47170 #undef TARGET_ASM_UNALIGNED_SI_OP
47171 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47172 #undef TARGET_ASM_UNALIGNED_DI_OP
47173 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47174
47175 #undef TARGET_PRINT_OPERAND
47176 #define TARGET_PRINT_OPERAND ix86_print_operand
47177 #undef TARGET_PRINT_OPERAND_ADDRESS
47178 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47179 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47180 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47181 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47182 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47183
47184 #undef TARGET_SCHED_INIT_GLOBAL
47185 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47186 #undef TARGET_SCHED_ADJUST_COST
47187 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47188 #undef TARGET_SCHED_ISSUE_RATE
47189 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47190 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47191 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47192 ia32_multipass_dfa_lookahead
47193 #undef TARGET_SCHED_MACRO_FUSION_P
47194 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47195 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47196 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47197
47198 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47199 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47200
47201 #undef TARGET_MEMMODEL_CHECK
47202 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47203
47204 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47205 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47206
47207 #ifdef HAVE_AS_TLS
47208 #undef TARGET_HAVE_TLS
47209 #define TARGET_HAVE_TLS true
47210 #endif
47211 #undef TARGET_CANNOT_FORCE_CONST_MEM
47212 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47213 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47214 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47215
47216 #undef TARGET_DELEGITIMIZE_ADDRESS
47217 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47218
47219 #undef TARGET_MS_BITFIELD_LAYOUT_P
47220 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47221
47222 #if TARGET_MACHO
47223 #undef TARGET_BINDS_LOCAL_P
47224 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47225 #endif
47226 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47227 #undef TARGET_BINDS_LOCAL_P
47228 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47229 #endif
47230
47231 #undef TARGET_ASM_OUTPUT_MI_THUNK
47232 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47233 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47234 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47235
47236 #undef TARGET_ASM_FILE_START
47237 #define TARGET_ASM_FILE_START x86_file_start
47238
47239 #undef TARGET_OPTION_OVERRIDE
47240 #define TARGET_OPTION_OVERRIDE ix86_option_override
47241
47242 #undef TARGET_REGISTER_MOVE_COST
47243 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47244 #undef TARGET_MEMORY_MOVE_COST
47245 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47246 #undef TARGET_RTX_COSTS
47247 #define TARGET_RTX_COSTS ix86_rtx_costs
47248 #undef TARGET_ADDRESS_COST
47249 #define TARGET_ADDRESS_COST ix86_address_cost
47250
47251 #undef TARGET_FIXED_CONDITION_CODE_REGS
47252 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47253 #undef TARGET_CC_MODES_COMPATIBLE
47254 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47255
47256 #undef TARGET_MACHINE_DEPENDENT_REORG
47257 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47258
47259 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47260 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47261
47262 #undef TARGET_BUILD_BUILTIN_VA_LIST
47263 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47264
47265 #undef TARGET_FOLD_BUILTIN
47266 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47267
47268 #undef TARGET_COMPARE_VERSION_PRIORITY
47269 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47270
47271 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47272 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47273 ix86_generate_version_dispatcher_body
47274
47275 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47276 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47277 ix86_get_function_versions_dispatcher
47278
47279 #undef TARGET_ENUM_VA_LIST_P
47280 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47281
47282 #undef TARGET_FN_ABI_VA_LIST
47283 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47284
47285 #undef TARGET_CANONICAL_VA_LIST_TYPE
47286 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47287
47288 #undef TARGET_EXPAND_BUILTIN_VA_START
47289 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47290
47291 #undef TARGET_MD_ASM_CLOBBERS
47292 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47293
47294 #undef TARGET_PROMOTE_PROTOTYPES
47295 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47296 #undef TARGET_SETUP_INCOMING_VARARGS
47297 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47298 #undef TARGET_MUST_PASS_IN_STACK
47299 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47300 #undef TARGET_FUNCTION_ARG_ADVANCE
47301 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47302 #undef TARGET_FUNCTION_ARG
47303 #define TARGET_FUNCTION_ARG ix86_function_arg
47304 #undef TARGET_FUNCTION_ARG_BOUNDARY
47305 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47306 #undef TARGET_PASS_BY_REFERENCE
47307 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47308 #undef TARGET_INTERNAL_ARG_POINTER
47309 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47310 #undef TARGET_UPDATE_STACK_BOUNDARY
47311 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47312 #undef TARGET_GET_DRAP_RTX
47313 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47314 #undef TARGET_STRICT_ARGUMENT_NAMING
47315 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47316 #undef TARGET_STATIC_CHAIN
47317 #define TARGET_STATIC_CHAIN ix86_static_chain
47318 #undef TARGET_TRAMPOLINE_INIT
47319 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47320 #undef TARGET_RETURN_POPS_ARGS
47321 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47322
47323 #undef TARGET_LEGITIMATE_COMBINED_INSN
47324 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47325
47326 #undef TARGET_ASAN_SHADOW_OFFSET
47327 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47328
47329 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47330 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47331
47332 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47333 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47334
47335 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47336 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47337
47338 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
47339 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
47340 ix86_libgcc_floating_mode_supported_p
47341
47342 #undef TARGET_C_MODE_FOR_SUFFIX
47343 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47344
47345 #ifdef HAVE_AS_TLS
47346 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47347 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47348 #endif
47349
47350 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47351 #undef TARGET_INSERT_ATTRIBUTES
47352 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47353 #endif
47354
47355 #undef TARGET_MANGLE_TYPE
47356 #define TARGET_MANGLE_TYPE ix86_mangle_type
47357
47358 #if !TARGET_MACHO
47359 #undef TARGET_STACK_PROTECT_FAIL
47360 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47361 #endif
47362
47363 #undef TARGET_FUNCTION_VALUE
47364 #define TARGET_FUNCTION_VALUE ix86_function_value
47365
47366 #undef TARGET_FUNCTION_VALUE_REGNO_P
47367 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47368
47369 #undef TARGET_PROMOTE_FUNCTION_MODE
47370 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47371
47372 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47373 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47374
47375 #undef TARGET_INSTANTIATE_DECLS
47376 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47377
47378 #undef TARGET_SECONDARY_RELOAD
47379 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47380
47381 #undef TARGET_CLASS_MAX_NREGS
47382 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47383
47384 #undef TARGET_PREFERRED_RELOAD_CLASS
47385 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47386 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47387 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47388 #undef TARGET_CLASS_LIKELY_SPILLED_P
47389 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47390
47391 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47392 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47393 ix86_builtin_vectorization_cost
47394 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47395 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47396 ix86_vectorize_vec_perm_const_ok
47397 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47398 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47399 ix86_preferred_simd_mode
47400 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47401 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47402 ix86_autovectorize_vector_sizes
47403 #undef TARGET_VECTORIZE_INIT_COST
47404 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47405 #undef TARGET_VECTORIZE_ADD_STMT_COST
47406 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47407 #undef TARGET_VECTORIZE_FINISH_COST
47408 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47409 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47410 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47411
47412 #undef TARGET_SET_CURRENT_FUNCTION
47413 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47414
47415 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47416 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47417
47418 #undef TARGET_OPTION_SAVE
47419 #define TARGET_OPTION_SAVE ix86_function_specific_save
47420
47421 #undef TARGET_OPTION_RESTORE
47422 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47423
47424 #undef TARGET_OPTION_PRINT
47425 #define TARGET_OPTION_PRINT ix86_function_specific_print
47426
47427 #undef TARGET_OPTION_FUNCTION_VERSIONS
47428 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47429
47430 #undef TARGET_CAN_INLINE_P
47431 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47432
47433 #undef TARGET_EXPAND_TO_RTL_HOOK
47434 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47435
47436 #undef TARGET_LEGITIMATE_ADDRESS_P
47437 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47438
47439 #undef TARGET_LRA_P
47440 #define TARGET_LRA_P hook_bool_void_true
47441
47442 #undef TARGET_REGISTER_PRIORITY
47443 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47444
47445 #undef TARGET_REGISTER_USAGE_LEVELING_P
47446 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47447
47448 #undef TARGET_LEGITIMATE_CONSTANT_P
47449 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47450
47451 #undef TARGET_FRAME_POINTER_REQUIRED
47452 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47453
47454 #undef TARGET_CAN_ELIMINATE
47455 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47456
47457 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47458 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47459
47460 #undef TARGET_ASM_CODE_END
47461 #define TARGET_ASM_CODE_END ix86_code_end
47462
47463 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47464 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47465
47466 #if TARGET_MACHO
47467 #undef TARGET_INIT_LIBFUNCS
47468 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47469 #endif
47470
47471 #undef TARGET_LOOP_UNROLL_ADJUST
47472 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47473
47474 #undef TARGET_SPILL_CLASS
47475 #define TARGET_SPILL_CLASS ix86_spill_class
47476
47477 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47478 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47479 ix86_simd_clone_compute_vecsize_and_simdlen
47480
47481 #undef TARGET_SIMD_CLONE_ADJUST
47482 #define TARGET_SIMD_CLONE_ADJUST \
47483 ix86_simd_clone_adjust
47484
47485 #undef TARGET_SIMD_CLONE_USABLE
47486 #define TARGET_SIMD_CLONE_USABLE \
47487 ix86_simd_clone_usable
47488
47489 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47490 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47491 ix86_float_exceptions_rounding_supported_p
47492
47493 #undef TARGET_MODE_EMIT
47494 #define TARGET_MODE_EMIT ix86_emit_mode_set
47495
47496 #undef TARGET_MODE_NEEDED
47497 #define TARGET_MODE_NEEDED ix86_mode_needed
47498
47499 #undef TARGET_MODE_AFTER
47500 #define TARGET_MODE_AFTER ix86_mode_after
47501
47502 #undef TARGET_MODE_ENTRY
47503 #define TARGET_MODE_ENTRY ix86_mode_entry
47504
47505 #undef TARGET_MODE_EXIT
47506 #define TARGET_MODE_EXIT ix86_mode_exit
47507
47508 #undef TARGET_MODE_PRIORITY
47509 #define TARGET_MODE_PRIORITY ix86_mode_priority
47510
47511 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
47512 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
47513
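/* targetm is the single instance of the target hook vector.  The
   TARGET_INITIALIZER macro from target-def.h expands to an aggregate
   initializer in which each field takes its default value unless the
   corresponding TARGET_* macro was redefined above.  */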
47514 struct gcc_target targetm = TARGET_INITIALIZER;
47515 \f
47516 #include "gt-i386.h"