1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "hash-table.h"
56 #include "vec.h"
57 #include "basic-block.h"
58 #include "tree-ssa-alias.h"
59 #include "internal-fn.h"
60 #include "gimple-fold.h"
61 #include "tree-eh.h"
62 #include "gimple-expr.h"
63 #include "is-a.h"
64 #include "gimple.h"
65 #include "gimplify.h"
66 #include "cfgloop.h"
67 #include "dwarf2.h"
68 #include "df.h"
69 #include "tm-constrs.h"
70 #include "params.h"
71 #include "cselib.h"
72 #include "debug.h"
73 #include "sched-int.h"
74 #include "sbitmap.h"
75 #include "fibheap.h"
76 #include "opts.h"
77 #include "diagnostic.h"
78 #include "dumpfile.h"
79 #include "tree-pass.h"
80 #include "wide-int.h"
81 #include "context.h"
82 #include "pass_manager.h"
83 #include "target-globals.h"
84 #include "tree-vectorizer.h"
85 #include "shrink-wrap.h"
86 #include "builtins.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return index of given mode in mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
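/* Added note (illustrative, not part of the original source): MODE_INDEX
   subscripts the five-entry multiply and divide cost arrays in struct
   processor_costs below -- QImode..DImode map to slots 0..3 and anything
   else (e.g. TImode) falls into the trailing "other" slot 4.  Assuming the
   mult_init and divide member names declared in i386.h, a lookup would look
   roughly like:

     cost->mult_init[MODE_INDEX (mode)]   -- cost of starting a multiply
     cost->divide[MODE_INDEX (mode)]      -- cost of a divide/mod

   for the mode of the operation being costed.  */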
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
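/* Added note (illustrative): with COSTS_N_INSNS (N) == (N) * 4, as assumed
   in the comment above, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1).  A
   two-byte add in the size-tuning table therefore scores the same as a
   one-insn add in the speed-tuning tables, keeping the two cost scales
   comparable.  */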
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has an 8kB cache
311 shared between code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
414 (we ensure the alignment). For small blocks an inline loop is still a
415 noticeable win; for bigger blocks either rep movsl or rep movsb is the
416 way to go. Rep movsb apparently has a more expensive startup time in the
417 CPU, but after 4K the difference is down in the noise. */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
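/* Added note (illustrative): each stringop_algs entry above is a
   {max_size, algorithm, noalign} triple; the member names (unknown_size,
   max, alg, noalign) are as declared in i386.h and are assumed here.  For a
   block whose size is known at compile time, the selection code later in
   this file walks the entries and uses the first one whose max_size covers
   the size (-1 meaning "no upper bound"); the leading algorithm is used
   when the size is unknown.  For example, with pentiumpro_memcpy a 512-byte
   copy falls under {1024, unrolled_loop, false}, while a copy of unknown
   size uses rep_prefix_4_byte.  */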
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
648 /* For some reason, Athlon deals better with the REP prefix (relative to
649 loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
650 and 128 bytes for memset. */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
726 small blocks it is better to use a loop. For large blocks, a libcall can
727 do nontemporal accesses and beat the inline expansion considerably. */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
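/* Added note (illustrative): the two elements of each memcpy/memset array
   are the 32-bit and 64-bit tuning tables respectively (the selection code
   later in this file is assumed to index them by whether 64-bit code is
   being generated).  For instance, k8_memcpy[1] is the table consulted for
   64-bit code: it uses rep_prefix_8_byte up to 8192 bytes and a libcall
   beyond that.  Processors that never execute 64-bit code simply use
   DUMMY_STRINGOP_ALGS (always libcall) as the second element.  */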
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
783 /* New AMD processors never drop prefetches; if they cannot be performed
784 immediately, they are queued. We set the number of simultaneous prefetches
785 to a large constant to reflect this (leaving the number of prefetches
786 completely unlimited is probably not a good idea either, as their execution
787 also takes some time). */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
813 very small blocks it is better to use a loop. For large blocks, a libcall can
814 do nontemporal accesses and beat the inline expansion considerably. */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
877 /* New AMD processors never drop prefetches; if they cannot be performed
878 immediately, they are queued. We set the number of simultaneous prefetches
879 to a large constant to reflect this (leaving the number of prefetches
880 completely unlimited is probably not a good idea either, as their execution
881 also takes some time). */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
907 very small blocks it is better to use a loop. For large blocks, a libcall
908 can do nontemporal accesses and beat the inline expansion considerably. */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
972 /* New AMD processors never drop prefetches; if they cannot be performed
973 immediately, they are queued. We set the number of simultaneous prefetches
974 to a large constant to reflect this (leaving the number of prefetches
975 completely unlimited is probably not a good idea either, as their execution
976 also takes some time). */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002 very small blocks it is better to use a loop. For large blocks, a libcall
1003 can do nontemporal accesses and beat the inline expansion considerably. */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068 /* New AMD processors never drop prefetches; if they cannot be performed
1069 immediately, they are queued. We set the number of simultaneous prefetches
1070 to a large constant to reflect this (leaving the number of prefetches
1071 completely unlimited is probably not a good idea either, as their execution
1072 also takes some time). */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099 very small blocks it is better to use a loop. For large blocks, a libcall
1100 can do nontemporal accesses and beat the inline expansion considerably. */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155 /* New AMD processors never drop prefetches; if they cannot be performed
1156 immediately, they are queued. We set the number of simultaneous prefetches
1157 to a large constant to reflect this (leaving the number of prefetches
1158 completely unlimited is probably not a good idea either, as their execution
1159 also takes some time). */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185 very small blocks it is better to use a loop. For large blocks, a libcall
1186 can do nontemporal accesses and beat the inline expansion considerably. */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241 /* New AMD processors never drop prefetches; if they cannot be performed
1242 immediately, they are queued. We set the number of simultaneous prefetches
1243 to a large constant to reflect this (leaving the number of prefetches
1244 completely unlimited is probably not a good idea either, as their execution
1245 also takes some time). */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271 very small blocks it is better to use a loop. For large blocks, a libcall can
1272 do nontemporal accesses and beat the inline expansion considerably. */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432 2, /* scalar_load_cost. */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508 1, /* scalar_load_cost. */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587 1, /* scalar_load_cost. */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664 1, /* scalar_load_cost. */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741 1, /* scalar_load_cost. */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818 1, /* scalar_load_cost. */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1847 this cost, however, our current implementation of synth_mult results in
1848 the use of unnecessary temporary registers, causing regressions on several
1849 SPECfp benchmarks. */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894 value is increased to the perhaps more appropriate value of 5. */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905 1, /* scalar_load_cost. */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs. */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933 /* On all chips taken into consideration, lea takes 2 cycles or more. With
1934 this cost, however, our current implementation of synth_mult results in
1935 the use of unnecessary temporary registers, causing regressions on several
1936 SPECfp benchmarks. */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME: perhaps a more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991 1, /* scalar_load_cost. */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
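
/* A minimal sketch of how the two pointers above are consulted.  Each
   stringop table earlier in this file has two rows because lookups are
   indexed by whether the target is 64-bit (row 0 for 32-bit, row 1 for
   64-bit), as the strategy-parsing code later in this file does.  The
   helper below is illustrative only and is not part of the file's real
   interface.  */

static inline const struct stringop_algs *
ix86_example_current_memcpy_algs (void)
{
  /* Pick the row of the tuned memcpy strategy table that matches the
     current target word size.  */
  return &ix86_cost->memcpy[TARGET_64BIT != 0];
}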
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
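
/* A hedged illustration of the DEF_TUNE mechanism above: each entry of
   x86-tune.def contributes a feature name to ix86_tune_feature_names and
   a processor selector mask (built from the m_* masks defined earlier)
   to initial_ix86_tune_features.  The helper below only sketches how
   such a selector is tested against a processor; the real derivation
   lives in set_ix86_tune_features later in this file.  */

static inline bool
ix86_example_tune_selector_matches (unsigned int feature_index,
				    enum processor_type tune)
{
  /* A feature applies to TUNE when TUNE's bit is set in the feature's
     selector mask.  */
  return (initial_ix86_tune_features[feature_index] & (1u << tune)) != 0;
}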
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* In case the average insn count for a single function invocation is
2088 lower than this constant, emit fast (but longer) prologue and
2089 epilogue code. */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
2093 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
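
/* The map above is normally consulted through the REGNO_REG_CLASS macro
   mentioned in its comment; the helper below is only an illustrative
   sketch of a direct lookup.  */

static inline bool
ix86_example_regno_is_sse (unsigned int regno)
{
  enum reg_class rclass = regclass_map[regno];
  return (rclass == SSE_FIRST_REG || rclass == SSE_REGS
	  || rclass == EVEX_SSE_REGS);
}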
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
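
/* In the register maps above an entry of -1 means the register has no
   number in that debug numbering.  A minimal, illustrative check: */

static inline bool
ix86_example_has_svr4_dwarf_number (unsigned int regno)
{
  return svr4_dbx_register_map[regno] != -1;
}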
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64-bit part of the argument.
2372 These represent classes as documented by the psABI, with the exception
2373 of the SSESF and SSEDF classes, which are basically SSE class; gcc just
2374 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2375
2376 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377 whenever possible (the upper half does contain padding). */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
2392
2393 #define MAX_CLASSES 8
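
/* A small sketch of how the classification above is typically consumed
   when deciding how many integer registers a value needs.  The CLASSES
   array is assumed to have been filled in by the classification code
   elsewhere; the helper itself is illustrative only.  */

static inline int
ix86_example_count_integer_words (const enum x86_64_reg_class *classes,
				  int n)
{
  int i, words = 0;

  for (i = 0; i < n && i < MAX_CLASSES; i++)
    if (classes[i] == X86_64_INTEGER_CLASS
	|| classes[i] == X86_64_INTEGERSI_CLASS)
      words++;
  return words;
}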
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
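
/* A minimal sketch of how the table above is indexed: processor_type
   values select a row directly, which is why the table must stay in sync
   with the enum in i386.h.  The helper is illustrative only.  */

static inline const struct processor_costs *
ix86_example_cost_for (enum processor_type cpu)
{
  return processor_target_table[(int) cpu].cost;
}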
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504 /* vzeroupper instructions are inserted immediately after reload to
2505 account for possible spills from 256bit registers. The pass
2506 reuses the mode switching infrastructure by re-running the mode insertion
2507 pass, so disable entities that have already been processed. */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 TV_NONE, /* tv_id */
2526 0, /* properties_required */
2527 0, /* properties_provided */
2528 0, /* properties_destroyed */
2529 0, /* todo_flags_start */
2530 TODO_df_finish, /* todo_flags_finish */
2531 };
2532
2533 class pass_insert_vzeroupper : public rtl_opt_pass
2534 {
2535 public:
2536 pass_insert_vzeroupper(gcc::context *ctxt)
2537 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2538 {}
2539
2540 /* opt_pass methods: */
2541 virtual bool gate (function *)
2542 {
2543 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2544 }
2545
2546 virtual unsigned int execute (function *)
2547 {
2548 return rest_of_handle_insert_vzeroupper ();
2549 }
2550
2551 }; // class pass_insert_vzeroupper
2552
2553 } // anon namespace
2554
2555 rtl_opt_pass *
2556 make_pass_insert_vzeroupper (gcc::context *ctxt)
2557 {
2558 return new pass_insert_vzeroupper (ctxt);
2559 }
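
/* A hedged sketch of how the pass above is typically hooked in: it is
   created with make_pass_insert_vzeroupper and registered with the pass
   manager so that it runs right after reload.  The struct layout and
   register_pass call follow the generic pass-manager interface from
   tree-pass.h; the actual call site in this file may differ, so treat
   this helper as an assumption-laden illustration only.  */

static void ATTRIBUTE_UNUSED
ix86_example_register_vzeroupper (void)
{
  struct register_pass_info insert_vzeroupper_info
    = { make_pass_insert_vzeroupper (g), "reload", 1, PASS_POS_INSERT_AFTER };

  register_pass (&insert_vzeroupper_info);
}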
2560
2561 /* Return true if a red-zone is in use. */
2562
2563 static inline bool
2564 ix86_using_red_zone (void)
2565 {
2566 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2567 }
2568 \f
2569 /* Return a string that documents the current -m options. The caller is
2570 responsible for freeing the string. */
2571
2572 static char *
2573 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2574 const char *tune, enum fpmath_unit fpmath,
2575 bool add_nl_p)
2576 {
2577 struct ix86_target_opts
2578 {
2579 const char *option; /* option string */
2580 HOST_WIDE_INT mask; /* isa mask options */
2581 };
2582
2583 /* This table is ordered so that options like -msse4.2 that imply
2584 preceding options are matched first. */
2585 static struct ix86_target_opts isa_opts[] =
2586 {
2587 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2588 { "-mfma", OPTION_MASK_ISA_FMA },
2589 { "-mxop", OPTION_MASK_ISA_XOP },
2590 { "-mlwp", OPTION_MASK_ISA_LWP },
2591 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2592 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2593 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2594 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2595 { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ },
2596 { "-mavx512bw", OPTION_MASK_ISA_AVX512BW },
2597 { "-mavx512vl", OPTION_MASK_ISA_AVX512VL },
2598 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2599 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2600 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2601 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2602 { "-msse3", OPTION_MASK_ISA_SSE3 },
2603 { "-msse2", OPTION_MASK_ISA_SSE2 },
2604 { "-msse", OPTION_MASK_ISA_SSE },
2605 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2606 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2607 { "-mmmx", OPTION_MASK_ISA_MMX },
2608 { "-mabm", OPTION_MASK_ISA_ABM },
2609 { "-mbmi", OPTION_MASK_ISA_BMI },
2610 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2611 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2612 { "-mhle", OPTION_MASK_ISA_HLE },
2613 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2614 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2615 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2616 { "-madx", OPTION_MASK_ISA_ADX },
2617 { "-mtbm", OPTION_MASK_ISA_TBM },
2618 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2619 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2620 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2621 { "-maes", OPTION_MASK_ISA_AES },
2622 { "-msha", OPTION_MASK_ISA_SHA },
2623 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2624 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2625 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2626 { "-mf16c", OPTION_MASK_ISA_F16C },
2627 { "-mrtm", OPTION_MASK_ISA_RTM },
2628 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2629 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2630 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2631 { "-mclflushopt", OPTION_MASK_ISA_CLFLUSHOPT },
2632 { "-mxsavec", OPTION_MASK_ISA_XSAVEC },
2633 { "-mxsaves", OPTION_MASK_ISA_XSAVES },
2634 };
2635
2636 /* Flag options. */
2637 static struct ix86_target_opts flag_opts[] =
2638 {
2639 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2640 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2641 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2642 { "-m80387", MASK_80387 },
2643 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2644 { "-malign-double", MASK_ALIGN_DOUBLE },
2645 { "-mcld", MASK_CLD },
2646 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2647 { "-mieee-fp", MASK_IEEE_FP },
2648 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2649 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2650 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2651 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2652 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2653 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2654 { "-mno-red-zone", MASK_NO_RED_ZONE },
2655 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2656 { "-mrecip", MASK_RECIP },
2657 { "-mrtd", MASK_RTD },
2658 { "-msseregparm", MASK_SSEREGPARM },
2659 { "-mstack-arg-probe", MASK_STACK_PROBE },
2660 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2661 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2662 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2663 { "-mvzeroupper", MASK_VZEROUPPER },
2664 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2665 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2666 { "-mprefer-avx128", MASK_PREFER_AVX128},
2667 };
2668
2669 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2670
2671 char isa_other[40];
2672 char target_other[40];
2673 unsigned num = 0;
2674 unsigned i, j;
2675 char *ret;
2676 char *ptr;
2677 size_t len;
2678 size_t line_len;
2679 size_t sep_len;
2680 const char *abi;
2681
2682 memset (opts, '\0', sizeof (opts));
2683
2684 /* Add -march= option. */
2685 if (arch)
2686 {
2687 opts[num][0] = "-march=";
2688 opts[num++][1] = arch;
2689 }
2690
2691 /* Add -mtune= option. */
2692 if (tune)
2693 {
2694 opts[num][0] = "-mtune=";
2695 opts[num++][1] = tune;
2696 }
2697
2698 /* Add -m32/-m64/-mx32. */
2699 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2700 {
2701 if ((isa & OPTION_MASK_ABI_64) != 0)
2702 abi = "-m64";
2703 else
2704 abi = "-mx32";
2705 isa &= ~ (OPTION_MASK_ISA_64BIT
2706 | OPTION_MASK_ABI_64
2707 | OPTION_MASK_ABI_X32);
2708 }
2709 else
2710 abi = "-m32";
2711 opts[num++][0] = abi;
2712
2713 /* Pick out the ISA options. */
2714 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2715 {
2716 if ((isa & isa_opts[i].mask) != 0)
2717 {
2718 opts[num++][0] = isa_opts[i].option;
2719 isa &= ~ isa_opts[i].mask;
2720 }
2721 }
2722
2723 if (isa && add_nl_p)
2724 {
2725 opts[num++][0] = isa_other;
2726 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2727 isa);
2728 }
2729
2730 /* Add flag options. */
2731 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2732 {
2733 if ((flags & flag_opts[i].mask) != 0)
2734 {
2735 opts[num++][0] = flag_opts[i].option;
2736 flags &= ~ flag_opts[i].mask;
2737 }
2738 }
2739
2740 if (flags && add_nl_p)
2741 {
2742 opts[num++][0] = target_other;
2743 sprintf (target_other, "(other flags: %#x)", flags);
2744 }
2745
2746 /* Add -fpmath= option. */
2747 if (fpmath)
2748 {
2749 opts[num][0] = "-mfpmath=";
2750 switch ((int) fpmath)
2751 {
2752 case FPMATH_387:
2753 opts[num++][1] = "387";
2754 break;
2755
2756 case FPMATH_SSE:
2757 opts[num++][1] = "sse";
2758 break;
2759
2760 case FPMATH_387 | FPMATH_SSE:
2761 opts[num++][1] = "sse+387";
2762 break;
2763
2764 default:
2765 gcc_unreachable ();
2766 }
2767 }
2768
2769 /* Any options? */
2770 if (num == 0)
2771 return NULL;
2772
2773 gcc_assert (num < ARRAY_SIZE (opts));
2774
2775 /* Size the string. */
2776 len = 0;
2777 sep_len = (add_nl_p) ? 3 : 1;
2778 for (i = 0; i < num; i++)
2779 {
2780 len += sep_len;
2781 for (j = 0; j < 2; j++)
2782 if (opts[i][j])
2783 len += strlen (opts[i][j]);
2784 }
2785
2786 /* Build the string. */
2787 ret = ptr = (char *) xmalloc (len);
2788 line_len = 0;
2789
2790 for (i = 0; i < num; i++)
2791 {
2792 size_t len2[2];
2793
2794 for (j = 0; j < 2; j++)
2795 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2796
2797 if (i != 0)
2798 {
2799 *ptr++ = ' ';
2800 line_len++;
2801
2802 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2803 {
2804 *ptr++ = '\\';
2805 *ptr++ = '\n';
2806 line_len = 0;
2807 }
2808 }
2809
2810 for (j = 0; j < 2; j++)
2811 if (opts[i][j])
2812 {
2813 memcpy (ptr, opts[i][j], len2[j]);
2814 ptr += len2[j];
2815 line_len += len2[j];
2816 }
2817 }
2818
2819 *ptr = '\0';
2820 gcc_assert (ret + len >= ptr);
2821
2822 return ret;
2823 }
2824
2825 /* Return true if profiling code should be emitted before the
2826 prologue, and false otherwise.
2827 Note: for x86 with "hotfix" this is problematic. */
2828 static bool
2829 ix86_profile_before_prologue (void)
2830 {
2831 return flag_fentry != 0;
2832 }
2833
2834 /* Function that is callable from the debugger to print the current
2835 options. */
2836 void ATTRIBUTE_UNUSED
2837 ix86_debug_options (void)
2838 {
2839 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2840 ix86_arch_string, ix86_tune_string,
2841 ix86_fpmath, true);
2842
2843 if (opts)
2844 {
2845 fprintf (stderr, "%s\n\n", opts);
2846 free (opts);
2847 }
2848 else
2849 fputs ("<no options>\n\n", stderr);
2850
2851 return;
2852 }
2853
2854 static const char *stringop_alg_names[] = {
2855 #define DEF_ENUM
2856 #define DEF_ALG(alg, name) #name,
2857 #include "stringop.def"
2858 #undef DEF_ENUM
2859 #undef DEF_ALG
2860 };
2861
2862 /* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2863 The string is of the following form (or a comma-separated list of such entries):
2864
2865 strategy_alg:max_size:[align|noalign]
2866
2867 where the full size range for the strategy is either [0, max_size] or
2868 [min_size, max_size], where min_size is max_size + 1 of the
2869 preceding range. The last size range must have max_size == -1.
2870
2871 Examples:
2872
2873 1.
2874 -mmemcpy-strategy=libcall:-1:noalign
2875
2876 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2877
2878
2879 2.
2880 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2881
2882 This is to tell the compiler to use the following strategy for memset
2883 1) when the expected size is between [1, 16], use rep_8byte strategy;
2884 2) when the size is between [17, 2048], use vector_loop;
2885 3) when the size is > 2048, use libcall. */
2886
2887 struct stringop_size_range
2888 {
2889 int max;
2890 stringop_alg alg;
2891 bool noalign;
2892 };
2893
2894 static void
2895 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2896 {
2897 const struct stringop_algs *default_algs;
2898 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2899 char *curr_range_str, *next_range_str;
2900 int i = 0, n = 0;
2901
2902 if (is_memset)
2903 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2904 else
2905 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2906
2907 curr_range_str = strategy_str;
2908
2909 do
2910 {
2911 int maxs;
2912 char alg_name[128];
2913 char align[16];
2914 next_range_str = strchr (curr_range_str, ',');
2915 if (next_range_str)
2916 *next_range_str++ = '\0';
2917
2918 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2919 alg_name, &maxs, align))
2920 {
2921 error ("wrong arg %s to option %s", curr_range_str,
2922 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2923 return;
2924 }
2925
2926 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2927 {
2928 error ("size ranges of option %s should be increasing",
2929 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2930 return;
2931 }
2932
2933 for (i = 0; i < last_alg; i++)
2934 if (!strcmp (alg_name, stringop_alg_names[i]))
2935 break;
2936
2937 if (i == last_alg)
2938 {
2939 error ("wrong stringop strategy name %s specified for option %s",
2940 alg_name,
2941 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2942 return;
2943 }
2944
2945 input_ranges[n].max = maxs;
2946 input_ranges[n].alg = (stringop_alg) i;
2947 if (!strcmp (align, "align"))
2948 input_ranges[n].noalign = false;
2949 else if (!strcmp (align, "noalign"))
2950 input_ranges[n].noalign = true;
2951 else
2952 {
2953 error ("unknown alignment %s specified for option %s",
2954 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2955 return;
2956 }
2957 n++;
2958 curr_range_str = next_range_str;
2959 }
2960 while (curr_range_str);
2961
2962 if (input_ranges[n - 1].max != -1)
2963 {
2964 error ("the max value for the last size range should be -1"
2965 " for option %s",
2966 	     is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2967 return;
2968 }
2969
2970 if (n > MAX_STRINGOP_ALGS)
2971 {
2972 error ("too many size ranges specified in option %s",
2973              is_memset ? "-mmemset-strategy=" : "-mmemcpy-strategy=");
2974 return;
2975 }
2976
2977 /* Now override the default algs array. */
2978 for (i = 0; i < n; i++)
2979 {
2980 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2981 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2982 = input_ranges[i].alg;
2983 *const_cast<int *>(&default_algs->size[i].noalign)
2984 = input_ranges[i].noalign;
2985 }
2986 }
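
/* For example (algorithm names taken from stringop.def), the string
     "vector_loop:2048:align,libcall:-1:noalign"
   parses into two input_ranges entries, {2048, vector_loop, false} and
   {-1, libcall, true}, which then overwrite the first two size entries of
   the selected default cost table in place.  */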
2987
2988 \f
2989 /* Parse the -mtune-ctrl= option.  When DUMP is true,
2990 print the features that are explicitly set. */
2991
2992 static void
2993 parse_mtune_ctrl_str (bool dump)
2994 {
2995 if (!ix86_tune_ctrl_string)
2996 return;
2997
2998 char *next_feature_string = NULL;
2999 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3000 char *orig = curr_feature_string;
3001 int i;
3002 do
3003 {
3004 bool clear = false;
3005
3006 next_feature_string = strchr (curr_feature_string, ',');
3007 if (next_feature_string)
3008 *next_feature_string++ = '\0';
3009 if (*curr_feature_string == '^')
3010 {
3011 curr_feature_string++;
3012 clear = true;
3013 }
3014 for (i = 0; i < X86_TUNE_LAST; i++)
3015 {
3016 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3017 {
3018 ix86_tune_features[i] = !clear;
3019 if (dump)
3020 fprintf (stderr, "Explicitly %s feature %s\n",
3021 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3022 break;
3023 }
3024 }
3025 if (i == X86_TUNE_LAST)
3026       error ("unknown parameter to option -mtune-ctrl: %s",
3027 clear ? curr_feature_string - 1 : curr_feature_string);
3028 curr_feature_string = next_feature_string;
3029 }
3030 while (curr_feature_string);
3031 free (orig);
3032 }
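
/* For example, -mtune-ctrl=use_leave,^partial_reg_dependency (feature
   names come from x86-tune.def) explicitly sets the first feature and
   clears the second, overriding the per-processor defaults chosen by
   set_ix86_tune_features below.  */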
3033
3034 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3035 processor type. */
3036
3037 static void
3038 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3039 {
3040 unsigned int ix86_tune_mask = 1u << ix86_tune;
3041 int i;
3042
3043 for (i = 0; i < X86_TUNE_LAST; ++i)
3044 {
3045 if (ix86_tune_no_default)
3046 ix86_tune_features[i] = 0;
3047 else
3048 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3049 }
3050
3051 if (dump)
3052 {
3053 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3054 for (i = 0; i < X86_TUNE_LAST; i++)
3055 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3056 ix86_tune_features[i] ? "on" : "off");
3057 }
3058
3059 parse_mtune_ctrl_str (dump);
3060 }
3061
3062
3063 /* Override various settings based on options. If MAIN_ARGS_P, the
3064 options are from the command line, otherwise they are from
3065 attributes. */
3066
3067 static void
3068 ix86_option_override_internal (bool main_args_p,
3069 struct gcc_options *opts,
3070 struct gcc_options *opts_set)
3071 {
3072 int i;
3073 unsigned int ix86_arch_mask;
3074 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3075 const char *prefix;
3076 const char *suffix;
3077 const char *sw;
3078
3079 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3080 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3081 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3082 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3083 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3084 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3085 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3086 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3087 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3088 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3089 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3090 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3091 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3092 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3093 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3094 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3095 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3096 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3097 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3098 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3099 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3100 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3101 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3102 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3103 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3104 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3105 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3106 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3107 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3108 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3109 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3110 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3111 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3112 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3113 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3114 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3115 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3116 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3117 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3118 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3119 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3120 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3121 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3122 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3123 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3124 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3125 #define PTA_CLFLUSHOPT (HOST_WIDE_INT_1 << 47)
3126 #define PTA_XSAVEC (HOST_WIDE_INT_1 << 48)
3127 #define PTA_XSAVES (HOST_WIDE_INT_1 << 49)
3128 #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50)
3129 #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51)
3130 #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52)
3131
3132 #define PTA_CORE2 \
3133 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3134 | PTA_CX16 | PTA_FXSR)
3135 #define PTA_NEHALEM \
3136 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3137 #define PTA_WESTMERE \
3138 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3139 #define PTA_SANDYBRIDGE \
3140 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3141 #define PTA_IVYBRIDGE \
3142 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3143 #define PTA_HASWELL \
3144 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3145 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3146 #define PTA_BROADWELL \
3147 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3148 #define PTA_BONNELL \
3149 (PTA_CORE2 | PTA_MOVBE)
3150 #define PTA_SILVERMONT \
3151 (PTA_WESTMERE | PTA_MOVBE)
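
/* Apart from the PTA_CORE2 base, each PTA_<cpu> macro above is its
   predecessor's flags plus the ISA extensions introduced by that
   generation, e.g. PTA_HASWELL is PTA_IVYBRIDGE plus AVX2, BMI, BMI2,
   LZCNT, FMA, MOVBE and HLE.  */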
3152
3153   /* If this reaches 64, we need to widen the flags field of struct pta below.  */
3154
3155 static struct pta
3156 {
3157 const char *const name; /* processor name or nickname. */
3158 const enum processor_type processor;
3159 const enum attr_cpu schedule;
3160 const unsigned HOST_WIDE_INT flags;
3161 }
3162 const processor_alias_table[] =
3163 {
3164 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3165 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3166 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3167 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3168 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3169 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3170 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3171 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3172 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3173 PTA_MMX | PTA_SSE | PTA_FXSR},
3174 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3175 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3176 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3177 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3178 PTA_MMX | PTA_SSE | PTA_FXSR},
3179 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3180 PTA_MMX | PTA_SSE | PTA_FXSR},
3181 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3182 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3183 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3184        PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3185 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3186 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3187 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3188 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3189 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3190 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3191 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3192 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3193 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3194 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3195 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3196 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3197 PTA_SANDYBRIDGE},
3198 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3199 PTA_SANDYBRIDGE},
3200 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3201 PTA_IVYBRIDGE},
3202 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3203 PTA_IVYBRIDGE},
3204 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3205 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3206 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3207 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3208 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3209 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3210 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3211 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3212 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3214 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3215 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3216 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3217 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3218 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3219 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3220 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3221 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3222 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3223 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3224 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3225 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3226 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3227 {"x86-64", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3229 {"k8", PROCESSOR_K8, CPU_K8,
3230 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3231 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3232 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3233 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3234 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3235 {"opteron", PROCESSOR_K8, CPU_K8,
3236 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3237 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3238 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3239 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3240 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3241 {"athlon64", PROCESSOR_K8, CPU_K8,
3242 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3243 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3244 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3245 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3246 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3247 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3248 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3249 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3250 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3251 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3252 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3253 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3254 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3255 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3256 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3261 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3262 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3263 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3264 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3265 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3266 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3267 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3268 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3269 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3270 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3271 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3272 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3273 | PTA_XSAVEOPT | PTA_FSGSBASE},
3274 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3278 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3279 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3280 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3281 | PTA_MOVBE},
3282 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3283 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3284 	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW
3285 | PTA_FXSR | PTA_XSAVE},
3286 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3287 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3288 	| PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_SSE4_1
3289 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3290 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3291 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3292
3293 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3294 PTA_64BIT
3295 | PTA_HLE /* flags are only used for -march switch. */ },
3296 };
3297
3298 /* -mrecip options. */
3299 static struct
3300 {
3301 const char *string; /* option name */
3302 unsigned int mask; /* mask bits to set */
3303 }
3304 const recip_options[] =
3305 {
3306 { "all", RECIP_MASK_ALL },
3307 { "none", RECIP_MASK_NONE },
3308 { "div", RECIP_MASK_DIV },
3309 { "sqrt", RECIP_MASK_SQRT },
3310 { "vec-div", RECIP_MASK_VEC_DIV },
3311 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3312 };
3313
3314 int const pta_size = ARRAY_SIZE (processor_alias_table);
3315
3316 /* Set up prefix/suffix so the error messages refer to either the command
3317 line argument, or the attribute(target). */
3318 if (main_args_p)
3319 {
3320 prefix = "-m";
3321 suffix = "";
3322 sw = "switch";
3323 }
3324 else
3325 {
3326 prefix = "option(\"";
3327 suffix = "\")";
3328 sw = "attribute";
3329 }
3330
3331 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3332 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3333 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3334 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3335 #ifdef TARGET_BI_ARCH
3336 else
3337 {
3338 #if TARGET_BI_ARCH == 1
3339 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3340 is on and OPTION_MASK_ABI_X32 is off. We turn off
3341 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3342 -mx32. */
3343 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3344 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3345 #else
3346 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3347 on and OPTION_MASK_ABI_64 is off. We turn off
3348 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3349 -m64 or OPTION_MASK_CODE16 is turned on by -m16. */
3350 if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3351 || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3352 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3353 #endif
3354 }
3355 #endif
3356
3357 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3358 {
3359 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3360 OPTION_MASK_ABI_64 for TARGET_X32. */
3361 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3362 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3363 }
3364 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3365 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3366 | OPTION_MASK_ABI_X32
3367 | OPTION_MASK_ABI_64);
3368 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3369 {
3370 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3371 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3372 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3373 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3374 }
3375
3376 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3377 SUBTARGET_OVERRIDE_OPTIONS;
3378 #endif
3379
3380 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3381 SUBSUBTARGET_OVERRIDE_OPTIONS;
3382 #endif
3383
3384   /* -fPIC is the default for 64-bit Mach-O.  */
3385 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3386 opts->x_flag_pic = 2;
3387
3388 /* Need to check -mtune=generic first. */
3389 if (opts->x_ix86_tune_string)
3390 {
3391 /* As special support for cross compilers we read -mtune=native
3392 as -mtune=generic. With native compilers we won't see the
3393 -mtune=native, as it was changed by the driver. */
3394 if (!strcmp (opts->x_ix86_tune_string, "native"))
3395 {
3396 opts->x_ix86_tune_string = "generic";
3397 }
3398 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3399 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3400 "%stune=k8%s or %stune=generic%s instead as appropriate",
3401 prefix, suffix, prefix, suffix, prefix, suffix);
3402 }
3403 else
3404 {
3405 if (opts->x_ix86_arch_string)
3406 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3407 if (!opts->x_ix86_tune_string)
3408 {
3409 opts->x_ix86_tune_string
3410 = processor_target_table[TARGET_CPU_DEFAULT].name;
3411 ix86_tune_defaulted = 1;
3412 }
3413
3414 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3415 or defaulted. We need to use a sensible tune option. */
3416 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3417 {
3418 opts->x_ix86_tune_string = "generic";
3419 }
3420 }
3421
3422 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3423 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3424 {
3425 /* rep; movq isn't available in 32-bit code. */
3426 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3427 opts->x_ix86_stringop_alg = no_stringop;
3428 }
3429
3430 if (!opts->x_ix86_arch_string)
3431 opts->x_ix86_arch_string
3432 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3433 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3434 else
3435 ix86_arch_specified = 1;
3436
3437 if (opts_set->x_ix86_pmode)
3438 {
3439 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3440 && opts->x_ix86_pmode == PMODE_SI)
3441 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3442 && opts->x_ix86_pmode == PMODE_DI))
3443 error ("address mode %qs not supported in the %s bit mode",
3444 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3445 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3446 }
3447 else
3448 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3449 ? PMODE_DI : PMODE_SI;
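  /* So unless the pointer mode was set explicitly (via -maddress-mode=),
     only LP64 targets use DImode pointers; both 32-bit and x32 code keep
     Pmode == SImode.  */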
3450
3451 if (!opts_set->x_ix86_abi)
3452 opts->x_ix86_abi = DEFAULT_ABI;
3453
3454   /* For targets using the MS ABI, enable ms-extensions unless it has
3455      been explicitly turned off.  For non-MS ABIs we turn this option
3456      off.  */
3457 if (!opts_set->x_flag_ms_extensions)
3458 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3459
3460 if (opts_set->x_ix86_cmodel)
3461 {
3462 switch (opts->x_ix86_cmodel)
3463 {
3464 case CM_SMALL:
3465 case CM_SMALL_PIC:
3466 if (opts->x_flag_pic)
3467 opts->x_ix86_cmodel = CM_SMALL_PIC;
3468 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3469 error ("code model %qs not supported in the %s bit mode",
3470 "small", "32");
3471 break;
3472
3473 case CM_MEDIUM:
3474 case CM_MEDIUM_PIC:
3475 if (opts->x_flag_pic)
3476 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3477 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3478 error ("code model %qs not supported in the %s bit mode",
3479 "medium", "32");
3480 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3481 error ("code model %qs not supported in x32 mode",
3482 "medium");
3483 break;
3484
3485 case CM_LARGE:
3486 case CM_LARGE_PIC:
3487 if (opts->x_flag_pic)
3488 opts->x_ix86_cmodel = CM_LARGE_PIC;
3489 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3490 error ("code model %qs not supported in the %s bit mode",
3491 "large", "32");
3492 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3493 error ("code model %qs not supported in x32 mode",
3494 "large");
3495 break;
3496
3497 case CM_32:
3498 if (opts->x_flag_pic)
3499 error ("code model %s does not support PIC mode", "32");
3500 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3501 error ("code model %qs not supported in the %s bit mode",
3502 "32", "64");
3503 break;
3504
3505 case CM_KERNEL:
3506 if (opts->x_flag_pic)
3507 {
3508 error ("code model %s does not support PIC mode", "kernel");
3509 opts->x_ix86_cmodel = CM_32;
3510 }
3511 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3512 error ("code model %qs not supported in the %s bit mode",
3513 "kernel", "32");
3514 break;
3515
3516 default:
3517 gcc_unreachable ();
3518 }
3519 }
3520 else
3521 {
3522 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3523 use of rip-relative addressing. This eliminates fixups that
3524 would otherwise be needed if this object is to be placed in a
3525 DLL, and is essentially just as efficient as direct addressing. */
3526 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3527 && (TARGET_RDOS || TARGET_PECOFF))
3528 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3529 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3530 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3531 else
3532 opts->x_ix86_cmodel = CM_32;
3533 }
3534 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3535 {
3536 error ("-masm=intel not supported in this configuration");
3537 opts->x_ix86_asm_dialect = ASM_ATT;
3538 }
3539 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3540 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3541 sorry ("%i-bit mode not compiled in",
3542 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3543
3544 for (i = 0; i < pta_size; i++)
3545 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3546 {
3547 ix86_schedule = processor_alias_table[i].schedule;
3548 ix86_arch = processor_alias_table[i].processor;
3549 /* Default cpu tuning to the architecture. */
3550 ix86_tune = ix86_arch;
3551
3552 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3553 && !(processor_alias_table[i].flags & PTA_64BIT))
3554 error ("CPU you selected does not support x86-64 "
3555 "instruction set");
3556
3557 if (processor_alias_table[i].flags & PTA_MMX
3558 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3559 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3560 if (processor_alias_table[i].flags & PTA_3DNOW
3561 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3562 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3563 if (processor_alias_table[i].flags & PTA_3DNOW_A
3564 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3565 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3566 if (processor_alias_table[i].flags & PTA_SSE
3567 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3568 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3569 if (processor_alias_table[i].flags & PTA_SSE2
3570 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3571 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3572 if (processor_alias_table[i].flags & PTA_SSE3
3573 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3574 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3575 if (processor_alias_table[i].flags & PTA_SSSE3
3576 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3577 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3578 if (processor_alias_table[i].flags & PTA_SSE4_1
3579 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3580 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3581 if (processor_alias_table[i].flags & PTA_SSE4_2
3582 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3583 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3584 if (processor_alias_table[i].flags & PTA_AVX
3585 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3586 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3587 if (processor_alias_table[i].flags & PTA_AVX2
3588 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3589 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3590 if (processor_alias_table[i].flags & PTA_FMA
3591 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3592 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3593 if (processor_alias_table[i].flags & PTA_SSE4A
3594 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3595 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3596 if (processor_alias_table[i].flags & PTA_FMA4
3597 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3598 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3599 if (processor_alias_table[i].flags & PTA_XOP
3600 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3601 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3602 if (processor_alias_table[i].flags & PTA_LWP
3603 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3604 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3605 if (processor_alias_table[i].flags & PTA_ABM
3606 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3607 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3608 if (processor_alias_table[i].flags & PTA_BMI
3609 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3610 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3611 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3612 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3613 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3614 if (processor_alias_table[i].flags & PTA_TBM
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3617 if (processor_alias_table[i].flags & PTA_BMI2
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3620 if (processor_alias_table[i].flags & PTA_CX16
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3623 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3626 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3627 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3628 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3629 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3630 if (processor_alias_table[i].flags & PTA_MOVBE
3631 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3632 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3633 if (processor_alias_table[i].flags & PTA_AES
3634 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3635 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3636 if (processor_alias_table[i].flags & PTA_SHA
3637 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3638 ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3639 if (processor_alias_table[i].flags & PTA_PCLMUL
3640 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3641 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3642 if (processor_alias_table[i].flags & PTA_FSGSBASE
3643 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3644 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3645 if (processor_alias_table[i].flags & PTA_RDRND
3646 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3647 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3648 if (processor_alias_table[i].flags & PTA_F16C
3649 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3650 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3651 if (processor_alias_table[i].flags & PTA_RTM
3652 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3653 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3654 if (processor_alias_table[i].flags & PTA_HLE
3655 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3656 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3657 if (processor_alias_table[i].flags & PTA_PRFCHW
3658 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3659 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3660 if (processor_alias_table[i].flags & PTA_RDSEED
3661 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3662 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3663 if (processor_alias_table[i].flags & PTA_ADX
3664 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3665 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3666 if (processor_alias_table[i].flags & PTA_FXSR
3667 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3668 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3669 if (processor_alias_table[i].flags & PTA_XSAVE
3670 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3671 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3672 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3673 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3674 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3675 if (processor_alias_table[i].flags & PTA_AVX512F
3676 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3677 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3678 if (processor_alias_table[i].flags & PTA_AVX512ER
3679 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3680 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3681 if (processor_alias_table[i].flags & PTA_AVX512PF
3682 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3683 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3684 if (processor_alias_table[i].flags & PTA_AVX512CD
3685 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3686 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3687 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3688 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3689 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3690 if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3691 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3692 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3693 if (processor_alias_table[i].flags & PTA_XSAVEC
3694 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3695 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3696 if (processor_alias_table[i].flags & PTA_XSAVES
3697 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3698 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3699 if (processor_alias_table[i].flags & PTA_AVX512DQ
3700 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3701 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3702 if (processor_alias_table[i].flags & PTA_AVX512BW
3703 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3704 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3705 if (processor_alias_table[i].flags & PTA_AVX512VL
3706 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3707 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3708 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3709 x86_prefetch_sse = true;
3710
3711 break;
3712 }
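
  /* Note that each PTA_* capability above only turns on the corresponding
     OPTION_MASK_ISA_* bit when the user has not set that ISA explicitly,
     so e.g. -march=corei7 -mno-sse4.2 still leaves SSE4.2 disabled.  */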
3713
3714 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3715 error ("generic CPU can be used only for %stune=%s %s",
3716 prefix, suffix, sw);
3717 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3718 error ("intel CPU can be used only for %stune=%s %s",
3719 prefix, suffix, sw);
3720 else if (i == pta_size)
3721 error ("bad value (%s) for %sarch=%s %s",
3722 opts->x_ix86_arch_string, prefix, suffix, sw);
3723
3724 ix86_arch_mask = 1u << ix86_arch;
3725 for (i = 0; i < X86_ARCH_LAST; ++i)
3726 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3727
3728 for (i = 0; i < pta_size; i++)
3729 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3730 {
3731 ix86_schedule = processor_alias_table[i].schedule;
3732 ix86_tune = processor_alias_table[i].processor;
3733 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3734 {
3735 if (!(processor_alias_table[i].flags & PTA_64BIT))
3736 {
3737 if (ix86_tune_defaulted)
3738 {
3739 opts->x_ix86_tune_string = "x86-64";
3740 for (i = 0; i < pta_size; i++)
3741 if (! strcmp (opts->x_ix86_tune_string,
3742 processor_alias_table[i].name))
3743 break;
3744 ix86_schedule = processor_alias_table[i].schedule;
3745 ix86_tune = processor_alias_table[i].processor;
3746 }
3747 else
3748 error ("CPU you selected does not support x86-64 "
3749 "instruction set");
3750 }
3751 }
3752 /* Intel CPUs have always interpreted SSE prefetch instructions as
3753 NOPs; so, we can enable SSE prefetch instructions even when
3754 -mtune (rather than -march) points us to a processor that has them.
3755 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3756 higher processors. */
3757 if (TARGET_CMOV
3758 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3759 x86_prefetch_sse = true;
3760 break;
3761 }
3762
3763 if (ix86_tune_specified && i == pta_size)
3764 error ("bad value (%s) for %stune=%s %s",
3765 opts->x_ix86_tune_string, prefix, suffix, sw);
3766
3767 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3768
3769 #ifndef USE_IX86_FRAME_POINTER
3770 #define USE_IX86_FRAME_POINTER 0
3771 #endif
3772
3773 #ifndef USE_X86_64_FRAME_POINTER
3774 #define USE_X86_64_FRAME_POINTER 0
3775 #endif
3776
3777 /* Set the default values for switches whose default depends on TARGET_64BIT
3778 in case they weren't overwritten by command line options. */
3779 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3780 {
3781 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3782 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3783 if (opts->x_flag_asynchronous_unwind_tables
3784 && !opts_set->x_flag_unwind_tables
3785 && TARGET_64BIT_MS_ABI)
3786 opts->x_flag_unwind_tables = 1;
3787 if (opts->x_flag_asynchronous_unwind_tables == 2)
3788 opts->x_flag_unwind_tables
3789 = opts->x_flag_asynchronous_unwind_tables = 1;
3790 if (opts->x_flag_pcc_struct_return == 2)
3791 opts->x_flag_pcc_struct_return = 0;
3792 }
3793 else
3794 {
3795 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3796 opts->x_flag_omit_frame_pointer
3797 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3798 if (opts->x_flag_asynchronous_unwind_tables == 2)
3799 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3800 if (opts->x_flag_pcc_struct_return == 2)
3801 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3802 }
3803
3804 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3805 if (opts->x_optimize_size)
3806 ix86_cost = &ix86_size_cost;
3807 else
3808 ix86_cost = ix86_tune_cost;
3809
3810 /* Arrange to set up i386_stack_locals for all functions. */
3811 init_machine_status = ix86_init_machine_status;
3812
3813 /* Validate -mregparm= value. */
3814 if (opts_set->x_ix86_regparm)
3815 {
3816 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3817 warning (0, "-mregparm is ignored in 64-bit mode");
3818 if (opts->x_ix86_regparm > REGPARM_MAX)
3819 {
3820 error ("-mregparm=%d is not between 0 and %d",
3821 opts->x_ix86_regparm, REGPARM_MAX);
3822 opts->x_ix86_regparm = 0;
3823 }
3824 }
3825 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3826 opts->x_ix86_regparm = REGPARM_MAX;
3827
3828 /* Default align_* from the processor table. */
3829 if (opts->x_align_loops == 0)
3830 {
3831 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3832 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3833 }
3834 if (opts->x_align_jumps == 0)
3835 {
3836 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3837 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3838 }
3839 if (opts->x_align_functions == 0)
3840 {
3841 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3842 }
3843
3844 /* Provide default for -mbranch-cost= value. */
3845 if (!opts_set->x_ix86_branch_cost)
3846 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3847
3848 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3849 {
3850 opts->x_target_flags
3851 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3852
3853 /* Enable by default the SSE and MMX builtins. Do allow the user to
3854 explicitly disable any of these. In particular, disabling SSE and
3855 MMX for kernel code is extremely useful. */
3856 if (!ix86_arch_specified)
3857 opts->x_ix86_isa_flags
3858 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3859 | TARGET_SUBTARGET64_ISA_DEFAULT)
3860 & ~opts->x_ix86_isa_flags_explicit);
3861
3862 if (TARGET_RTD_P (opts->x_target_flags))
3863 	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3864 }
3865 else
3866 {
3867 opts->x_target_flags
3868 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3869
3870 if (!ix86_arch_specified)
3871 opts->x_ix86_isa_flags
3872 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3873
3874       /* The i386 ABI does not specify a red zone.  It still makes sense to
3875          use it when the programmer keeps the stack from being destroyed.  */
3876 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3877 opts->x_target_flags |= MASK_NO_RED_ZONE;
3878 }
3879
3880 /* Keep nonleaf frame pointers. */
3881 if (opts->x_flag_omit_frame_pointer)
3882 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3883 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3884 opts->x_flag_omit_frame_pointer = 1;
3885
3886 /* If we're doing fast math, we don't care about comparison order
3887 wrt NaNs. This lets us use a shorter comparison sequence. */
3888 if (opts->x_flag_finite_math_only)
3889 opts->x_target_flags &= ~MASK_IEEE_FP;
3890
3891 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3892 since the insns won't need emulation. */
3893 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3894 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3895
3896 /* Likewise, if the target doesn't have a 387, or we've specified
3897 software floating point, don't use 387 inline intrinsics. */
3898 if (!TARGET_80387_P (opts->x_target_flags))
3899 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3900
3901 /* Turn on MMX builtins for -msse. */
3902 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3903 opts->x_ix86_isa_flags
3904 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3905
3906 /* Enable SSE prefetch. */
3907 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3908 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3909 x86_prefetch_sse = true;
3910
3911 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3912 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3913 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3914 opts->x_ix86_isa_flags
3915 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3916
3917 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3918 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3919 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3920 opts->x_ix86_isa_flags
3921 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3922
3923 /* Enable lzcnt instruction for -mabm. */
3924 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3925 opts->x_ix86_isa_flags
3926 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3927
3928 /* Validate -mpreferred-stack-boundary= value or default it to
3929 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3930 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3931 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3932 {
3933 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3934 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3935 int max = (TARGET_SEH ? 4 : 12);
3936
3937 if (opts->x_ix86_preferred_stack_boundary_arg < min
3938 || opts->x_ix86_preferred_stack_boundary_arg > max)
3939 {
3940 if (min == max)
3941 error ("-mpreferred-stack-boundary is not supported "
3942 "for this target");
3943 else
3944 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3945 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3946 }
3947 else
3948 ix86_preferred_stack_boundary
3949 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3950 }
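
  /* The argument is a power of two in bytes, e.g.
     -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
     = 128 bits, i.e. a 16-byte stack boundary.  */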
3951
3952 /* Set the default value for -mstackrealign. */
3953 if (opts->x_ix86_force_align_arg_pointer == -1)
3954 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3955
3956 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3957
3958 /* Validate -mincoming-stack-boundary= value or default it to
3959 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3960 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3961 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3962 {
3963 if (opts->x_ix86_incoming_stack_boundary_arg
3964 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3965 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3966 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3967 opts->x_ix86_incoming_stack_boundary_arg,
3968 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3969 else
3970 {
3971 ix86_user_incoming_stack_boundary
3972 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3973 ix86_incoming_stack_boundary
3974 = ix86_user_incoming_stack_boundary;
3975 }
3976 }
3977
3978 #ifndef NO_PROFILE_COUNTERS
3979 if (flag_nop_mcount)
3980 error ("-mnop-mcount is not compatible with this target");
3981 #endif
3982 if (flag_nop_mcount && flag_pic)
3983 error ("-mnop-mcount is not implemented for -fPIC");
3984
3985 /* Accept -msseregparm only if at least SSE support is enabled. */
3986 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3987 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3988 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3989
3990 if (opts_set->x_ix86_fpmath)
3991 {
3992 if (opts->x_ix86_fpmath & FPMATH_SSE)
3993 {
3994 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3995 {
3996 	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
3997 opts->x_ix86_fpmath = FPMATH_387;
3998 }
3999 else if ((opts->x_ix86_fpmath & FPMATH_387)
4000 && !TARGET_80387_P (opts->x_target_flags))
4001 {
4002 	      warning (0, "387 instruction set disabled, using SSE arithmetic");
4003 opts->x_ix86_fpmath = FPMATH_SSE;
4004 }
4005 }
4006 }
4007   /* For all chips supporting SSE2, -mfpmath=sse performs better than
4008      fpmath=387.  The latter is, however, the default on many targets, since
4009      the extra 80-bit precision of temporaries is considered part of the ABI.
4010      Overwrite the default at least for -ffast-math.
4011      TODO: -mfpmath=both seems to produce similarly performing code with
4012      slightly smaller binaries.  It is however not clear if register
4013      allocation is ready for this setting.
4014      Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
4015      codegen.  We may switch to 387 with -ffast-math for size-optimized
4016      functions.  */
4017 else if (fast_math_flags_set_p (&global_options)
4018 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4019 opts->x_ix86_fpmath = FPMATH_SSE;
4020 else
4021 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4022
4023 /* If the i387 is disabled, then do not return values in it. */
4024 if (!TARGET_80387_P (opts->x_target_flags))
4025 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4026
4027 /* Use external vectorized library in vectorizing intrinsics. */
4028 if (opts_set->x_ix86_veclibabi_type)
4029 switch (opts->x_ix86_veclibabi_type)
4030 {
4031 case ix86_veclibabi_type_svml:
4032 ix86_veclib_handler = ix86_veclibabi_svml;
4033 break;
4034
4035 case ix86_veclibabi_type_acml:
4036 ix86_veclib_handler = ix86_veclibabi_acml;
4037 break;
4038
4039 default:
4040 gcc_unreachable ();
4041 }
4042
4043 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4044 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4045 && !opts->x_optimize_size)
4046 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4047
4048 /* If stack probes are required, the space used for large function
4049 arguments on the stack must also be probed, so enable
4050 -maccumulate-outgoing-args so this happens in the prologue. */
4051 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4052 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4053 {
4054 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4055 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4056 "for correctness", prefix, suffix);
4057 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4058 }
4059
4060 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4061 {
4062 char *p;
4063 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4064 p = strchr (internal_label_prefix, 'X');
4065 internal_label_prefix_len = p - internal_label_prefix;
4066 *p = '\0';
4067 }
4068
4069   /* When a scheduling description is not available, disable the scheduler
4070      passes so they won't slow down compilation or make x87 code slower.  */
4071 if (!TARGET_SCHEDULE)
4072 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4073
4074 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4075 ix86_tune_cost->simultaneous_prefetches,
4076 opts->x_param_values,
4077 opts_set->x_param_values);
4078 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4079 ix86_tune_cost->prefetch_block,
4080 opts->x_param_values,
4081 opts_set->x_param_values);
4082 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4083 ix86_tune_cost->l1_cache_size,
4084 opts->x_param_values,
4085 opts_set->x_param_values);
4086 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4087 ix86_tune_cost->l2_cache_size,
4088 opts->x_param_values,
4089 opts_set->x_param_values);
4090
4091   /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4092 if (opts->x_flag_prefetch_loop_arrays < 0
4093 && HAVE_prefetch
4094 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4095 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4096 opts->x_flag_prefetch_loop_arrays = 1;
4097
4098 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4099      can be optimized to ap = __builtin_next_arg (0).  */
4100 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4101 targetm.expand_builtin_va_start = NULL;
4102
4103 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4104 {
4105 ix86_gen_leave = gen_leave_rex64;
4106 if (Pmode == DImode)
4107 {
4108 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4109 ix86_gen_tls_local_dynamic_base_64
4110 = gen_tls_local_dynamic_base_64_di;
4111 }
4112 else
4113 {
4114 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4115 ix86_gen_tls_local_dynamic_base_64
4116 = gen_tls_local_dynamic_base_64_si;
4117 }
4118 }
4119 else
4120 ix86_gen_leave = gen_leave;
4121
4122 if (Pmode == DImode)
4123 {
4124 ix86_gen_add3 = gen_adddi3;
4125 ix86_gen_sub3 = gen_subdi3;
4126 ix86_gen_sub3_carry = gen_subdi3_carry;
4127 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4128 ix86_gen_andsp = gen_anddi3;
4129 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4130 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4131 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4132 ix86_gen_monitor = gen_sse3_monitor_di;
4133 }
4134 else
4135 {
4136 ix86_gen_add3 = gen_addsi3;
4137 ix86_gen_sub3 = gen_subsi3;
4138 ix86_gen_sub3_carry = gen_subsi3_carry;
4139 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4140 ix86_gen_andsp = gen_andsi3;
4141 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4142 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4143 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4144 ix86_gen_monitor = gen_sse3_monitor_si;
4145 }
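
  /* From here on the ix86_gen_* hooks emit either the SImode or the DImode
     variant of each pattern, so callers elsewhere in the backend need not
     test Pmode or TARGET_64BIT themselves.  */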
4146
4147 #ifdef USE_IX86_CLD
4148 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4149 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4150 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4151 #endif
4152
4153 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4154 {
4155 if (opts->x_flag_fentry > 0)
4156 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4157 "with -fpic");
4158 opts->x_flag_fentry = 0;
4159 }
4160 else if (TARGET_SEH)
4161 {
4162 if (opts->x_flag_fentry == 0)
4163 sorry ("-mno-fentry isn%'t compatible with SEH");
4164 opts->x_flag_fentry = 1;
4165 }
4166 else if (opts->x_flag_fentry < 0)
4167 {
4168 #if defined(PROFILE_BEFORE_PROLOGUE)
4169 opts->x_flag_fentry = 1;
4170 #else
4171 opts->x_flag_fentry = 0;
4172 #endif
4173 }
4174
4175   /* When not optimizing for size, enable vzeroupper optimization for
4176 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4177 AVX unaligned load/store. */
4178 if (!opts->x_optimize_size)
4179 {
4180 if (flag_expensive_optimizations
4181 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4182 opts->x_target_flags |= MASK_VZEROUPPER;
4183 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4184 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4185 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4186 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4187 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4188 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4189 /* Enable 128-bit AVX instruction generation
4190 for the auto-vectorizer. */
4191 if (TARGET_AVX128_OPTIMAL
4192 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4193 opts->x_target_flags |= MASK_PREFER_AVX128;
4194 }
4195
4196 if (opts->x_ix86_recip_name)
4197 {
4198 char *p = ASTRDUP (opts->x_ix86_recip_name);
4199 char *q;
4200 unsigned int mask, i;
4201 bool invert;
4202
4203 while ((q = strtok (p, ",")) != NULL)
4204 {
4205 p = NULL;
4206 if (*q == '!')
4207 {
4208 invert = true;
4209 q++;
4210 }
4211 else
4212 invert = false;
4213
4214 if (!strcmp (q, "default"))
4215 mask = RECIP_MASK_ALL;
4216 else
4217 {
4218 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4219 if (!strcmp (q, recip_options[i].string))
4220 {
4221 mask = recip_options[i].mask;
4222 break;
4223 }
4224
4225 if (i == ARRAY_SIZE (recip_options))
4226 {
4227 error ("unknown option for -mrecip=%s", q);
4228 invert = false;
4229 mask = RECIP_MASK_NONE;
4230 }
4231 }
4232
4233 opts->x_recip_mask_explicit |= mask;
4234 if (invert)
4235 opts->x_recip_mask &= ~mask;
4236 else
4237 opts->x_recip_mask |= mask;
4238 }
4239 }
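
  /* For example, -mrecip=all,!sqrt first sets every RECIP_MASK_* bit and
     then clears RECIP_MASK_SQRT, so reciprocal approximations are used for
     everything except scalar square roots.  */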
4240
4241 if (TARGET_RECIP_P (opts->x_target_flags))
4242 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4243 else if (opts_set->x_target_flags & MASK_RECIP)
4244 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4245
4246 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4247 for 64-bit Bionic. */
4248 if (TARGET_HAS_BIONIC
4249 && !(opts_set->x_target_flags
4250 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4251 opts->x_target_flags |= (TARGET_64BIT
4252 ? MASK_LONG_DOUBLE_128
4253 : MASK_LONG_DOUBLE_64);
4254
4255 /* Only one of them can be active. */
4256 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4257 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4258
4259   /* Save the initial options in case the user uses function-specific
4260 options. */
4261 if (main_args_p)
4262 target_option_default_node = target_option_current_node
4263 = build_target_option_node (opts);
4264
4265   /* Handle the stack protector.  */
4266 if (!opts_set->x_ix86_stack_protector_guard)
4267 opts->x_ix86_stack_protector_guard
4268 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4269
4270   /* Handle -mmemcpy-strategy= and -mmemset-strategy=.  */
4271 if (opts->x_ix86_tune_memcpy_strategy)
4272 {
4273 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4274 ix86_parse_stringop_strategy_string (str, false);
4275 free (str);
4276 }
4277
4278 if (opts->x_ix86_tune_memset_strategy)
4279 {
4280 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4281 ix86_parse_stringop_strategy_string (str, true);
4282 free (str);
4283 }
4284 }
4285
4286 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4287
4288 static void
4289 ix86_option_override (void)
4290 {
4291 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4292 static struct register_pass_info insert_vzeroupper_info
4293 = { pass_insert_vzeroupper, "reload",
4294 1, PASS_POS_INSERT_AFTER
4295 };
4296
4297 ix86_option_override_internal (true, &global_options, &global_options_set);
4298
4299
4300 /* This needs to be done at start up. It's convenient to do it here. */
4301 register_pass (&insert_vzeroupper_info);
4302 }
4303
4304 /* Update register usage after having seen the compiler flags. */
4305
4306 static void
4307 ix86_conditional_register_usage (void)
4308 {
4309 int i, c_mask;
4310 unsigned int j;
4311
4312 /* The PIC register, if it exists, is fixed. */
4313 j = PIC_OFFSET_TABLE_REGNUM;
4314 if (j != INVALID_REGNUM)
4315 fixed_regs[j] = call_used_regs[j] = 1;
4316
4317 /* For 32-bit targets, squash the REX registers. */
4318 if (! TARGET_64BIT)
4319 {
4320 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4321 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4322 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4323 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4324 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4325 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4326 }
4327
4328 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4329 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4330 : TARGET_64BIT ? (1 << 2)
4331 : (1 << 1));
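
  /* Entries greater than 1 in CALL_USED_REGISTERS encode per-ABI
     call-clobberedness: bit 1 is tested for 32-bit code, bit 2 for the
     64-bit SysV ABI and bit 3 for the 64-bit MS ABI; c_mask selects the
     column for the current target.  */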
4332
4333 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4334
4335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4336 {
4337 /* Set/reset conditionally defined registers from
4338 CALL_USED_REGISTERS initializer. */
4339 if (call_used_regs[i] > 1)
4340 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4341
4342       /* Build the CLOBBERED_REGS register set from the call-used
4343          registers of the GENERAL_REGS register set.  */
4344 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4345 && call_used_regs[i])
4346 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4347 }
4348
4349 /* If MMX is disabled, squash the registers. */
4350 if (! TARGET_MMX)
4351 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4352 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4353 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4354
4355 /* If SSE is disabled, squash the registers. */
4356 if (! TARGET_SSE)
4357 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4358 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4359 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4360
4361 /* If the FPU is disabled, squash the registers. */
4362 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4363 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4364 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4365 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4366
4367 /* If AVX512F is disabled, squash the registers. */
4368 if (! TARGET_AVX512F)
4369 {
4370 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4371 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4372
4373 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4374 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4375 }
4376 }
4377
4378 \f
4379 /* Save the current options */
4380
4381 static void
4382 ix86_function_specific_save (struct cl_target_option *ptr,
4383 struct gcc_options *opts)
4384 {
4385 ptr->arch = ix86_arch;
4386 ptr->schedule = ix86_schedule;
4387 ptr->tune = ix86_tune;
4388 ptr->branch_cost = ix86_branch_cost;
4389 ptr->tune_defaulted = ix86_tune_defaulted;
4390 ptr->arch_specified = ix86_arch_specified;
4391 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4392 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4393 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4394 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4395 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4396 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4397 ptr->x_ix86_abi = opts->x_ix86_abi;
4398 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4399 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4400 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4401 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4402 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4403 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4404 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4405 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4406 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4407 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4408 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4409 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4410 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4411 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4412 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4413 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4414 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4415 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4416 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4417 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4418
4419 /* The fields are char but the variables are not; make sure the
4420 values fit in the fields. */
4421 gcc_assert (ptr->arch == ix86_arch);
4422 gcc_assert (ptr->schedule == ix86_schedule);
4423 gcc_assert (ptr->tune == ix86_tune);
4424 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4425 }
4426
4427 /* Restore the current options */
4428
4429 static void
4430 ix86_function_specific_restore (struct gcc_options *opts,
4431 struct cl_target_option *ptr)
4432 {
4433 enum processor_type old_tune = ix86_tune;
4434 enum processor_type old_arch = ix86_arch;
4435 unsigned int ix86_arch_mask;
4436 int i;
4437
4438 /* We don't change -fPIC. */
4439 opts->x_flag_pic = flag_pic;
4440
4441 ix86_arch = (enum processor_type) ptr->arch;
4442 ix86_schedule = (enum attr_cpu) ptr->schedule;
4443 ix86_tune = (enum processor_type) ptr->tune;
4444 opts->x_ix86_branch_cost = ptr->branch_cost;
4445 ix86_tune_defaulted = ptr->tune_defaulted;
4446 ix86_arch_specified = ptr->arch_specified;
4447 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4448 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4449 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4450 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4451 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4452 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4453 opts->x_ix86_abi = ptr->x_ix86_abi;
4454 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4455 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4456 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4457 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4458 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4459 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4460 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4461 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4462 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4463 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4464 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4465 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4466 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4467 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4468 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4469 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4470 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4471 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4472 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4473 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4474
4475 /* Recreate the arch feature tests if the arch changed */
4476 if (old_arch != ix86_arch)
4477 {
4478 ix86_arch_mask = 1u << ix86_arch;
4479 for (i = 0; i < X86_ARCH_LAST; ++i)
4480 ix86_arch_features[i]
4481 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4482 }
4483
4484 /* Recreate the tune optimization tests */
4485 if (old_tune != ix86_tune)
4486 set_ix86_tune_features (ix86_tune, false);
4487 }
4488
4489 /* Print the current options */
4490
4491 static void
4492 ix86_function_specific_print (FILE *file, int indent,
4493 struct cl_target_option *ptr)
4494 {
4495 char *target_string
4496 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4497 NULL, NULL, ptr->x_ix86_fpmath, false);
4498
4499 gcc_assert (ptr->arch < PROCESSOR_max);
4500 fprintf (file, "%*sarch = %d (%s)\n",
4501 indent, "",
4502 ptr->arch, processor_target_table[ptr->arch].name);
4503
4504 gcc_assert (ptr->tune < PROCESSOR_max);
4505 fprintf (file, "%*stune = %d (%s)\n",
4506 indent, "",
4507 ptr->tune, processor_target_table[ptr->tune].name);
4508
4509 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4510
4511 if (target_string)
4512 {
4513 fprintf (file, "%*s%s\n", indent, "", target_string);
4514 free (target_string);
4515 }
4516 }
4517
4518 \f
4519 /* Inner function to process the attribute((target(...))), take an argument and
4520 set the current options from the argument. If we have a list, recursively go
4521 over the list. */
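/* Usage sketch (source-level, not part of this file): the strings parsed
   below come from declarations such as

     __attribute__((target ("avx2,no-sse4a,arch=haswell,fpmath=sse")))
     void foo (void);

   i.e. comma-separated ISA flags (optionally prefixed with "no-"), the
   string options "arch="/"tune=" and the enum option "fpmath=".  */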
4522
4523 static bool
4524 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4525 struct gcc_options *opts,
4526 struct gcc_options *opts_set,
4527 struct gcc_options *enum_opts_set)
4528 {
4529 char *next_optstr;
4530 bool ret = true;
4531
4532 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4533 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4534 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4535 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4536 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4537
4538 enum ix86_opt_type
4539 {
4540 ix86_opt_unknown,
4541 ix86_opt_yes,
4542 ix86_opt_no,
4543 ix86_opt_str,
4544 ix86_opt_enum,
4545 ix86_opt_isa
4546 };
4547
4548 static const struct
4549 {
4550 const char *string;
4551 size_t len;
4552 enum ix86_opt_type type;
4553 int opt;
4554 int mask;
4555 } attrs[] = {
4556 /* isa options */
4557 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4558 IX86_ATTR_ISA ("abm", OPT_mabm),
4559 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4560 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4561 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4562 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4563 IX86_ATTR_ISA ("aes", OPT_maes),
4564 IX86_ATTR_ISA ("sha", OPT_msha),
4565 IX86_ATTR_ISA ("avx", OPT_mavx),
4566 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4567 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4568 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4569 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4570 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4571 IX86_ATTR_ISA ("avx512dq", OPT_mavx512dq),
4572 IX86_ATTR_ISA ("avx512bw", OPT_mavx512bw),
4573 IX86_ATTR_ISA ("avx512vl", OPT_mavx512vl),
4574 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4575 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4576 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4577 IX86_ATTR_ISA ("sse", OPT_msse),
4578 IX86_ATTR_ISA ("sse2", OPT_msse2),
4579 IX86_ATTR_ISA ("sse3", OPT_msse3),
4580 IX86_ATTR_ISA ("sse4", OPT_msse4),
4581 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4582 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4583 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4584 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4585 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4586 IX86_ATTR_ISA ("fma", OPT_mfma),
4587 IX86_ATTR_ISA ("xop", OPT_mxop),
4588 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4589 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4590 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4591 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4592 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4593 IX86_ATTR_ISA ("hle", OPT_mhle),
4594 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4595 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4596 IX86_ATTR_ISA ("adx", OPT_madx),
4597 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4598 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4599 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4600 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4601 IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt),
4602 IX86_ATTR_ISA ("xsavec", OPT_mxsavec),
4603 IX86_ATTR_ISA ("xsaves", OPT_mxsaves),
4604
4605 /* enum options */
4606 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4607
4608 /* string options */
4609 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4610 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4611
4612 /* flag options */
4613 IX86_ATTR_YES ("cld",
4614 OPT_mcld,
4615 MASK_CLD),
4616
4617 IX86_ATTR_NO ("fancy-math-387",
4618 OPT_mfancy_math_387,
4619 MASK_NO_FANCY_MATH_387),
4620
4621 IX86_ATTR_YES ("ieee-fp",
4622 OPT_mieee_fp,
4623 MASK_IEEE_FP),
4624
4625 IX86_ATTR_YES ("inline-all-stringops",
4626 OPT_minline_all_stringops,
4627 MASK_INLINE_ALL_STRINGOPS),
4628
4629 IX86_ATTR_YES ("inline-stringops-dynamically",
4630 OPT_minline_stringops_dynamically,
4631 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4632
4633 IX86_ATTR_NO ("align-stringops",
4634 OPT_mno_align_stringops,
4635 MASK_NO_ALIGN_STRINGOPS),
4636
4637 IX86_ATTR_YES ("recip",
4638 OPT_mrecip,
4639 MASK_RECIP),
4640
4641 };
4642
4643 /* If this is a list, recurse to get the options. */
4644 if (TREE_CODE (args) == TREE_LIST)
4645 {
4646 bool ret = true;
4647
4648 for (; args; args = TREE_CHAIN (args))
4649 if (TREE_VALUE (args)
4650 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4651 p_strings, opts, opts_set,
4652 enum_opts_set))
4653 ret = false;
4654
4655 return ret;
4656 }
4657
4658 else if (TREE_CODE (args) != STRING_CST)
4659 {
4660 error ("attribute %<target%> argument not a string");
4661 return false;
4662 }
4663
4664 /* Handle multiple arguments separated by commas. */
4665 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4666
4667 while (next_optstr && *next_optstr != '\0')
4668 {
4669 char *p = next_optstr;
4670 char *orig_p = p;
4671 char *comma = strchr (next_optstr, ',');
4672 const char *opt_string;
4673 size_t len, opt_len;
4674 int opt;
4675 bool opt_set_p;
4676 char ch;
4677 unsigned i;
4678 enum ix86_opt_type type = ix86_opt_unknown;
4679 int mask = 0;
4680
4681 if (comma)
4682 {
4683 *comma = '\0';
4684 len = comma - next_optstr;
4685 next_optstr = comma + 1;
4686 }
4687 else
4688 {
4689 len = strlen (p);
4690 next_optstr = NULL;
4691 }
4692
4693 /* Recognize no-xxx. */
4694 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4695 {
4696 opt_set_p = false;
4697 p += 3;
4698 len -= 3;
4699 }
4700 else
4701 opt_set_p = true;
4702
4703 /* Find the option. */
4704 ch = *p;
4705 opt = N_OPTS;
4706 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4707 {
4708 type = attrs[i].type;
4709 opt_len = attrs[i].len;
4710 if (ch == attrs[i].string[0]
4711 && ((type != ix86_opt_str && type != ix86_opt_enum)
4712 ? len == opt_len
4713 : len > opt_len)
4714 && memcmp (p, attrs[i].string, opt_len) == 0)
4715 {
4716 opt = attrs[i].opt;
4717 mask = attrs[i].mask;
4718 opt_string = attrs[i].string;
4719 break;
4720 }
4721 }
4722
4723 /* Process the option. */
4724 if (opt == N_OPTS)
4725 {
4726 error ("attribute(target(\"%s\")) is unknown", orig_p);
4727 ret = false;
4728 }
4729
4730 else if (type == ix86_opt_isa)
4731 {
4732 struct cl_decoded_option decoded;
4733
4734 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4735 ix86_handle_option (opts, opts_set,
4736 &decoded, input_location);
4737 }
4738
4739 else if (type == ix86_opt_yes || type == ix86_opt_no)
4740 {
4741 if (type == ix86_opt_no)
4742 opt_set_p = !opt_set_p;
4743
4744 if (opt_set_p)
4745 opts->x_target_flags |= mask;
4746 else
4747 opts->x_target_flags &= ~mask;
4748 }
4749
4750 else if (type == ix86_opt_str)
4751 {
4752 if (p_strings[opt])
4753 {
4754 error ("option(\"%s\") was already specified", opt_string);
4755 ret = false;
4756 }
4757 else
4758 p_strings[opt] = xstrdup (p + opt_len);
4759 }
4760
4761 else if (type == ix86_opt_enum)
4762 {
4763 bool arg_ok;
4764 int value;
4765
4766 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4767 if (arg_ok)
4768 set_option (opts, enum_opts_set, opt, value,
4769 p + opt_len, DK_UNSPECIFIED, input_location,
4770 global_dc);
4771 else
4772 {
4773 error ("attribute(target(\"%s\")) is unknown", orig_p);
4774 ret = false;
4775 }
4776 }
4777
4778 else
4779 gcc_unreachable ();
4780 }
4781
4782 return ret;
4783 }
4784
4785 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4786
4787 tree
4788 ix86_valid_target_attribute_tree (tree args,
4789 struct gcc_options *opts,
4790 struct gcc_options *opts_set)
4791 {
4792 const char *orig_arch_string = opts->x_ix86_arch_string;
4793 const char *orig_tune_string = opts->x_ix86_tune_string;
4794 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4795 int orig_tune_defaulted = ix86_tune_defaulted;
4796 int orig_arch_specified = ix86_arch_specified;
4797 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4798 tree t = NULL_TREE;
4799 int i;
4800 struct cl_target_option *def
4801 = TREE_TARGET_OPTION (target_option_default_node);
4802 struct gcc_options enum_opts_set;
4803
4804 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4805
4806 /* Process each of the options on the chain. */
4807 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4808 opts_set, &enum_opts_set))
4809 return error_mark_node;
4810
4811 /* If the changed options are different from the default, rerun
4812 ix86_option_override_internal, and then save the options away.
4813 The string options are attribute options, and will be undone
4814 when we copy the save structure. */
4815 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4816 || opts->x_target_flags != def->x_target_flags
4817 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4818 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4819 || enum_opts_set.x_ix86_fpmath)
4820 {
4821 /* If we are using the default tune= or arch=, undo the string assigned,
4822 and use the default. */
4823 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4824 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4825 else if (!orig_arch_specified)
4826 opts->x_ix86_arch_string = NULL;
4827
4828 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4829 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4830 else if (orig_tune_defaulted)
4831 opts->x_ix86_tune_string = NULL;
4832
4833 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4834 if (enum_opts_set.x_ix86_fpmath)
4835 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4836 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4837 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4838 {
4839 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4840 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4841 }
4842
4843 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4844 ix86_option_override_internal (false, opts, opts_set);
4845
4846 /* Add any builtin functions with the new isa if any. */
4847 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4848
4849 /* Save the current options unless we are validating options for
4850 #pragma. */
4851 t = build_target_option_node (opts);
4852
4853 opts->x_ix86_arch_string = orig_arch_string;
4854 opts->x_ix86_tune_string = orig_tune_string;
4855 opts_set->x_ix86_fpmath = orig_fpmath_set;
4856
4857 /* Free up memory allocated to hold the strings */
4858 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4859 free (option_strings[i]);
4860 }
4861
4862 return t;
4863 }
4864
4865 /* Hook to validate attribute((target("string"))). */
4866
4867 static bool
4868 ix86_valid_target_attribute_p (tree fndecl,
4869 tree ARG_UNUSED (name),
4870 tree args,
4871 int ARG_UNUSED (flags))
4872 {
4873 struct gcc_options func_options;
4874 tree new_target, new_optimize;
4875 bool ret = true;
4876
4877 /* attribute((target("default"))) does nothing, beyond
4878 affecting multi-versioning. */
4879 if (TREE_VALUE (args)
4880 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4881 && TREE_CHAIN (args) == NULL_TREE
4882 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4883 return true;
4884
4885 tree old_optimize = build_optimization_node (&global_options);
4886
4887 /* Get the optimization options of the current function. */
4888 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4889
4890 if (!func_optimize)
4891 func_optimize = old_optimize;
4892
4893 /* Init func_options. */
4894 memset (&func_options, 0, sizeof (func_options));
4895 init_options_struct (&func_options, NULL);
4896 lang_hooks.init_options_struct (&func_options);
4897
4898 cl_optimization_restore (&func_options,
4899 TREE_OPTIMIZATION (func_optimize));
4900
4901 /* Initialize func_options to the default before its target options can
4902 be set. */
4903 cl_target_option_restore (&func_options,
4904 TREE_TARGET_OPTION (target_option_default_node));
4905
4906 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4907 &global_options_set);
4908
4909 new_optimize = build_optimization_node (&func_options);
4910
4911 if (new_target == error_mark_node)
4912 ret = false;
4913
4914 else if (fndecl && new_target)
4915 {
4916 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4917
4918 if (old_optimize != new_optimize)
4919 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4920 }
4921
4922 return ret;
4923 }
4924
4925 \f
4926 /* Hook to determine if one function can safely inline another. */
4927
4928 static bool
4929 ix86_can_inline_p (tree caller, tree callee)
4930 {
4931 bool ret = false;
4932 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4933 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4934
4935 /* If callee has no option attributes, then it is ok to inline. */
4936 if (!callee_tree)
4937 ret = true;
4938
4939 /* If caller has no option attributes, but callee does then it is not ok to
4940 inline. */
4941 else if (!caller_tree)
4942 ret = false;
4943
4944 else
4945 {
4946 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4947 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4948
4949 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4950 function can inline an SSE2 function but an SSE2 function can't inline
4951 an SSE4 function. */
4952 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4953 != callee_opts->x_ix86_isa_flags)
4954 ret = false;
4955
4956 /* See if we have the same non-isa options. */
4957 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4958 ret = false;
4959
4960 /* See if arch, tune, etc. are the same. */
4961 else if (caller_opts->arch != callee_opts->arch)
4962 ret = false;
4963
4964 else if (caller_opts->tune != callee_opts->tune)
4965 ret = false;
4966
4967 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4968 ret = false;
4969
4970 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4971 ret = false;
4972
4973 else
4974 ret = true;
4975 }
4976
4977 return ret;
4978 }
4979
4980 \f
4981 /* Remember the last target of ix86_set_current_function. */
4982 static GTY(()) tree ix86_previous_fndecl;
4983
4984 /* Invalidate ix86_previous_fndecl cache. */
4985 void
4986 ix86_reset_previous_fndecl (void)
4987 {
4988 ix86_previous_fndecl = NULL_TREE;
4989 }
4990
4991 /* Establish appropriate back-end context for processing the function
4992 FNDECL. The argument might be NULL to indicate processing at top
4993 level, outside of any function scope. */
4994 static void
4995 ix86_set_current_function (tree fndecl)
4996 {
4997 /* Only change the context if the function changes. This hook is called
4998 several times in the course of compiling a function, and we don't want to
4999 slow things down too much or call target_reinit when it isn't safe. */
5000 if (fndecl && fndecl != ix86_previous_fndecl)
5001 {
5002 tree old_tree = (ix86_previous_fndecl
5003 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
5004 : NULL_TREE);
5005
5006 tree new_tree = (fndecl
5007 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
5008 : NULL_TREE);
5009
5010 ix86_previous_fndecl = fndecl;
5011 if (old_tree == new_tree)
5012 ;
5013
5014 else if (new_tree)
5015 {
5016 cl_target_option_restore (&global_options,
5017 TREE_TARGET_OPTION (new_tree));
5018 if (TREE_TARGET_GLOBALS (new_tree))
5019 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5020 else
5021 TREE_TARGET_GLOBALS (new_tree)
5022 = save_target_globals_default_opts ();
5023 }
5024
5025 else if (old_tree)
5026 {
5027 new_tree = target_option_current_node;
5028 cl_target_option_restore (&global_options,
5029 TREE_TARGET_OPTION (new_tree));
5030 if (TREE_TARGET_GLOBALS (new_tree))
5031 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5032 else if (new_tree == target_option_default_node)
5033 restore_target_globals (&default_target_globals);
5034 else
5035 TREE_TARGET_GLOBALS (new_tree)
5036 = save_target_globals_default_opts ();
5037 }
5038 }
5039 }
5040
5041 \f
5042 /* Return true if this goes in large data/bss. */
5043
5044 static bool
5045 ix86_in_large_data_p (tree exp)
5046 {
5047 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5048 return false;
5049
5050 /* Functions are never large data. */
5051 if (TREE_CODE (exp) == FUNCTION_DECL)
5052 return false;
5053
5054 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5055 {
5056 const char *section = DECL_SECTION_NAME (exp);
5057 if (strcmp (section, ".ldata") == 0
5058 || strcmp (section, ".lbss") == 0)
5059 return true;
5060 return false;
5061 }
5062 else
5063 {
5064 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5065
5066 /* If this is an incomplete type with size 0, then we can't put it
5067 in data because it might be too big when completed. Also,
5068 int_size_in_bytes returns -1 if the size can vary or is larger than
5069 an integer, in which case it is also safer to assume that it goes in
5070 large data. */
5071 if (size <= 0 || size > ix86_section_threshold)
5072 return true;
5073 }
5074
5075 return false;
5076 }
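/* For example (assuming the default -mlarge-data-threshold of 65536 bytes):
   with -mcmodel=medium a definition such as

     static char big_buf[1 << 20];

   is treated as large data by the function above and is therefore placed in
   .lbss (or .ldata if initialized) by the section hooks below, while small
   objects stay in the ordinary .bss/.data sections.  */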
5077
5078 /* Switch to the appropriate section for output of DECL.
5079 DECL is either a `VAR_DECL' node or a constant of some sort.
5080 RELOC indicates whether forming the initial value of DECL requires
5081 link-time relocations. */
5082
5083 ATTRIBUTE_UNUSED static section *
5084 x86_64_elf_select_section (tree decl, int reloc,
5085 unsigned HOST_WIDE_INT align)
5086 {
5087 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5088 && ix86_in_large_data_p (decl))
5089 {
5090 const char *sname = NULL;
5091 unsigned int flags = SECTION_WRITE;
5092 switch (categorize_decl_for_section (decl, reloc))
5093 {
5094 case SECCAT_DATA:
5095 sname = ".ldata";
5096 break;
5097 case SECCAT_DATA_REL:
5098 sname = ".ldata.rel";
5099 break;
5100 case SECCAT_DATA_REL_LOCAL:
5101 sname = ".ldata.rel.local";
5102 break;
5103 case SECCAT_DATA_REL_RO:
5104 sname = ".ldata.rel.ro";
5105 break;
5106 case SECCAT_DATA_REL_RO_LOCAL:
5107 sname = ".ldata.rel.ro.local";
5108 break;
5109 case SECCAT_BSS:
5110 sname = ".lbss";
5111 flags |= SECTION_BSS;
5112 break;
5113 case SECCAT_RODATA:
5114 case SECCAT_RODATA_MERGE_STR:
5115 case SECCAT_RODATA_MERGE_STR_INIT:
5116 case SECCAT_RODATA_MERGE_CONST:
5117 sname = ".lrodata";
5118 flags = 0;
5119 break;
5120 case SECCAT_SRODATA:
5121 case SECCAT_SDATA:
5122 case SECCAT_SBSS:
5123 gcc_unreachable ();
5124 case SECCAT_TEXT:
5125 case SECCAT_TDATA:
5126 case SECCAT_TBSS:
5127 /* We don't split these for medium model. Place them into
5128 default sections and hope for the best. */
5129 break;
5130 }
5131 if (sname)
5132 {
5133 /* We might get called with string constants, but get_named_section
5134 doesn't like them as they are not DECLs. Also, we need to set
5135 flags in that case. */
5136 if (!DECL_P (decl))
5137 return get_section (sname, flags, NULL);
5138 return get_named_section (decl, sname, reloc);
5139 }
5140 }
5141 return default_elf_select_section (decl, reloc, align);
5142 }
5143
5144 /* Select a set of attributes for section NAME based on the properties
5145 of DECL and whether or not RELOC indicates that DECL's initializer
5146 might contain runtime relocations. */
5147
5148 static unsigned int ATTRIBUTE_UNUSED
5149 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5150 {
5151 unsigned int flags = default_section_type_flags (decl, name, reloc);
5152
5153 if (decl == NULL_TREE
5154 && (strcmp (name, ".ldata.rel.ro") == 0
5155 || strcmp (name, ".ldata.rel.ro.local") == 0))
5156 flags |= SECTION_RELRO;
5157
5158 if (strcmp (name, ".lbss") == 0
5159 || strncmp (name, ".lbss.", 5) == 0
5160 || strncmp (name, ".gnu.linkonce.lb.", 16) == 0)
5161 flags |= SECTION_BSS;
5162
5163 return flags;
5164 }
5165
5166 /* Build up a unique section name, expressed as a
5167 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5168 RELOC indicates whether the initial value of EXP requires
5169 link-time relocations. */
5170
5171 static void ATTRIBUTE_UNUSED
5172 x86_64_elf_unique_section (tree decl, int reloc)
5173 {
5174 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5175 && ix86_in_large_data_p (decl))
5176 {
5177 const char *prefix = NULL;
5178 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5179 bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5180
5181 switch (categorize_decl_for_section (decl, reloc))
5182 {
5183 case SECCAT_DATA:
5184 case SECCAT_DATA_REL:
5185 case SECCAT_DATA_REL_LOCAL:
5186 case SECCAT_DATA_REL_RO:
5187 case SECCAT_DATA_REL_RO_LOCAL:
5188 prefix = one_only ? ".ld" : ".ldata";
5189 break;
5190 case SECCAT_BSS:
5191 prefix = one_only ? ".lb" : ".lbss";
5192 break;
5193 case SECCAT_RODATA:
5194 case SECCAT_RODATA_MERGE_STR:
5195 case SECCAT_RODATA_MERGE_STR_INIT:
5196 case SECCAT_RODATA_MERGE_CONST:
5197 prefix = one_only ? ".lr" : ".lrodata";
5198 break;
5199 case SECCAT_SRODATA:
5200 case SECCAT_SDATA:
5201 case SECCAT_SBSS:
5202 gcc_unreachable ();
5203 case SECCAT_TEXT:
5204 case SECCAT_TDATA:
5205 case SECCAT_TBSS:
5206 /* We don't split these for medium model. Place them into
5207 default sections and hope for the best. */
5208 break;
5209 }
5210 if (prefix)
5211 {
5212 const char *name, *linkonce;
5213 char *string;
5214
5215 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5216 name = targetm.strip_name_encoding (name);
5217
5218 /* If we're using one_only, then there needs to be a .gnu.linkonce
5219 prefix to the section name. */
5220 linkonce = one_only ? ".gnu.linkonce" : "";
5221
5222 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5223
5224 set_decl_section_name (decl, string);
5225 return;
5226 }
5227 }
5228 default_unique_section (decl, reloc);
5229 }
5230
5231 #ifdef COMMON_ASM_OP
5232 /* This says how to output assembler code to declare an
5233 uninitialized external linkage data object.
5234
5235 For medium model x86-64 we need to use the .largecomm directive for
5236 large objects. */
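/* As an illustration, a 1 MiB common symbol "buf" with 32-byte alignment
   under -mcmodel=medium is emitted as

	.largecomm	buf,1048576,32

   (the last operand is align / BITS_PER_UNIT, i.e. the alignment in bytes);
   smaller objects get the usual COMMON_ASM_OP directive instead.  */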
5237 void
5238 x86_elf_aligned_common (FILE *file,
5239 const char *name, unsigned HOST_WIDE_INT size,
5240 int align)
5241 {
5242 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5243 && size > (unsigned int)ix86_section_threshold)
5244 fputs (".largecomm\t", file);
5245 else
5246 fputs (COMMON_ASM_OP, file);
5247 assemble_name (file, name);
5248 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5249 size, align / BITS_PER_UNIT);
5250 }
5251 #endif
5252
5253 /* Utility function for targets to use in implementing
5254 ASM_OUTPUT_ALIGNED_BSS. */
5255
5256 void
5257 x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5258 unsigned HOST_WIDE_INT size, int align)
5259 {
5260 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5261 && size > (unsigned int)ix86_section_threshold)
5262 switch_to_section (get_named_section (decl, ".lbss", 0));
5263 else
5264 switch_to_section (bss_section);
5265 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5266 #ifdef ASM_DECLARE_OBJECT_NAME
5267 last_assemble_variable_decl = decl;
5268 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5269 #else
5270 /* The standard thing is just to output a label for the object. */
5271 ASM_OUTPUT_LABEL (file, name);
5272 #endif /* ASM_DECLARE_OBJECT_NAME */
5273 ASM_OUTPUT_SKIP (file, size ? size : 1);
5274 }
5275 \f
5276 /* Decide whether we must probe the stack before any space allocation
5277 on this target. It's essentially TARGET_STACK_PROBE except when
5278 -fstack-check causes the stack to be already probed differently. */
5279
5280 bool
5281 ix86_target_stack_probe (void)
5282 {
5283 /* Do not probe the stack twice if static stack checking is enabled. */
5284 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5285 return false;
5286
5287 return TARGET_STACK_PROBE;
5288 }
5289 \f
5290 /* Decide whether we can make a sibling call to a function. DECL is the
5291 declaration of the function being targeted by the call and EXP is the
5292 CALL_EXPR representing the call. */
5293
5294 static bool
5295 ix86_function_ok_for_sibcall (tree decl, tree exp)
5296 {
5297 tree type, decl_or_type;
5298 rtx a, b;
5299
5300 /* If we are generating position-independent code, we cannot sibcall
5301 optimize any indirect call, or a direct call to a global function,
5302 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5303 if (!TARGET_MACHO
5304 && !TARGET_64BIT
5305 && flag_pic
5306 && (!decl || !targetm.binds_local_p (decl)))
5307 return false;
5308
5309 /* If we need to align the outgoing stack, then sibcalling would
5310 unalign the stack, which may break the called function. */
5311 if (ix86_minimum_incoming_stack_boundary (true)
5312 < PREFERRED_STACK_BOUNDARY)
5313 return false;
5314
5315 if (decl)
5316 {
5317 decl_or_type = decl;
5318 type = TREE_TYPE (decl);
5319 }
5320 else
5321 {
5322 /* We're looking at the CALL_EXPR, we need the type of the function. */
5323 type = CALL_EXPR_FN (exp); /* pointer expression */
5324 type = TREE_TYPE (type); /* pointer type */
5325 type = TREE_TYPE (type); /* function type */
5326 decl_or_type = type;
5327 }
5328
5329 /* Check that the return value locations are the same. For example,
5330 if we are returning floats on the 80387 register stack, we cannot
5331 make a sibcall from a function that doesn't return a float to a
5332 function that does or, conversely, from a function that does return
5333 a float to a function that doesn't; the necessary stack adjustment
5334 would not be executed. This is also the place we notice
5335 differences in the return value ABI. Note that it is ok for one
5336 of the functions to have void return type as long as the return
5337 value of the other is passed in a register. */
5338 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5339 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5340 cfun->decl, false);
5341 if (STACK_REG_P (a) || STACK_REG_P (b))
5342 {
5343 if (!rtx_equal_p (a, b))
5344 return false;
5345 }
5346 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5347 ;
5348 else if (!rtx_equal_p (a, b))
5349 return false;
5350
5351 if (TARGET_64BIT)
5352 {
5353 /* The SYSV ABI has more call-clobbered registers;
5354 disallow sibcalls from MS to SYSV. */
5355 if (cfun->machine->call_abi == MS_ABI
5356 && ix86_function_type_abi (type) == SYSV_ABI)
5357 return false;
5358 }
5359 else
5360 {
5361 /* If this call is indirect, we'll need to be able to use a
5362 call-clobbered register for the address of the target function.
5363 Make sure that all such registers are not used for passing
5364 parameters. Note that DLLIMPORT functions are indirect. */
5365 if (!decl
5366 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5367 {
5368 if (ix86_function_regparm (type, NULL) >= 3)
5369 {
5370 /* ??? Need to count the actual number of registers to be used,
5371 not the possible number of registers. Fix later. */
5372 return false;
5373 }
5374 }
5375 }
5376
5377 /* Otherwise okay. That also includes certain types of indirect calls. */
5378 return true;
5379 }
5380
5381 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5382 and "sseregparm" calling convention attributes;
5383 arguments as in struct attribute_spec.handler. */
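/* Usage sketch (source-level declarations, not part of this file):

     int __attribute__((regparm (3))) f (int a, int b, int c);
     int __attribute__((fastcall)) g (int a, int b);

   The handler below only validates such attributes and diagnoses
   incompatible combinations; the actual argument-passing decisions are
   made later in ix86_function_regparm and init_cumulative_args.  */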
5384
5385 static tree
5386 ix86_handle_cconv_attribute (tree *node, tree name,
5387 tree args,
5388 int,
5389 bool *no_add_attrs)
5390 {
5391 if (TREE_CODE (*node) != FUNCTION_TYPE
5392 && TREE_CODE (*node) != METHOD_TYPE
5393 && TREE_CODE (*node) != FIELD_DECL
5394 && TREE_CODE (*node) != TYPE_DECL)
5395 {
5396 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5397 name);
5398 *no_add_attrs = true;
5399 return NULL_TREE;
5400 }
5401
5402 /* Can combine regparm with all attributes but fastcall and thiscall. */
5403 if (is_attribute_p ("regparm", name))
5404 {
5405 tree cst;
5406
5407 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5408 {
5409 error ("fastcall and regparm attributes are not compatible");
5410 }
5411
5412 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5413 {
5414 error ("regparm and thiscall attributes are not compatible");
5415 }
5416
5417 cst = TREE_VALUE (args);
5418 if (TREE_CODE (cst) != INTEGER_CST)
5419 {
5420 warning (OPT_Wattributes,
5421 "%qE attribute requires an integer constant argument",
5422 name);
5423 *no_add_attrs = true;
5424 }
5425 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5426 {
5427 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5428 name, REGPARM_MAX);
5429 *no_add_attrs = true;
5430 }
5431
5432 return NULL_TREE;
5433 }
5434
5435 if (TARGET_64BIT)
5436 {
5437 /* Do not warn when emulating the MS ABI. */
5438 if ((TREE_CODE (*node) != FUNCTION_TYPE
5439 && TREE_CODE (*node) != METHOD_TYPE)
5440 || ix86_function_type_abi (*node) != MS_ABI)
5441 warning (OPT_Wattributes, "%qE attribute ignored",
5442 name);
5443 *no_add_attrs = true;
5444 return NULL_TREE;
5445 }
5446
5447 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
5448 if (is_attribute_p ("fastcall", name))
5449 {
5450 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5451 {
5452 error ("fastcall and cdecl attributes are not compatible");
5453 }
5454 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5455 {
5456 error ("fastcall and stdcall attributes are not compatible");
5457 }
5458 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5459 {
5460 error ("fastcall and regparm attributes are not compatible");
5461 }
5462 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5463 {
5464 error ("fastcall and thiscall attributes are not compatible");
5465 }
5466 }
5467
5468 /* Can combine stdcall with fastcall (redundant), regparm and
5469 sseregparm. */
5470 else if (is_attribute_p ("stdcall", name))
5471 {
5472 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5473 {
5474 error ("stdcall and cdecl attributes are not compatible");
5475 }
5476 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5477 {
5478 error ("stdcall and fastcall attributes are not compatible");
5479 }
5480 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5481 {
5482 error ("stdcall and thiscall attributes are not compatible");
5483 }
5484 }
5485
5486 /* Can combine cdecl with regparm and sseregparm. */
5487 else if (is_attribute_p ("cdecl", name))
5488 {
5489 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5490 {
5491 error ("stdcall and cdecl attributes are not compatible");
5492 }
5493 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5494 {
5495 error ("fastcall and cdecl attributes are not compatible");
5496 }
5497 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5498 {
5499 error ("cdecl and thiscall attributes are not compatible");
5500 }
5501 }
5502 else if (is_attribute_p ("thiscall", name))
5503 {
5504 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5505 warning (OPT_Wattributes, "%qE attribute is used for a non-class method",
5506 name);
5507 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5508 {
5509 error ("stdcall and thiscall attributes are not compatible");
5510 }
5511 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5512 {
5513 error ("fastcall and thiscall attributes are not compatible");
5514 }
5515 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5516 {
5517 error ("cdecl and thiscall attributes are not compatible");
5518 }
5519 }
5520
5521 /* Can combine sseregparm with all attributes. */
5522
5523 return NULL_TREE;
5524 }
5525
5526 /* The transactional memory builtins are implicitly regparm or fastcall
5527 depending on the ABI. Override the generic do-nothing attribute that
5528 these builtins were declared with, and replace it with one of the two
5529 attributes that we expect elsewhere. */
5530
5531 static tree
5532 ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5533 int flags, bool *no_add_attrs)
5534 {
5535 tree alt;
5536
5537 /* In no case do we want to add the placeholder attribute. */
5538 *no_add_attrs = true;
5539
5540 /* The 64-bit ABI is unchanged for transactional memory. */
5541 if (TARGET_64BIT)
5542 return NULL_TREE;
5543
5544 /* ??? Is there a better way to validate 32-bit Windows? We have
5545 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5546 if (CHECK_STACK_LIMIT > 0)
5547 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5548 else
5549 {
5550 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5551 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5552 }
5553 decl_attributes (node, alt, flags);
5554
5555 return NULL_TREE;
5556 }
5557
5558 /* This function determines from TYPE the calling-convention. */
5559
5560 unsigned int
5561 ix86_get_callcvt (const_tree type)
5562 {
5563 unsigned int ret = 0;
5564 bool is_stdarg;
5565 tree attrs;
5566
5567 if (TARGET_64BIT)
5568 return IX86_CALLCVT_CDECL;
5569
5570 attrs = TYPE_ATTRIBUTES (type);
5571 if (attrs != NULL_TREE)
5572 {
5573 if (lookup_attribute ("cdecl", attrs))
5574 ret |= IX86_CALLCVT_CDECL;
5575 else if (lookup_attribute ("stdcall", attrs))
5576 ret |= IX86_CALLCVT_STDCALL;
5577 else if (lookup_attribute ("fastcall", attrs))
5578 ret |= IX86_CALLCVT_FASTCALL;
5579 else if (lookup_attribute ("thiscall", attrs))
5580 ret |= IX86_CALLCVT_THISCALL;
5581
5582 /* Regparm isn't allowed for thiscall and fastcall. */
5583 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5584 {
5585 if (lookup_attribute ("regparm", attrs))
5586 ret |= IX86_CALLCVT_REGPARM;
5587 if (lookup_attribute ("sseregparm", attrs))
5588 ret |= IX86_CALLCVT_SSEREGPARM;
5589 }
5590
5591 if (IX86_BASE_CALLCVT(ret) != 0)
5592 return ret;
5593 }
5594
5595 is_stdarg = stdarg_p (type);
5596 if (TARGET_RTD && !is_stdarg)
5597 return IX86_CALLCVT_STDCALL | ret;
5598
5599 if (ret != 0
5600 || is_stdarg
5601 || TREE_CODE (type) != METHOD_TYPE
5602 || ix86_function_type_abi (type) != MS_ABI)
5603 return IX86_CALLCVT_CDECL | ret;
5604
5605 return IX86_CALLCVT_THISCALL;
5606 }
5607
5608 /* Return 0 if the attributes for two types are incompatible, 1 if they
5609 are compatible, and 2 if they are nearly compatible (which causes a
5610 warning to be generated). */
5611
5612 static int
5613 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5614 {
5615 unsigned int ccvt1, ccvt2;
5616
5617 if (TREE_CODE (type1) != FUNCTION_TYPE
5618 && TREE_CODE (type1) != METHOD_TYPE)
5619 return 1;
5620
5621 ccvt1 = ix86_get_callcvt (type1);
5622 ccvt2 = ix86_get_callcvt (type2);
5623 if (ccvt1 != ccvt2)
5624 return 0;
5625 if (ix86_function_regparm (type1, NULL)
5626 != ix86_function_regparm (type2, NULL))
5627 return 0;
5628
5629 return 1;
5630 }
5631 \f
5632 /* Return the regparm value for a function with the indicated TYPE and DECL.
5633 DECL may be NULL when calling function indirectly
5634 or considering a libcall. */
5635
5636 static int
5637 ix86_function_regparm (const_tree type, const_tree decl)
5638 {
5639 tree attr;
5640 int regparm;
5641 unsigned int ccvt;
5642
5643 if (TARGET_64BIT)
5644 return (ix86_function_type_abi (type) == SYSV_ABI
5645 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5646 ccvt = ix86_get_callcvt (type);
5647 regparm = ix86_regparm;
5648
5649 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5650 {
5651 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5652 if (attr)
5653 {
5654 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5655 return regparm;
5656 }
5657 }
5658 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5659 return 2;
5660 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5661 return 1;
5662
5663 /* Use register calling convention for local functions when possible. */
5664 if (decl
5665 && TREE_CODE (decl) == FUNCTION_DECL
5666 /* Caller and callee must agree on the calling convention, so
5667 checking only the global optimize flag here would mean that with
5668 __attribute__((optimize (...))) the caller could use the regparm
5669 convention and the callee not, or vice versa. Instead look at whether
5670 the callee itself is optimized or not. */
5671 && opt_for_fn (decl, optimize)
5672 && !(profile_flag && !flag_fentry))
5673 {
5674 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5675 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5676 if (i && i->local && i->can_change_signature)
5677 {
5678 int local_regparm, globals = 0, regno;
5679
5680 /* Make sure no regparm register is taken by a
5681 fixed register variable. */
5682 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5683 if (fixed_regs[local_regparm])
5684 break;
5685
5686 /* We don't want to use regparm(3) for nested functions as
5687 these use a static chain pointer in the third argument. */
5688 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5689 local_regparm = 2;
5690
5691 /* In 32-bit mode save a register for the split stack. */
5692 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5693 local_regparm = 2;
5694
5695 /* Each fixed register usage increases register pressure,
5696 so fewer registers should be used for argument passing.
5697 This behavior can be overridden by an explicit
5698 regparm value. */
5699 for (regno = AX_REG; regno <= DI_REG; regno++)
5700 if (fixed_regs[regno])
5701 globals++;
5702
5703 local_regparm
5704 = globals < local_regparm ? local_regparm - globals : 0;
5705
5706 if (local_regparm > regparm)
5707 regparm = local_regparm;
5708 }
5709 }
5710
5711 return regparm;
5712 }
5713
5714 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5715 DFmode (2) arguments in SSE registers for a function with the
5716 indicated TYPE and DECL. DECL may be NULL when calling function
5717 indirectly or considering a libcall. Otherwise return 0. */
5718
5719 static int
5720 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5721 {
5722 gcc_assert (!TARGET_64BIT);
5723
5724 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5725 by the sseregparm attribute. */
5726 if (TARGET_SSEREGPARM
5727 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5728 {
5729 if (!TARGET_SSE)
5730 {
5731 if (warn)
5732 {
5733 if (decl)
5734 error ("calling %qD with attribute sseregparm without "
5735 "SSE/SSE2 enabled", decl);
5736 else
5737 error ("calling %qT with attribute sseregparm without "
5738 "SSE/SSE2 enabled", type);
5739 }
5740 return 0;
5741 }
5742
5743 return 2;
5744 }
5745
5746 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5747 (and DFmode for SSE2) arguments in SSE registers. */
5748 if (decl && TARGET_SSE_MATH && optimize
5749 && !(profile_flag && !flag_fentry))
5750 {
5751 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5752 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5753 if (i && i->local && i->can_change_signature)
5754 return TARGET_SSE2 ? 2 : 1;
5755 }
5756
5757 return 0;
5758 }
5759
5760 /* Return true if EAX is live at the start of the function. Used by
5761 ix86_expand_prologue to determine if we need special help before
5762 calling allocate_stack_worker. */
5763
5764 static bool
5765 ix86_eax_live_at_start_p (void)
5766 {
5767 /* Cheat. Don't bother working forward from ix86_function_regparm
5768 to the function type to whether an actual argument is located in
5769 eax. Instead just look at cfg info, which is still close enough
5770 to correct at this point. This gives false positives for broken
5771 functions that might use uninitialized data that happens to be
5772 allocated in eax, but who cares? */
5773 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5774 }
5775
5776 static bool
5777 ix86_keep_aggregate_return_pointer (tree fntype)
5778 {
5779 tree attr;
5780
5781 if (!TARGET_64BIT)
5782 {
5783 attr = lookup_attribute ("callee_pop_aggregate_return",
5784 TYPE_ATTRIBUTES (fntype));
5785 if (attr)
5786 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5787
5788 /* For 32-bit MS-ABI the default is to keep aggregate
5789 return pointer. */
5790 if (ix86_function_type_abi (fntype) == MS_ABI)
5791 return true;
5792 }
5793 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5794 }
5795
5796 /* Value is the number of bytes of arguments automatically
5797 popped when returning from a subroutine call.
5798 FUNDECL is the declaration node of the function (as a tree),
5799 FUNTYPE is the data type of the function (as a tree),
5800 or for a library call it is an identifier node for the subroutine name.
5801 SIZE is the number of bytes of arguments passed on the stack.
5802
5803 On the 80386, the RTD insn may be used to pop them if the number
5804 of args is fixed, but if the number is variable then the caller
5805 must pop them all. RTD can't be used for library calls now
5806 because the library is compiled with the Unix compiler.
5807 Use of RTD is a selectable option, since it is incompatible with
5808 standard Unix calling sequences. If the option is not selected,
5809 the caller must always pop the args.
5810
5811 The attribute stdcall is equivalent to RTD on a per module basis. */
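/* For example, a 32-bit stdcall function

     void __attribute__((stdcall)) f (int a, int b);

   pops its 8 bytes of stack arguments itself (it returns with "ret $8"),
   so this hook returns SIZE for it, whereas a vararg or plain cdecl
   function returns 0 and leaves the popping to the caller.  */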
5812
5813 static int
5814 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5815 {
5816 unsigned int ccvt;
5817
5818 /* None of the 64-bit ABIs pop arguments. */
5819 if (TARGET_64BIT)
5820 return 0;
5821
5822 ccvt = ix86_get_callcvt (funtype);
5823
5824 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5825 | IX86_CALLCVT_THISCALL)) != 0
5826 && ! stdarg_p (funtype))
5827 return size;
5828
5829 /* Lose any fake structure return argument if it is passed on the stack. */
5830 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5831 && !ix86_keep_aggregate_return_pointer (funtype))
5832 {
5833 int nregs = ix86_function_regparm (funtype, fundecl);
5834 if (nregs == 0)
5835 return GET_MODE_SIZE (Pmode);
5836 }
5837
5838 return 0;
5839 }
5840
5841 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5842
5843 static bool
5844 ix86_legitimate_combined_insn (rtx_insn *insn)
5845 {
5846 /* Check operand constraints in case hard registers were propagated
5847 into insn pattern. This check prevents combine pass from
5848 generating insn patterns with invalid hard register operands.
5849 These invalid insns can eventually confuse reload to error out
5850 with a spill failure. See also PRs 46829 and 46843. */
5851 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5852 {
5853 int i;
5854
5855 extract_insn (insn);
5856 preprocess_constraints (insn);
5857
5858 int n_operands = recog_data.n_operands;
5859 int n_alternatives = recog_data.n_alternatives;
5860 for (i = 0; i < n_operands; i++)
5861 {
5862 rtx op = recog_data.operand[i];
5863 enum machine_mode mode = GET_MODE (op);
5864 const operand_alternative *op_alt;
5865 int offset = 0;
5866 bool win;
5867 int j;
5868
5869 /* For pre-AVX disallow unaligned loads/stores where the
5870 instructions don't support it. */
5871 if (!TARGET_AVX
5872 && VECTOR_MODE_P (GET_MODE (op))
5873 && misaligned_operand (op, GET_MODE (op)))
5874 {
5875 int min_align = get_attr_ssememalign (insn);
5876 if (min_align == 0)
5877 return false;
5878 }
5879
5880 /* A unary operator may be accepted by the predicate, but it
5881 is irrelevant for matching constraints. */
5882 if (UNARY_P (op))
5883 op = XEXP (op, 0);
5884
5885 if (GET_CODE (op) == SUBREG)
5886 {
5887 if (REG_P (SUBREG_REG (op))
5888 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5889 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5890 GET_MODE (SUBREG_REG (op)),
5891 SUBREG_BYTE (op),
5892 GET_MODE (op));
5893 op = SUBREG_REG (op);
5894 }
5895
5896 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5897 continue;
5898
5899 op_alt = recog_op_alt;
5900
5901 /* Operand has no constraints, anything is OK. */
5902 win = !n_alternatives;
5903
5904 alternative_mask enabled = recog_data.enabled_alternatives;
5905 for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
5906 {
5907 if (!TEST_BIT (enabled, j))
5908 continue;
5909 if (op_alt[i].anything_ok
5910 || (op_alt[i].matches != -1
5911 && operands_match_p
5912 (recog_data.operand[i],
5913 recog_data.operand[op_alt[i].matches]))
5914 || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
5915 {
5916 win = true;
5917 break;
5918 }
5919 }
5920
5921 if (!win)
5922 return false;
5923 }
5924 }
5925
5926 return true;
5927 }
5928 \f
5929 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
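/* Background (general AddressSanitizer convention, not specific to this
   file): shadow memory is addressed as

     shadow = (addr >> 3) + ix86_asan_shadow_offset ()

   so the value returned below is the constant added to the shifted
   application address; 0x7fff8000 is the usual x86_64 LP64 offset, with
   separate values for Darwin/LP64 and for 32-bit targets.  */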
5930
5931 static unsigned HOST_WIDE_INT
5932 ix86_asan_shadow_offset (void)
5933 {
5934 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5935 : HOST_WIDE_INT_C (0x7fff8000))
5936 : (HOST_WIDE_INT_1 << 29);
5937 }
5938 \f
5939 /* Argument support functions. */
5940
5941 /* Return true when REGNO may be used to pass function parameters. */
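/* For reference (a summary of the psABI conventions, not authoritative):
   64-bit SysV code passes integer arguments in RDI, RSI, RDX, RCX, R8, R9
   and vector arguments in XMM0-XMM7; the 64-bit MS ABI uses RCX, RDX, R8,
   R9 and XMM0-XMM3; 32-bit code only uses up to REGPARM_MAX integer
   registers when the regparm/fastcall/thiscall conventions ask for it.  */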
5942 bool
5943 ix86_function_arg_regno_p (int regno)
5944 {
5945 int i;
5946 const int *parm_regs;
5947
5948 if (!TARGET_64BIT)
5949 {
5950 if (TARGET_MACHO)
5951 return (regno < REGPARM_MAX
5952 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5953 else
5954 return (regno < REGPARM_MAX
5955 || (TARGET_MMX && MMX_REGNO_P (regno)
5956 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5957 || (TARGET_SSE && SSE_REGNO_P (regno)
5958 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5959 }
5960
5961 if (TARGET_SSE && SSE_REGNO_P (regno)
5962 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5963 return true;
5964
5965 /* TODO: The function should depend on current function ABI but
5966 builtins.c would need updating then. Therefore we use the
5967 default ABI. */
5968
5969 /* RAX is used as hidden argument to va_arg functions. */
5970 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5971 return true;
5972
5973 if (ix86_abi == MS_ABI)
5974 parm_regs = x86_64_ms_abi_int_parameter_registers;
5975 else
5976 parm_regs = x86_64_int_parameter_registers;
5977 for (i = 0; i < (ix86_abi == MS_ABI
5978 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5979 if (regno == parm_regs[i])
5980 return true;
5981 return false;
5982 }
5983
5984 /* Return true if we do not know how to pass TYPE solely in registers. */
5985
5986 static bool
5987 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5988 {
5989 if (must_pass_in_stack_var_size_or_pad (mode, type))
5990 return true;
5991
5992 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5993 The layout_type routine is crafty and tries to trick us into passing
5994 currently unsupported vector types on the stack by using TImode. */
5995 return (!TARGET_64BIT && mode == TImode
5996 && type && TREE_CODE (type) != VECTOR_TYPE);
5997 }
5998
5999 /* Return the size, in bytes, of the area reserved for arguments passed
6000 in registers for the function represented by FNDECL, depending on the
6001 ABI used. */
6002 int
6003 ix86_reg_parm_stack_space (const_tree fndecl)
6004 {
6005 enum calling_abi call_abi = SYSV_ABI;
6006 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6007 call_abi = ix86_function_abi (fndecl);
6008 else
6009 call_abi = ix86_function_type_abi (fndecl);
6010 if (TARGET_64BIT && call_abi == MS_ABI)
6011 return 32;
6012 return 0;
6013 }
6014
6015 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
6016 call ABI used. */
6017 enum calling_abi
6018 ix86_function_type_abi (const_tree fntype)
6019 {
6020 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6021 {
6022 enum calling_abi abi = ix86_abi;
6023 if (abi == SYSV_ABI)
6024 {
6025 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6026 abi = MS_ABI;
6027 }
6028 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6029 abi = SYSV_ABI;
6030 return abi;
6031 }
6032 return ix86_abi;
6033 }
6034
6035 /* We add this as a workaround in order to use the libc_has_function
6036 hook in i386.md. */
6037 bool
6038 ix86_libc_has_function (enum function_class fn_class)
6039 {
6040 return targetm.libc_has_function (fn_class);
6041 }
6042
6043 static bool
6044 ix86_function_ms_hook_prologue (const_tree fn)
6045 {
6046 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6047 {
6048 if (decl_function_context (fn) != NULL_TREE)
6049 error_at (DECL_SOURCE_LOCATION (fn),
6050 "ms_hook_prologue is not compatible with nested function");
6051 else
6052 return true;
6053 }
6054 return false;
6055 }
6056
6057 static enum calling_abi
6058 ix86_function_abi (const_tree fndecl)
6059 {
6060 if (! fndecl)
6061 return ix86_abi;
6062 return ix86_function_type_abi (TREE_TYPE (fndecl));
6063 }
6064
6065 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6066 call ABI used. */
6067 enum calling_abi
6068 ix86_cfun_abi (void)
6069 {
6070 if (! cfun)
6071 return ix86_abi;
6072 return cfun->machine->call_abi;
6073 }
6074
6075 /* Write the extra assembler code needed to declare a function properly. */
6076
6077 void
6078 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6079 tree decl)
6080 {
6081 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6082
6083 if (is_ms_hook)
6084 {
6085 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6086 unsigned int filler_cc = 0xcccccccc;
6087
6088 for (i = 0; i < filler_count; i += 4)
6089 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6090 }
6091
6092 #ifdef SUBTARGET_ASM_UNWIND_INIT
6093 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6094 #endif
6095
6096 ASM_OUTPUT_LABEL (asm_out_file, fname);
6097
6098 /* Output magic byte marker, if hot-patch attribute is set. */
6099 if (is_ms_hook)
6100 {
6101 if (TARGET_64BIT)
6102 {
6103 /* leaq [%rsp + 0], %rsp */
6104 asm_fprintf (asm_out_file, ASM_BYTE
6105 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6106 }
6107 else
6108 {
6109 /* movl.s %edi, %edi
6110 push %ebp
6111 movl.s %esp, %ebp */
6112 asm_fprintf (asm_out_file, ASM_BYTE
6113 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6114 }
6115 }
6116 }
6117
6118 /* regclass.c */
6119 extern void init_regs (void);
6120
6121 /* Implementation of the call ABI switching target hook. For FNDECL, the
6122 ABI-specific call register sets are selected. See also
6123 ix86_conditional_register_usage for more details. */
6124 void
6125 ix86_call_abi_override (const_tree fndecl)
6126 {
6127 if (fndecl == NULL_TREE)
6128 cfun->machine->call_abi = ix86_abi;
6129 else
6130 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6131 }
6132
6133 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6134 expensive re-initialization of init_regs each time we switch function context
6135 since this is needed only during RTL expansion. */
6136 static void
6137 ix86_maybe_switch_abi (void)
6138 {
6139 if (TARGET_64BIT
6140 && call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6141 reinit_regs ();
6142 }
6143
6144 /* Return true if a pseudo register should be created and used to hold
6145 the GOT address for PIC code. */
6146 static bool
6147 ix86_use_pseudo_pic_reg (void)
6148 {
6149 if ((TARGET_64BIT
6150 && (ix86_cmodel == CM_SMALL_PIC
6151 || TARGET_PECOFF))
6152 || !flag_pic)
6153 return false;
6154 return true;
6155 }
6156
6157 /* Create and initialize PIC register if required. */
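/* Illustrative output (the exact expansions live in i386.md, so treat this
   as a sketch): for 32-bit PIC code gen_set_got typically expands to a call
   to a __x86.get_pc_thunk.* helper followed by

	addl	$_GLOBAL_OFFSET_TABLE_, %reg

   while the 64-bit non-large-model path below loads the GOT address with a
   single RIP-relative lea of _GLOBAL_OFFSET_TABLE_, and the large-PIC
   sequence builds it from a local label plus a separate GOT offset.  */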
6158 static void
6159 ix86_init_pic_reg (void)
6160 {
6161 edge entry_edge;
6162 rtx_insn *seq;
6163
6164 if (!ix86_use_pseudo_pic_reg ())
6165 return;
6166
6167 start_sequence ();
6168
6169 if (TARGET_64BIT)
6170 {
6171 if (ix86_cmodel == CM_LARGE_PIC)
6172 {
6173 rtx_code_label *label;
6174 rtx tmp_reg;
6175
6176 gcc_assert (Pmode == DImode);
6177 label = gen_label_rtx ();
6178 emit_label (label);
6179 LABEL_PRESERVE_P (label) = 1;
6180 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
6181 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6182 emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6183 label));
6184 emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6185 emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6186 pic_offset_table_rtx, tmp_reg));
6187 }
6188 else
6189 emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6190 }
6191 else
6192 {
6193 rtx insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6194 RTX_FRAME_RELATED_P (insn) = 1;
6195 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6196 }
6197
6198 seq = get_insns ();
6199 end_sequence ();
6200
6201 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6202 insert_insn_on_edge (seq, entry_edge);
6203 commit_one_edge_insertion (entry_edge);
6204 }
6205
6206 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6207 for a call to a function whose data type is FNTYPE.
6208 For a library call, FNTYPE is 0. */
6209
6210 void
6211 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6212 tree fntype, /* tree ptr for function decl */
6213 rtx libname, /* SYMBOL_REF of library name or 0 */
6214 tree fndecl,
6215 int caller)
6216 {
6217 struct cgraph_local_info *i;
6218
6219 memset (cum, 0, sizeof (*cum));
6220
6221 if (fndecl)
6222 {
6223 i = cgraph_node::local_info (fndecl);
6224 cum->call_abi = ix86_function_abi (fndecl);
6225 }
6226 else
6227 {
6228 i = NULL;
6229 cum->call_abi = ix86_function_type_abi (fntype);
6230 }
6231
6232 cum->caller = caller;
6233
6234 /* Set up the number of registers to use for passing arguments. */
6235 cum->nregs = ix86_regparm;
6236 if (TARGET_64BIT)
6237 {
6238 cum->nregs = (cum->call_abi == SYSV_ABI
6239 ? X86_64_REGPARM_MAX
6240 : X86_64_MS_REGPARM_MAX);
6241 }
6242 if (TARGET_SSE)
6243 {
6244 cum->sse_nregs = SSE_REGPARM_MAX;
6245 if (TARGET_64BIT)
6246 {
6247 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6248 ? X86_64_SSE_REGPARM_MAX
6249 : X86_64_MS_SSE_REGPARM_MAX);
6250 }
6251 }
6252 if (TARGET_MMX)
6253 cum->mmx_nregs = MMX_REGPARM_MAX;
6254 cum->warn_avx512f = true;
6255 cum->warn_avx = true;
6256 cum->warn_sse = true;
6257 cum->warn_mmx = true;
6258
6259 /* Because the type might mismatch between caller and callee, we need to
6260 use the actual type of the function for local calls.
6261 FIXME: cgraph_analyze can be told to actually record whether a function
6262 uses va_start, so for local functions maybe_vaarg could be made more
6263 aggressive, helping K&R code.
6264 FIXME: once the type system is fixed, we won't need this code anymore. */
6265 if (i && i->local && i->can_change_signature)
6266 fntype = TREE_TYPE (fndecl);
6267 cum->maybe_vaarg = (fntype
6268 ? (!prototype_p (fntype) || stdarg_p (fntype))
6269 : !libname);
6270
6271 if (!TARGET_64BIT)
6272 {
6273 /* If there are variable arguments, then we won't pass anything
6274 in registers in 32-bit mode. */
6275 if (stdarg_p (fntype))
6276 {
6277 cum->nregs = 0;
6278 cum->sse_nregs = 0;
6279 cum->mmx_nregs = 0;
6280 cum->warn_avx512f = false;
6281 cum->warn_avx = false;
6282 cum->warn_sse = false;
6283 cum->warn_mmx = false;
6284 return;
6285 }
6286
6287 /* Use ecx and edx registers if function has fastcall attribute,
6288 else look for regparm information. */
6289 if (fntype)
6290 {
6291 unsigned int ccvt = ix86_get_callcvt (fntype);
6292 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6293 {
6294 cum->nregs = 1;
6295 cum->fastcall = 1; /* Same first register as in fastcall. */
6296 }
6297 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6298 {
6299 cum->nregs = 2;
6300 cum->fastcall = 1;
6301 }
6302 else
6303 cum->nregs = ix86_function_regparm (fntype, fndecl);
6304 }
6305
6306 /* Set up the number of SSE registers used for passing SFmode
6307 and DFmode arguments. Warn for mismatching ABI. */
6308 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6309 }
6310 }
6311
6312 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6313 But in the case of vector types, it is some vector mode.
6314
6315 When we have only some of our vector isa extensions enabled, then there
6316 are some modes for which vector_mode_supported_p is false. For these
6317 modes, the generic vector support in gcc will choose some non-vector mode
6318 in order to implement the type. By computing the natural mode, we'll
6319 select the proper ABI location for the operand and not depend on whatever
6320 the middle-end decides to do with these vector types.
6321
6322 The middle-end can't deal with vector types > 16 bytes. In this
6323 case, we return the original mode and warn about the ABI change if CUM
6324 isn't NULL.
6325
6326 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6327 available for the function return value. */
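/* As an example of the mode search below: a generic vector type of four
   ints is 16 bytes with SImode elements, so the loop over the MODE_VECTOR_INT
   modes settles on V4SImode (four units of SImode), regardless of which mode
   the middle-end picked for the type itself.  */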
6328
6329 static enum machine_mode
6330 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6331 bool in_return)
6332 {
6333 enum machine_mode mode = TYPE_MODE (type);
6334
6335 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6336 {
6337 HOST_WIDE_INT size = int_size_in_bytes (type);
6338 if ((size == 8 || size == 16 || size == 32 || size == 64)
6339 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6340 && TYPE_VECTOR_SUBPARTS (type) > 1)
6341 {
6342 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6343
6344 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6345 mode = MIN_MODE_VECTOR_FLOAT;
6346 else
6347 mode = MIN_MODE_VECTOR_INT;
6348
6349 /* Get the mode which has this inner mode and number of units. */
6350 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6351 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6352 && GET_MODE_INNER (mode) == innermode)
6353 {
6354 if (size == 64 && !TARGET_AVX512F)
6355 {
6356 static bool warnedavx512f;
6357 static bool warnedavx512f_ret;
6358
6359 if (cum && cum->warn_avx512f && !warnedavx512f)
6360 {
6361 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6362 "without AVX512F enabled changes the ABI"))
6363 warnedavx512f = true;
6364 }
6365 else if (in_return && !warnedavx512f_ret)
6366 {
6367 if (warning (OPT_Wpsabi, "AVX512F vector return "
6368 "without AVX512F enabled changes the ABI"))
6369 warnedavx512f_ret = true;
6370 }
6371
6372 return TYPE_MODE (type);
6373 }
6374 else if (size == 32 && !TARGET_AVX)
6375 {
6376 static bool warnedavx;
6377 static bool warnedavx_ret;
6378
6379 if (cum && cum->warn_avx && !warnedavx)
6380 {
6381 if (warning (OPT_Wpsabi, "AVX vector argument "
6382 "without AVX enabled changes the ABI"))
6383 warnedavx = true;
6384 }
6385 else if (in_return && !warnedavx_ret)
6386 {
6387 if (warning (OPT_Wpsabi, "AVX vector return "
6388 "without AVX enabled changes the ABI"))
6389 warnedavx_ret = true;
6390 }
6391
6392 return TYPE_MODE (type);
6393 }
6394 else if (((size == 8 && TARGET_64BIT) || size == 16)
6395 && !TARGET_SSE)
6396 {
6397 static bool warnedsse;
6398 static bool warnedsse_ret;
6399
6400 if (cum && cum->warn_sse && !warnedsse)
6401 {
6402 if (warning (OPT_Wpsabi, "SSE vector argument "
6403 "without SSE enabled changes the ABI"))
6404 warnedsse = true;
6405 }
6406 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6407 {
6408 if (warning (OPT_Wpsabi, "SSE vector return "
6409 "without SSE enabled changes the ABI"))
6410 warnedsse_ret = true;
6411 }
6412 }
6413 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6414 {
6415 static bool warnedmmx;
6416 static bool warnedmmx_ret;
6417
6418 if (cum && cum->warn_mmx && !warnedmmx)
6419 {
6420 if (warning (OPT_Wpsabi, "MMX vector argument "
6421 "without MMX enabled changes the ABI"))
6422 warnedmmx = true;
6423 }
6424 else if (in_return && !warnedmmx_ret)
6425 {
6426 if (warning (OPT_Wpsabi, "MMX vector return "
6427 "without MMX enabled changes the ABI"))
6428 warnedmmx_ret = true;
6429 }
6430 }
6431 return mode;
6432 }
6433
6434 gcc_unreachable ();
6435 }
6436 }
6437
6438 return mode;
6439 }
6440
6441 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6442 this may not agree with the mode that the type system has chosen for the
6443 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6444 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
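/* The single-element PARALLEL built below simply says that the whole BLKmode
   value lives in hard register REGNO, in mode MODE, starting at byte offset 0. */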
6445
6446 static rtx
6447 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6448 unsigned int regno)
6449 {
6450 rtx tmp;
6451
6452 if (orig_mode != BLKmode)
6453 tmp = gen_rtx_REG (orig_mode, regno);
6454 else
6455 {
6456 tmp = gen_rtx_REG (mode, regno);
6457 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6458 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6459 }
6460
6461 return tmp;
6462 }
6463
6464 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6465 of this code is to classify each eightbyte of an incoming argument by register
6466 class and assign registers accordingly. */
6467
6468 /* Return the union class of CLASS1 and CLASS2.
6469 See the x86-64 PS ABI for details. */
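/* For example, merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4 below), while merging X86_64_SSE_CLASS with
   X86_64_SSEDF_CLASS falls through to rule #6 and yields X86_64_SSE_CLASS.  */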
6470
6471 static enum x86_64_reg_class
6472 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6473 {
6474 /* Rule #1: If both classes are equal, this is the resulting class. */
6475 if (class1 == class2)
6476 return class1;
6477
6478 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6479 the other class. */
6480 if (class1 == X86_64_NO_CLASS)
6481 return class2;
6482 if (class2 == X86_64_NO_CLASS)
6483 return class1;
6484
6485 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6486 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6487 return X86_64_MEMORY_CLASS;
6488
6489 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6490 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6491 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6492 return X86_64_INTEGERSI_CLASS;
6493 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6494 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6495 return X86_64_INTEGER_CLASS;
6496
6497 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6498 MEMORY is used. */
6499 if (class1 == X86_64_X87_CLASS
6500 || class1 == X86_64_X87UP_CLASS
6501 || class1 == X86_64_COMPLEX_X87_CLASS
6502 || class2 == X86_64_X87_CLASS
6503 || class2 == X86_64_X87UP_CLASS
6504 || class2 == X86_64_COMPLEX_X87_CLASS)
6505 return X86_64_MEMORY_CLASS;
6506
6507 /* Rule #6: Otherwise class SSE is used. */
6508 return X86_64_SSE_CLASS;
6509 }
6510
6511 /* Classify the argument of type TYPE and mode MODE.
6512 CLASSES will be filled by the register class used to pass each word
6513 of the operand. The number of words is returned. In case the parameter
6514 should be passed in memory, 0 is returned. As a special case for zero
6515 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6516
6517 BIT_OFFSET is used internally for handling records and specifies offset
6518 of the offset in bits modulo 512 to avoid overflow cases.
6519
6520 See the x86-64 PS ABI for details.
6521 */
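/* For example, struct { double d; int i; } occupies two eightbytes: the first
   is classified as an SSE class (so D is passed in an SSE register) and the
   second as an integer class (so I is passed in a general-purpose register). */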
6522
6523 static int
6524 classify_argument (enum machine_mode mode, const_tree type,
6525 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6526 {
6527 HOST_WIDE_INT bytes =
6528 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6529 int words
6530 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6531
6532 /* Variable sized entities are always passed/returned in memory. */
6533 if (bytes < 0)
6534 return 0;
6535
6536 if (mode != VOIDmode
6537 && targetm.calls.must_pass_in_stack (mode, type))
6538 return 0;
6539
6540 if (type && AGGREGATE_TYPE_P (type))
6541 {
6542 int i;
6543 tree field;
6544 enum x86_64_reg_class subclasses[MAX_CLASSES];
6545
6546 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6547 if (bytes > 64)
6548 return 0;
6549
6550 for (i = 0; i < words; i++)
6551 classes[i] = X86_64_NO_CLASS;
6552
6553 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6554 signal the memory class, so handle this as a special case. */
6555 if (!words)
6556 {
6557 classes[0] = X86_64_NO_CLASS;
6558 return 1;
6559 }
6560
6561 /* Classify each field of record and merge classes. */
6562 switch (TREE_CODE (type))
6563 {
6564 case RECORD_TYPE:
6565 /* And now merge the fields of structure. */
6566 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6567 {
6568 if (TREE_CODE (field) == FIELD_DECL)
6569 {
6570 int num;
6571
6572 if (TREE_TYPE (field) == error_mark_node)
6573 continue;
6574
6575 /* Bitfields are always classified as integer. Handle them
6576 early, since later code would consider them to be
6577 misaligned integers. */
6578 if (DECL_BIT_FIELD (field))
6579 {
6580 for (i = (int_bit_position (field)
6581 + (bit_offset % 64)) / 8 / 8;
6582 i < ((int_bit_position (field) + (bit_offset % 64))
6583 + tree_to_shwi (DECL_SIZE (field))
6584 + 63) / 8 / 8; i++)
6585 classes[i] =
6586 merge_classes (X86_64_INTEGER_CLASS,
6587 classes[i]);
6588 }
6589 else
6590 {
6591 int pos;
6592
6593 type = TREE_TYPE (field);
6594
6595 /* Flexible array member is ignored. */
6596 if (TYPE_MODE (type) == BLKmode
6597 && TREE_CODE (type) == ARRAY_TYPE
6598 && TYPE_SIZE (type) == NULL_TREE
6599 && TYPE_DOMAIN (type) != NULL_TREE
6600 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6601 == NULL_TREE))
6602 {
6603 static bool warned;
6604
6605 if (!warned && warn_psabi)
6606 {
6607 warned = true;
6608 inform (input_location,
6609 "the ABI of passing struct with"
6610 " a flexible array member has"
6611 " changed in GCC 4.4");
6612 }
6613 continue;
6614 }
6615 num = classify_argument (TYPE_MODE (type), type,
6616 subclasses,
6617 (int_bit_position (field)
6618 + bit_offset) % 512);
6619 if (!num)
6620 return 0;
6621 pos = (int_bit_position (field)
6622 + (bit_offset % 64)) / 8 / 8;
6623 for (i = 0; i < num && (i + pos) < words; i++)
6624 classes[i + pos] =
6625 merge_classes (subclasses[i], classes[i + pos]);
6626 }
6627 }
6628 }
6629 break;
6630
6631 case ARRAY_TYPE:
6632 /* Arrays are handled as small records. */
6633 {
6634 int num;
6635 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6636 TREE_TYPE (type), subclasses, bit_offset);
6637 if (!num)
6638 return 0;
6639
6640 /* The partial classes are now full classes. */
6641 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6642 subclasses[0] = X86_64_SSE_CLASS;
6643 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6644 && !((bit_offset % 64) == 0 && bytes == 4))
6645 subclasses[0] = X86_64_INTEGER_CLASS;
6646
6647 for (i = 0; i < words; i++)
6648 classes[i] = subclasses[i % num];
6649
6650 break;
6651 }
6652 case UNION_TYPE:
6653 case QUAL_UNION_TYPE:
6654 /* Unions are similar to RECORD_TYPE but offset is always 0.
6655 */
6656 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6657 {
6658 if (TREE_CODE (field) == FIELD_DECL)
6659 {
6660 int num;
6661
6662 if (TREE_TYPE (field) == error_mark_node)
6663 continue;
6664
6665 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6666 TREE_TYPE (field), subclasses,
6667 bit_offset);
6668 if (!num)
6669 return 0;
6670 for (i = 0; i < num && i < words; i++)
6671 classes[i] = merge_classes (subclasses[i], classes[i]);
6672 }
6673 }
6674 break;
6675
6676 default:
6677 gcc_unreachable ();
6678 }
6679
6680 if (words > 2)
6681 {
6682 /* When size > 16 bytes, if the first class isn't
6683 X86_64_SSE_CLASS or any of the remaining classes isn't
6684 X86_64_SSEUP_CLASS, everything should be passed in
6685 memory. */
6686 if (classes[0] != X86_64_SSE_CLASS)
6687 return 0;
6688
6689 for (i = 1; i < words; i++)
6690 if (classes[i] != X86_64_SSEUP_CLASS)
6691 return 0;
6692 }
6693
6694 /* Final merger cleanup. */
6695 for (i = 0; i < words; i++)
6696 {
6697 /* If one class is MEMORY, everything should be passed in
6698 memory. */
6699 if (classes[i] == X86_64_MEMORY_CLASS)
6700 return 0;
6701
6702 /* The X86_64_SSEUP_CLASS should be always preceded by
6703 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6704 if (classes[i] == X86_64_SSEUP_CLASS
6705 && classes[i - 1] != X86_64_SSE_CLASS
6706 && classes[i - 1] != X86_64_SSEUP_CLASS)
6707 {
6708 /* The first one should never be X86_64_SSEUP_CLASS. */
6709 gcc_assert (i != 0);
6710 classes[i] = X86_64_SSE_CLASS;
6711 }
6712
6713 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6714 everything should be passed in memory. */
6715 if (classes[i] == X86_64_X87UP_CLASS
6716 && (classes[i - 1] != X86_64_X87_CLASS))
6717 {
6718 static bool warned;
6719
6720 /* The first one should never be X86_64_X87UP_CLASS. */
6721 gcc_assert (i != 0);
6722 if (!warned && warn_psabi)
6723 {
6724 warned = true;
6725 inform (input_location,
6726 "the ABI of passing union with long double"
6727 " has changed in GCC 4.4");
6728 }
6729 return 0;
6730 }
6731 }
6732 return words;
6733 }
6734
6735 /* Compute alignment needed. We align all types to natural boundaries with
6736 exception of XFmode that is aligned to 64bits. */
6737 if (mode != VOIDmode && mode != BLKmode)
6738 {
6739 int mode_alignment = GET_MODE_BITSIZE (mode);
6740
6741 if (mode == XFmode)
6742 mode_alignment = 128;
6743 else if (mode == XCmode)
6744 mode_alignment = 256;
6745 if (COMPLEX_MODE_P (mode))
6746 mode_alignment /= 2;
6747 /* Misaligned fields are always returned in memory. */
6748 if (bit_offset % mode_alignment)
6749 return 0;
6750 }
6751
6752 /* For V1xx modes, just use the base mode. */
6753 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6754 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6755 mode = GET_MODE_INNER (mode);
6756
6757 /* Classification of atomic types. */
6758 switch (mode)
6759 {
6760 case SDmode:
6761 case DDmode:
6762 classes[0] = X86_64_SSE_CLASS;
6763 return 1;
6764 case TDmode:
6765 classes[0] = X86_64_SSE_CLASS;
6766 classes[1] = X86_64_SSEUP_CLASS;
6767 return 2;
6768 case DImode:
6769 case SImode:
6770 case HImode:
6771 case QImode:
6772 case CSImode:
6773 case CHImode:
6774 case CQImode:
6775 {
6776 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6777
6778 /* Analyze last 128 bits only. */
6779 size = (size - 1) & 0x7f;
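	/* SIZE is now the zero-based bit index of the value's last bit within
	   its 128-bit window, so the tests below decide whether the value fits
	   in the low half of one eightbyte, in a full eightbyte, or spills into
	   a second eightbyte.  */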
6780
6781 if (size < 32)
6782 {
6783 classes[0] = X86_64_INTEGERSI_CLASS;
6784 return 1;
6785 }
6786 else if (size < 64)
6787 {
6788 classes[0] = X86_64_INTEGER_CLASS;
6789 return 1;
6790 }
6791 else if (size < 64+32)
6792 {
6793 classes[0] = X86_64_INTEGER_CLASS;
6794 classes[1] = X86_64_INTEGERSI_CLASS;
6795 return 2;
6796 }
6797 else if (size < 64+64)
6798 {
6799 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6800 return 2;
6801 }
6802 else
6803 gcc_unreachable ();
6804 }
6805 case CDImode:
6806 case TImode:
6807 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6808 return 2;
6809 case COImode:
6810 case OImode:
6811 /* OImode shouldn't be used directly. */
6812 gcc_unreachable ();
6813 case CTImode:
6814 return 0;
6815 case SFmode:
6816 if (!(bit_offset % 64))
6817 classes[0] = X86_64_SSESF_CLASS;
6818 else
6819 classes[0] = X86_64_SSE_CLASS;
6820 return 1;
6821 case DFmode:
6822 classes[0] = X86_64_SSEDF_CLASS;
6823 return 1;
6824 case XFmode:
6825 classes[0] = X86_64_X87_CLASS;
6826 classes[1] = X86_64_X87UP_CLASS;
6827 return 2;
6828 case TFmode:
6829 classes[0] = X86_64_SSE_CLASS;
6830 classes[1] = X86_64_SSEUP_CLASS;
6831 return 2;
6832 case SCmode:
6833 classes[0] = X86_64_SSE_CLASS;
6834 if (!(bit_offset % 64))
6835 return 1;
6836 else
6837 {
6838 static bool warned;
6839
6840 if (!warned && warn_psabi)
6841 {
6842 warned = true;
6843 inform (input_location,
6844 "the ABI of passing structure with complex float"
6845 " member has changed in GCC 4.4");
6846 }
6847 classes[1] = X86_64_SSESF_CLASS;
6848 return 2;
6849 }
6850 case DCmode:
6851 classes[0] = X86_64_SSEDF_CLASS;
6852 classes[1] = X86_64_SSEDF_CLASS;
6853 return 2;
6854 case XCmode:
6855 classes[0] = X86_64_COMPLEX_X87_CLASS;
6856 return 1;
6857 case TCmode:
6858 /* This mode is larger than 16 bytes. */
6859 return 0;
6860 case V8SFmode:
6861 case V8SImode:
6862 case V32QImode:
6863 case V16HImode:
6864 case V4DFmode:
6865 case V4DImode:
6866 classes[0] = X86_64_SSE_CLASS;
6867 classes[1] = X86_64_SSEUP_CLASS;
6868 classes[2] = X86_64_SSEUP_CLASS;
6869 classes[3] = X86_64_SSEUP_CLASS;
6870 return 4;
6871 case V8DFmode:
6872 case V16SFmode:
6873 case V8DImode:
6874 case V16SImode:
6875 case V32HImode:
6876 case V64QImode:
6877 classes[0] = X86_64_SSE_CLASS;
6878 classes[1] = X86_64_SSEUP_CLASS;
6879 classes[2] = X86_64_SSEUP_CLASS;
6880 classes[3] = X86_64_SSEUP_CLASS;
6881 classes[4] = X86_64_SSEUP_CLASS;
6882 classes[5] = X86_64_SSEUP_CLASS;
6883 classes[6] = X86_64_SSEUP_CLASS;
6884 classes[7] = X86_64_SSEUP_CLASS;
6885 return 8;
6886 case V4SFmode:
6887 case V4SImode:
6888 case V16QImode:
6889 case V8HImode:
6890 case V2DFmode:
6891 case V2DImode:
6892 classes[0] = X86_64_SSE_CLASS;
6893 classes[1] = X86_64_SSEUP_CLASS;
6894 return 2;
6895 case V1TImode:
6896 case V1DImode:
6897 case V2SFmode:
6898 case V2SImode:
6899 case V4HImode:
6900 case V8QImode:
6901 classes[0] = X86_64_SSE_CLASS;
6902 return 1;
6903 case BLKmode:
6904 case VOIDmode:
6905 return 0;
6906 default:
6907 gcc_assert (VECTOR_MODE_P (mode));
6908
6909 if (bytes > 16)
6910 return 0;
6911
6912 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6913
6914 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6915 classes[0] = X86_64_INTEGERSI_CLASS;
6916 else
6917 classes[0] = X86_64_INTEGER_CLASS;
6918 classes[1] = X86_64_INTEGER_CLASS;
6919 return 1 + (bytes > 8);
6920 }
6921 }
6922
6923 /* Examine the argument and set the number of registers required in each
6924 class. Return true iff the parameter should be passed in memory. */
6925
6926 static bool
6927 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6928 int *int_nregs, int *sse_nregs)
6929 {
6930 enum x86_64_reg_class regclass[MAX_CLASSES];
6931 int n = classify_argument (mode, type, regclass, 0);
6932
6933 *int_nregs = 0;
6934 *sse_nregs = 0;
6935
6936 if (!n)
6937 return true;
6938 for (n--; n >= 0; n--)
6939 switch (regclass[n])
6940 {
6941 case X86_64_INTEGER_CLASS:
6942 case X86_64_INTEGERSI_CLASS:
6943 (*int_nregs)++;
6944 break;
6945 case X86_64_SSE_CLASS:
6946 case X86_64_SSESF_CLASS:
6947 case X86_64_SSEDF_CLASS:
6948 (*sse_nregs)++;
6949 break;
6950 case X86_64_NO_CLASS:
6951 case X86_64_SSEUP_CLASS:
6952 break;
6953 case X86_64_X87_CLASS:
6954 case X86_64_X87UP_CLASS:
6955 case X86_64_COMPLEX_X87_CLASS:
6956 if (!in_return)
6957 return true;
6958 break;
6959 case X86_64_MEMORY_CLASS:
6960 gcc_unreachable ();
6961 }
6962
6963 return false;
6964 }
6965
6966 /* Construct container for the argument used by GCC interface. See
6967 FUNCTION_ARG for the detailed description. */
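/* The returned rtx is either a plain (reg ...) for the common single-register
   cases, or a PARALLEL whose EXPR_LIST entries each pair a hard register (in
   the mode of the corresponding eightbyte) with its byte offset within the
   argument.  */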
6968
6969 static rtx
6970 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6971 const_tree type, int in_return, int nintregs, int nsseregs,
6972 const int *intreg, int sse_regno)
6973 {
6974 /* The following variables hold the static issued_error state. */
6975 static bool issued_sse_arg_error;
6976 static bool issued_sse_ret_error;
6977 static bool issued_x87_ret_error;
6978
6979 enum machine_mode tmpmode;
6980 int bytes =
6981 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6982 enum x86_64_reg_class regclass[MAX_CLASSES];
6983 int n;
6984 int i;
6985 int nexps = 0;
6986 int needed_sseregs, needed_intregs;
6987 rtx exp[MAX_CLASSES];
6988 rtx ret;
6989
6990 n = classify_argument (mode, type, regclass, 0);
6991 if (!n)
6992 return NULL;
6993 if (examine_argument (mode, type, in_return, &needed_intregs,
6994 &needed_sseregs))
6995 return NULL;
6996 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6997 return NULL;
6998
6999 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
7000 some less clueful developer tries to use floating-point anyway. */
7001 if (needed_sseregs && !TARGET_SSE)
7002 {
7003 if (in_return)
7004 {
7005 if (!issued_sse_ret_error)
7006 {
7007 error ("SSE register return with SSE disabled");
7008 issued_sse_ret_error = true;
7009 }
7010 }
7011 else if (!issued_sse_arg_error)
7012 {
7013 error ("SSE register argument with SSE disabled");
7014 issued_sse_arg_error = true;
7015 }
7016 return NULL;
7017 }
7018
7019 /* Likewise, error if the ABI requires us to return values in the
7020 x87 registers and the user specified -mno-80387. */
7021 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7022 for (i = 0; i < n; i++)
7023 if (regclass[i] == X86_64_X87_CLASS
7024 || regclass[i] == X86_64_X87UP_CLASS
7025 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7026 {
7027 if (!issued_x87_ret_error)
7028 {
7029 error ("x87 register return with x87 disabled");
7030 issued_x87_ret_error = true;
7031 }
7032 return NULL;
7033 }
7034
7035 /* First construct simple cases. Avoid SCmode, since we want to use
7036 a single register to pass this type. */
7037 if (n == 1 && mode != SCmode)
7038 switch (regclass[0])
7039 {
7040 case X86_64_INTEGER_CLASS:
7041 case X86_64_INTEGERSI_CLASS:
7042 return gen_rtx_REG (mode, intreg[0]);
7043 case X86_64_SSE_CLASS:
7044 case X86_64_SSESF_CLASS:
7045 case X86_64_SSEDF_CLASS:
7046 if (mode != BLKmode)
7047 return gen_reg_or_parallel (mode, orig_mode,
7048 SSE_REGNO (sse_regno));
7049 break;
7050 case X86_64_X87_CLASS:
7051 case X86_64_COMPLEX_X87_CLASS:
7052 return gen_rtx_REG (mode, FIRST_STACK_REG);
7053 case X86_64_NO_CLASS:
7054 /* Zero sized array, struct or class. */
7055 return NULL;
7056 default:
7057 gcc_unreachable ();
7058 }
7059 if (n == 2
7060 && regclass[0] == X86_64_SSE_CLASS
7061 && regclass[1] == X86_64_SSEUP_CLASS
7062 && mode != BLKmode)
7063 return gen_reg_or_parallel (mode, orig_mode,
7064 SSE_REGNO (sse_regno));
7065 if (n == 4
7066 && regclass[0] == X86_64_SSE_CLASS
7067 && regclass[1] == X86_64_SSEUP_CLASS
7068 && regclass[2] == X86_64_SSEUP_CLASS
7069 && regclass[3] == X86_64_SSEUP_CLASS
7070 && mode != BLKmode)
7071 return gen_reg_or_parallel (mode, orig_mode,
7072 SSE_REGNO (sse_regno));
7073 if (n == 8
7074 && regclass[0] == X86_64_SSE_CLASS
7075 && regclass[1] == X86_64_SSEUP_CLASS
7076 && regclass[2] == X86_64_SSEUP_CLASS
7077 && regclass[3] == X86_64_SSEUP_CLASS
7078 && regclass[4] == X86_64_SSEUP_CLASS
7079 && regclass[5] == X86_64_SSEUP_CLASS
7080 && regclass[6] == X86_64_SSEUP_CLASS
7081 && regclass[7] == X86_64_SSEUP_CLASS
7082 && mode != BLKmode)
7083 return gen_reg_or_parallel (mode, orig_mode,
7084 SSE_REGNO (sse_regno));
7085 if (n == 2
7086 && regclass[0] == X86_64_X87_CLASS
7087 && regclass[1] == X86_64_X87UP_CLASS)
7088 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7089
7090 if (n == 2
7091 && regclass[0] == X86_64_INTEGER_CLASS
7092 && regclass[1] == X86_64_INTEGER_CLASS
7093 && (mode == CDImode || mode == TImode)
7094 && intreg[0] + 1 == intreg[1])
7095 return gen_rtx_REG (mode, intreg[0]);
7096
7097 /* Otherwise figure out the entries of the PARALLEL. */
7098 for (i = 0; i < n; i++)
7099 {
7100 int pos;
7101
7102 switch (regclass[i])
7103 {
7104 case X86_64_NO_CLASS:
7105 break;
7106 case X86_64_INTEGER_CLASS:
7107 case X86_64_INTEGERSI_CLASS:
7108 /* Merge TImodes on aligned occasions here too. */
7109 if (i * 8 + 8 > bytes)
7110 tmpmode
7111 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7112 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7113 tmpmode = SImode;
7114 else
7115 tmpmode = DImode;
7116 /* We've requested 24 bytes for which we
7117 don't have a mode. Use DImode. */
7118 if (tmpmode == BLKmode)
7119 tmpmode = DImode;
7120 exp [nexps++]
7121 = gen_rtx_EXPR_LIST (VOIDmode,
7122 gen_rtx_REG (tmpmode, *intreg),
7123 GEN_INT (i*8));
7124 intreg++;
7125 break;
7126 case X86_64_SSESF_CLASS:
7127 exp [nexps++]
7128 = gen_rtx_EXPR_LIST (VOIDmode,
7129 gen_rtx_REG (SFmode,
7130 SSE_REGNO (sse_regno)),
7131 GEN_INT (i*8));
7132 sse_regno++;
7133 break;
7134 case X86_64_SSEDF_CLASS:
7135 exp [nexps++]
7136 = gen_rtx_EXPR_LIST (VOIDmode,
7137 gen_rtx_REG (DFmode,
7138 SSE_REGNO (sse_regno)),
7139 GEN_INT (i*8));
7140 sse_regno++;
7141 break;
7142 case X86_64_SSE_CLASS:
7143 pos = i;
7144 switch (n)
7145 {
7146 case 1:
7147 tmpmode = DImode;
7148 break;
7149 case 2:
7150 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7151 {
7152 tmpmode = TImode;
7153 i++;
7154 }
7155 else
7156 tmpmode = DImode;
7157 break;
7158 case 4:
7159 gcc_assert (i == 0
7160 && regclass[1] == X86_64_SSEUP_CLASS
7161 && regclass[2] == X86_64_SSEUP_CLASS
7162 && regclass[3] == X86_64_SSEUP_CLASS);
7163 tmpmode = OImode;
7164 i += 3;
7165 break;
7166 case 8:
7167 gcc_assert (i == 0
7168 && regclass[1] == X86_64_SSEUP_CLASS
7169 && regclass[2] == X86_64_SSEUP_CLASS
7170 && regclass[3] == X86_64_SSEUP_CLASS
7171 && regclass[4] == X86_64_SSEUP_CLASS
7172 && regclass[5] == X86_64_SSEUP_CLASS
7173 && regclass[6] == X86_64_SSEUP_CLASS
7174 && regclass[7] == X86_64_SSEUP_CLASS);
7175 tmpmode = XImode;
7176 i += 7;
7177 break;
7178 default:
7179 gcc_unreachable ();
7180 }
7181 exp [nexps++]
7182 = gen_rtx_EXPR_LIST (VOIDmode,
7183 gen_rtx_REG (tmpmode,
7184 SSE_REGNO (sse_regno)),
7185 GEN_INT (pos*8));
7186 sse_regno++;
7187 break;
7188 default:
7189 gcc_unreachable ();
7190 }
7191 }
7192
7193 /* Empty aligned struct, union or class. */
7194 if (nexps == 0)
7195 return NULL;
7196
7197 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7198 for (i = 0; i < nexps; i++)
7199 XVECEXP (ret, 0, i) = exp [i];
7200 return ret;
7201 }
7202
7203 /* Update the data in CUM to advance over an argument of mode MODE
7204 and data type TYPE. (TYPE is null for libcalls where that information
7205 may not be available.) */
7206
7207 static void
7208 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7209 const_tree type, HOST_WIDE_INT bytes,
7210 HOST_WIDE_INT words)
7211 {
7212 switch (mode)
7213 {
7214 default:
7215 break;
7216
7217 case BLKmode:
7218 if (bytes < 0)
7219 break;
7220 /* FALLTHRU */
7221
7222 case DImode:
7223 case SImode:
7224 case HImode:
7225 case QImode:
7226 cum->words += words;
7227 cum->nregs -= words;
7228 cum->regno += words;
7229
7230 if (cum->nregs <= 0)
7231 {
7232 cum->nregs = 0;
7233 cum->regno = 0;
7234 }
7235 break;
7236
7237 case OImode:
7238 /* OImode shouldn't be used directly. */
7239 gcc_unreachable ();
7240
7241 case DFmode:
7242 if (cum->float_in_sse < 2)
7243 break;
7244 case SFmode:
7245 if (cum->float_in_sse < 1)
7246 break;
7247 /* FALLTHRU */
7248
7249 case V8SFmode:
7250 case V8SImode:
7251 case V64QImode:
7252 case V32HImode:
7253 case V16SImode:
7254 case V8DImode:
7255 case V16SFmode:
7256 case V8DFmode:
7257 case V32QImode:
7258 case V16HImode:
7259 case V4DFmode:
7260 case V4DImode:
7261 case TImode:
7262 case V16QImode:
7263 case V8HImode:
7264 case V4SImode:
7265 case V2DImode:
7266 case V4SFmode:
7267 case V2DFmode:
7268 if (!type || !AGGREGATE_TYPE_P (type))
7269 {
7270 cum->sse_words += words;
7271 cum->sse_nregs -= 1;
7272 cum->sse_regno += 1;
7273 if (cum->sse_nregs <= 0)
7274 {
7275 cum->sse_nregs = 0;
7276 cum->sse_regno = 0;
7277 }
7278 }
7279 break;
7280
7281 case V8QImode:
7282 case V4HImode:
7283 case V2SImode:
7284 case V2SFmode:
7285 case V1TImode:
7286 case V1DImode:
7287 if (!type || !AGGREGATE_TYPE_P (type))
7288 {
7289 cum->mmx_words += words;
7290 cum->mmx_nregs -= 1;
7291 cum->mmx_regno += 1;
7292 if (cum->mmx_nregs <= 0)
7293 {
7294 cum->mmx_nregs = 0;
7295 cum->mmx_regno = 0;
7296 }
7297 }
7298 break;
7299 }
7300 }
7301
7302 static void
7303 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7304 const_tree type, HOST_WIDE_INT words, bool named)
7305 {
7306 int int_nregs, sse_nregs;
7307
7308 /* Unnamed 512 and 256bit vector mode parameters are passed on stack. */
7309 if (!named && (VALID_AVX512F_REG_MODE (mode)
7310 || VALID_AVX256_REG_MODE (mode)))
7311 return;
7312
7313 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7314 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7315 {
7316 cum->nregs -= int_nregs;
7317 cum->sse_nregs -= sse_nregs;
7318 cum->regno += int_nregs;
7319 cum->sse_regno += sse_nregs;
7320 }
7321 else
7322 {
7323 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7324 cum->words = (cum->words + align - 1) & ~(align - 1);
7325 cum->words += words;
7326 }
7327 }
7328
7329 static void
7330 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7331 HOST_WIDE_INT words)
7332 {
7333 /* Otherwise, this should be passed indirectly. */
7334 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7335
7336 cum->words += words;
7337 if (cum->nregs > 0)
7338 {
7339 cum->nregs -= 1;
7340 cum->regno += 1;
7341 }
7342 }
7343
7344 /* Update the data in CUM to advance over an argument of mode MODE and
7345 data type TYPE. (TYPE is null for libcalls where that information
7346 may not be available.) */
7347
7348 static void
7349 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7350 const_tree type, bool named)
7351 {
7352 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7353 HOST_WIDE_INT bytes, words;
7354
7355 if (mode == BLKmode)
7356 bytes = int_size_in_bytes (type);
7357 else
7358 bytes = GET_MODE_SIZE (mode);
7359 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7360
7361 if (type)
7362 mode = type_natural_mode (type, NULL, false);
7363
7364 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7365 function_arg_advance_ms_64 (cum, bytes, words);
7366 else if (TARGET_64BIT)
7367 function_arg_advance_64 (cum, mode, type, words, named);
7368 else
7369 function_arg_advance_32 (cum, mode, type, bytes, words);
7370 }
7371
7372 /* Define where to put the arguments to a function.
7373 Value is zero to push the argument on the stack,
7374 or a hard register in which to store the argument.
7375
7376 MODE is the argument's machine mode.
7377 TYPE is the data type of the argument (as a tree).
7378 This is null for libcalls where that information may
7379 not be available.
7380 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7381 the preceding args and about the function being called.
7382 NAMED is nonzero if this argument is a named parameter
7383 (otherwise it is an extra parameter matching an ellipsis). */
7384
7385 static rtx
7386 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7387 enum machine_mode orig_mode, const_tree type,
7388 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7389 {
7390 /* Avoid the AL settings for the Unix64 ABI. */
7391 if (mode == VOIDmode)
7392 return constm1_rtx;
7393
7394 switch (mode)
7395 {
7396 default:
7397 break;
7398
7399 case BLKmode:
7400 if (bytes < 0)
7401 break;
7402 /* FALLTHRU */
7403 case DImode:
7404 case SImode:
7405 case HImode:
7406 case QImode:
7407 if (words <= cum->nregs)
7408 {
7409 int regno = cum->regno;
7410
7411 /* Fastcall allocates the first two DWORD (SImode) or
7412 smaller arguments to ECX and EDX if the argument isn't
7413 an aggregate type. */
7414 if (cum->fastcall)
7415 {
7416 if (mode == BLKmode
7417 || mode == DImode
7418 || (type && AGGREGATE_TYPE_P (type)))
7419 break;
7420
7421 /* ECX not EAX is the first allocated register. */
7422 if (regno == AX_REG)
7423 regno = CX_REG;
7424 }
7425 return gen_rtx_REG (mode, regno);
7426 }
7427 break;
7428
7429 case DFmode:
7430 if (cum->float_in_sse < 2)
7431 break;
7432 case SFmode:
7433 if (cum->float_in_sse < 1)
7434 break;
7435 /* FALLTHRU */
7436 case TImode:
7437 /* In 32bit, we pass TImode in xmm registers. */
7438 case V16QImode:
7439 case V8HImode:
7440 case V4SImode:
7441 case V2DImode:
7442 case V4SFmode:
7443 case V2DFmode:
7444 if (!type || !AGGREGATE_TYPE_P (type))
7445 {
7446 if (cum->sse_nregs)
7447 return gen_reg_or_parallel (mode, orig_mode,
7448 cum->sse_regno + FIRST_SSE_REG);
7449 }
7450 break;
7451
7452 case OImode:
7453 case XImode:
7454 /* OImode and XImode shouldn't be used directly. */
7455 gcc_unreachable ();
7456
7457 case V64QImode:
7458 case V32HImode:
7459 case V16SImode:
7460 case V8DImode:
7461 case V16SFmode:
7462 case V8DFmode:
7463 case V8SFmode:
7464 case V8SImode:
7465 case V32QImode:
7466 case V16HImode:
7467 case V4DFmode:
7468 case V4DImode:
7469 if (!type || !AGGREGATE_TYPE_P (type))
7470 {
7471 if (cum->sse_nregs)
7472 return gen_reg_or_parallel (mode, orig_mode,
7473 cum->sse_regno + FIRST_SSE_REG);
7474 }
7475 break;
7476
7477 case V8QImode:
7478 case V4HImode:
7479 case V2SImode:
7480 case V2SFmode:
7481 case V1TImode:
7482 case V1DImode:
7483 if (!type || !AGGREGATE_TYPE_P (type))
7484 {
7485 if (cum->mmx_nregs)
7486 return gen_reg_or_parallel (mode, orig_mode,
7487 cum->mmx_regno + FIRST_MMX_REG);
7488 }
7489 break;
7490 }
7491
7492 return NULL_RTX;
7493 }
7494
7495 static rtx
7496 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7497 enum machine_mode orig_mode, const_tree type, bool named)
7498 {
7499 /* Handle a hidden AL argument containing number of registers
7500 for varargs x86-64 functions. */
7501 if (mode == VOIDmode)
7502 return GEN_INT (cum->maybe_vaarg
7503 ? (cum->sse_nregs < 0
7504 ? X86_64_SSE_REGPARM_MAX
7505 : cum->sse_regno)
7506 : -1);
7507
7508 switch (mode)
7509 {
7510 default:
7511 break;
7512
7513 case V8SFmode:
7514 case V8SImode:
7515 case V32QImode:
7516 case V16HImode:
7517 case V4DFmode:
7518 case V4DImode:
7519 case V16SFmode:
7520 case V16SImode:
7521 case V64QImode:
7522 case V32HImode:
7523 case V8DFmode:
7524 case V8DImode:
7525 /* Unnamed 256 and 512bit vector mode parameters are passed on stack. */
7526 if (!named)
7527 return NULL;
7528 break;
7529 }
7530
7531 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7532 cum->sse_nregs,
7533 &x86_64_int_parameter_registers [cum->regno],
7534 cum->sse_regno);
7535 }
7536
7537 static rtx
7538 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7539 enum machine_mode orig_mode, bool named,
7540 HOST_WIDE_INT bytes)
7541 {
7542 unsigned int regno;
7543
7544 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7545 We use the value -2 to specify that the current function call is MS_ABI. */
7546 if (mode == VOIDmode)
7547 return GEN_INT (-2);
7548
7549 /* If we've run out of registers, it goes on the stack. */
7550 if (cum->nregs == 0)
7551 return NULL_RTX;
7552
7553 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7554
7555 /* Only floating point modes are passed in anything but integer regs. */
7556 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7557 {
7558 if (named)
7559 regno = cum->regno + FIRST_SSE_REG;
7560 else
7561 {
7562 rtx t1, t2;
7563
7564 /* Unnamed floating parameters are passed in both the
7565 SSE and integer registers. */
7566 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7567 t2 = gen_rtx_REG (mode, regno);
7568 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7569 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7570 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7571 }
7572 }
7573 /* Handle aggregate types passed in a register. */
7574 if (orig_mode == BLKmode)
7575 {
7576 if (bytes > 0 && bytes <= 8)
7577 mode = (bytes > 4 ? DImode : SImode);
7578 if (mode == BLKmode)
7579 mode = DImode;
7580 }
7581
7582 return gen_reg_or_parallel (mode, orig_mode, regno);
7583 }
7584
7585 /* Return where to put the arguments to a function.
7586 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7587
7588 MODE is the argument's machine mode. TYPE is the data type of the
7589 argument. It is null for libcalls where that information may not be
7590 available. CUM gives information about the preceding args and about
7591 the function being called. NAMED is nonzero if this argument is a
7592 named parameter (otherwise it is an extra parameter matching an
7593 ellipsis). */
7594
7595 static rtx
7596 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7597 const_tree type, bool named)
7598 {
7599 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7600 enum machine_mode mode = omode;
7601 HOST_WIDE_INT bytes, words;
7602 rtx arg;
7603
7604 if (mode == BLKmode)
7605 bytes = int_size_in_bytes (type);
7606 else
7607 bytes = GET_MODE_SIZE (mode);
7608 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7609
7610 /* To simplify the code below, represent vector types with a vector mode
7611 even if MMX/SSE are not active. */
7612 if (type && TREE_CODE (type) == VECTOR_TYPE)
7613 mode = type_natural_mode (type, cum, false);
7614
7615 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7616 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7617 else if (TARGET_64BIT)
7618 arg = function_arg_64 (cum, mode, omode, type, named);
7619 else
7620 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7621
7622 return arg;
7623 }
7624
7625 /* A C expression that indicates when an argument must be passed by
7626 reference. If nonzero for an argument, a copy of that argument is
7627 made in memory and a pointer to the argument is passed instead of
7628 the argument itself. The pointer is passed in whatever way is
7629 appropriate for passing a pointer to that type. */
7630
7631 static bool
7632 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7633 const_tree type, bool)
7634 {
7635 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7636
7637 /* See Windows x64 Software Convention. */
7638 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7639 {
7640 int msize = (int) GET_MODE_SIZE (mode);
7641 if (type)
7642 {
7643 /* Arrays are passed by reference. */
7644 if (TREE_CODE (type) == ARRAY_TYPE)
7645 return true;
7646
7647 if (AGGREGATE_TYPE_P (type))
7648 {
7649 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7650 are passed by reference. */
7651 msize = int_size_in_bytes (type);
7652 }
7653 }
7654
7655 /* Values of any other size, e.g. __m128, are passed by reference. */
7656 switch (msize) {
7657 case 1: case 2: case 4: case 8:
7658 break;
7659 default:
7660 return true;
7661 }
7662 }
7663 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7664 return true;
7665
7666 return false;
7667 }
7668
7669 /* Return true when TYPE should be 128bit aligned for 32bit argument
7670 passing ABI. XXX: This function is obsolete and is only used for
7671 checking psABI compatibility with previous versions of GCC. */
7672
7673 static bool
7674 ix86_compat_aligned_value_p (const_tree type)
7675 {
7676 enum machine_mode mode = TYPE_MODE (type);
7677 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7678 || mode == TDmode
7679 || mode == TFmode
7680 || mode == TCmode)
7681 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7682 return true;
7683 if (TYPE_ALIGN (type) < 128)
7684 return false;
7685
7686 if (AGGREGATE_TYPE_P (type))
7687 {
7688 /* Walk the aggregates recursively. */
7689 switch (TREE_CODE (type))
7690 {
7691 case RECORD_TYPE:
7692 case UNION_TYPE:
7693 case QUAL_UNION_TYPE:
7694 {
7695 tree field;
7696
7697 /* Walk all the structure fields. */
7698 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7699 {
7700 if (TREE_CODE (field) == FIELD_DECL
7701 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7702 return true;
7703 }
7704 break;
7705 }
7706
7707 case ARRAY_TYPE:
7708 /* Just for use if some languages pass arrays by value. */
7709 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7710 return true;
7711 break;
7712
7713 default:
7714 gcc_unreachable ();
7715 }
7716 }
7717 return false;
7718 }
7719
7720 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7721 XXX: This function is obsolete and is only used for checking psABI
7722 compatibility with previous versions of GCC. */
7723
7724 static unsigned int
7725 ix86_compat_function_arg_boundary (enum machine_mode mode,
7726 const_tree type, unsigned int align)
7727 {
7728 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7729 natural boundaries. */
7730 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7731 {
7732 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7733 make an exception for SSE modes since these require 128bit
7734 alignment.
7735
7736 The handling here differs from field_alignment. ICC aligns MMX
7737 arguments to 4 byte boundaries, while structure fields are aligned
7738 to 8 byte boundaries. */
7739 if (!type)
7740 {
7741 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7742 align = PARM_BOUNDARY;
7743 }
7744 else
7745 {
7746 if (!ix86_compat_aligned_value_p (type))
7747 align = PARM_BOUNDARY;
7748 }
7749 }
7750 if (align > BIGGEST_ALIGNMENT)
7751 align = BIGGEST_ALIGNMENT;
7752 return align;
7753 }
7754
7755 /* Return true when TYPE should be 128bit aligned for 32bit argument
7756 passing ABI. */
7757
7758 static bool
7759 ix86_contains_aligned_value_p (const_tree type)
7760 {
7761 enum machine_mode mode = TYPE_MODE (type);
7762
7763 if (mode == XFmode || mode == XCmode)
7764 return false;
7765
7766 if (TYPE_ALIGN (type) < 128)
7767 return false;
7768
7769 if (AGGREGATE_TYPE_P (type))
7770 {
7771 /* Walk the aggregates recursively. */
7772 switch (TREE_CODE (type))
7773 {
7774 case RECORD_TYPE:
7775 case UNION_TYPE:
7776 case QUAL_UNION_TYPE:
7777 {
7778 tree field;
7779
7780 /* Walk all the structure fields. */
7781 for (field = TYPE_FIELDS (type);
7782 field;
7783 field = DECL_CHAIN (field))
7784 {
7785 if (TREE_CODE (field) == FIELD_DECL
7786 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7787 return true;
7788 }
7789 break;
7790 }
7791
7792 case ARRAY_TYPE:
7793 /* Just for use if some languages pass arrays by value. */
7794 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7795 return true;
7796 break;
7797
7798 default:
7799 gcc_unreachable ();
7800 }
7801 }
7802 else
7803 return TYPE_ALIGN (type) >= 128;
7804
7805 return false;
7806 }
7807
7808 /* Gives the alignment boundary, in bits, of an argument with the
7809 specified mode and type. */
7810
7811 static unsigned int
7812 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7813 {
7814 unsigned int align;
7815 if (type)
7816 {
7817 /* Since the main variant type is used for the call, convert
7818 TYPE to its main variant. */
7819 type = TYPE_MAIN_VARIANT (type);
7820 align = TYPE_ALIGN (type);
7821 }
7822 else
7823 align = GET_MODE_ALIGNMENT (mode);
7824 if (align < PARM_BOUNDARY)
7825 align = PARM_BOUNDARY;
7826 else
7827 {
7828 static bool warned;
7829 unsigned int saved_align = align;
7830
7831 if (!TARGET_64BIT)
7832 {
7833 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7834 if (!type)
7835 {
7836 if (mode == XFmode || mode == XCmode)
7837 align = PARM_BOUNDARY;
7838 }
7839 else if (!ix86_contains_aligned_value_p (type))
7840 align = PARM_BOUNDARY;
7841
7842 if (align < 128)
7843 align = PARM_BOUNDARY;
7844 }
7845
7846 if (warn_psabi
7847 && !warned
7848 && align != ix86_compat_function_arg_boundary (mode, type,
7849 saved_align))
7850 {
7851 warned = true;
7852 inform (input_location,
7853 "The ABI for passing parameters with %d-byte"
7854 " alignment has changed in GCC 4.6",
7855 align / BITS_PER_UNIT);
7856 }
7857 }
7858
7859 return align;
7860 }
7861
7862 /* Return true if N is a possible register number of function value. */
7863
7864 static bool
7865 ix86_function_value_regno_p (const unsigned int regno)
7866 {
7867 switch (regno)
7868 {
7869 case AX_REG:
7870 return true;
7871 case DX_REG:
7872 return (!TARGET_64BIT || ix86_abi != MS_ABI);
7873 case DI_REG:
7874 case SI_REG:
7875 return TARGET_64BIT && ix86_abi != MS_ABI;
7876
7877 /* Complex values are returned in %st(0)/%st(1) pair. */
7878 case ST0_REG:
7879 case ST1_REG:
7880 /* TODO: The function should depend on current function ABI but
7881 builtins.c would need updating then. Therefore we use the
7882 default ABI. */
7883 if (TARGET_64BIT && ix86_abi == MS_ABI)
7884 return false;
7885 return TARGET_FLOAT_RETURNS_IN_80387;
7886
7887 /* Complex values are returned in %xmm0/%xmm1 pair. */
7888 case XMM0_REG:
7889 case XMM1_REG:
7890 return TARGET_SSE;
7891
7892 case MM0_REG:
7893 if (TARGET_MACHO || TARGET_64BIT)
7894 return false;
7895 return TARGET_MMX;
7896 }
7897
7898 return false;
7899 }
7900
7901 /* Define how to find the value returned by a function.
7902 VALTYPE is the data type of the value (as a tree).
7903 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7904 otherwise, FUNC is 0. */
7905
7906 static rtx
7907 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7908 const_tree fntype, const_tree fn)
7909 {
7910 unsigned int regno;
7911
7912 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7913 we normally prevent this case when mmx is not available. However
7914 some ABIs may require the result to be returned like DImode. */
7915 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7916 regno = FIRST_MMX_REG;
7917
7918 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7919 we prevent this case when sse is not available. However some ABIs
7920 may require the result to be returned like integer TImode. */
7921 else if (mode == TImode
7922 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7923 regno = FIRST_SSE_REG;
7924
7925 /* 32-byte vector modes in %ymm0. */
7926 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7927 regno = FIRST_SSE_REG;
7928
7929 /* 64-byte vector modes in %zmm0. */
7930 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7931 regno = FIRST_SSE_REG;
7932
7933 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7934 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7935 regno = FIRST_FLOAT_REG;
7936 else
7937 /* Most things go in %eax. */
7938 regno = AX_REG;
7939
7940 /* Override FP return register with %xmm0 for local functions when
7941 SSE math is enabled or for functions with sseregparm attribute. */
7942 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7943 {
7944 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7945 if ((sse_level >= 1 && mode == SFmode)
7946 || (sse_level == 2 && mode == DFmode))
7947 regno = FIRST_SSE_REG;
7948 }
7949
7950 /* OImode shouldn't be used directly. */
7951 gcc_assert (mode != OImode);
7952
7953 return gen_rtx_REG (orig_mode, regno);
7954 }
7955
7956 static rtx
7957 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7958 const_tree valtype)
7959 {
7960 rtx ret;
7961
7962 /* Handle libcalls, which don't provide a type node. */
7963 if (valtype == NULL)
7964 {
7965 unsigned int regno;
7966
7967 switch (mode)
7968 {
7969 case SFmode:
7970 case SCmode:
7971 case DFmode:
7972 case DCmode:
7973 case TFmode:
7974 case SDmode:
7975 case DDmode:
7976 case TDmode:
7977 regno = FIRST_SSE_REG;
7978 break;
7979 case XFmode:
7980 case XCmode:
7981 regno = FIRST_FLOAT_REG;
7982 break;
7983 case TCmode:
7984 return NULL;
7985 default:
7986 regno = AX_REG;
7987 }
7988
7989 return gen_rtx_REG (mode, regno);
7990 }
7991 else if (POINTER_TYPE_P (valtype))
7992 {
7993 /* Pointers are always returned in word_mode. */
7994 mode = word_mode;
7995 }
7996
7997 ret = construct_container (mode, orig_mode, valtype, 1,
7998 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7999 x86_64_int_return_registers, 0);
8000
8001 /* For zero sized structures, construct_container returns NULL, but we
8002 need to keep the rest of the compiler happy by returning a meaningful value. */
8003 if (!ret)
8004 ret = gen_rtx_REG (orig_mode, AX_REG);
8005
8006 return ret;
8007 }
8008
8009 static rtx
8010 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
8011 const_tree valtype)
8012 {
8013 unsigned int regno = AX_REG;
8014
8015 if (TARGET_SSE)
8016 {
8017 switch (GET_MODE_SIZE (mode))
8018 {
8019 case 16:
8020 if (valtype != NULL_TREE
8021 && !VECTOR_INTEGER_TYPE_P (valtype)
8023 && !INTEGRAL_TYPE_P (valtype)
8024 && !VECTOR_FLOAT_TYPE_P (valtype))
8025 break;
8026 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8027 && !COMPLEX_MODE_P (mode))
8028 regno = FIRST_SSE_REG;
8029 break;
8030 case 8:
8031 case 4:
8032 if (mode == SFmode || mode == DFmode)
8033 regno = FIRST_SSE_REG;
8034 break;
8035 default:
8036 break;
8037 }
8038 }
8039 return gen_rtx_REG (orig_mode, regno);
8040 }
8041
8042 static rtx
8043 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8044 enum machine_mode orig_mode, enum machine_mode mode)
8045 {
8046 const_tree fn, fntype;
8047
8048 fn = NULL_TREE;
8049 if (fntype_or_decl && DECL_P (fntype_or_decl))
8050 fn = fntype_or_decl;
8051 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8052
8053 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8054 return function_value_ms_64 (orig_mode, mode, valtype);
8055 else if (TARGET_64BIT)
8056 return function_value_64 (orig_mode, mode, valtype);
8057 else
8058 return function_value_32 (orig_mode, mode, fntype, fn);
8059 }
8060
8061 static rtx
8062 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8063 {
8064 enum machine_mode mode, orig_mode;
8065
8066 orig_mode = TYPE_MODE (valtype);
8067 mode = type_natural_mode (valtype, NULL, true);
8068 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8069 }
8070
8071 /* Pointer function arguments and return values are promoted to
8072 word_mode. */
8073
8074 static enum machine_mode
8075 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
8076 int *punsignedp, const_tree fntype,
8077 int for_return)
8078 {
8079 if (type != NULL_TREE && POINTER_TYPE_P (type))
8080 {
8081 *punsignedp = POINTERS_EXTEND_UNSIGNED;
8082 return word_mode;
8083 }
8084 return default_promote_function_mode (type, mode, punsignedp, fntype,
8085 for_return);
8086 }
8087
8088 /* Return true if a structure, union or array with MODE containing FIELD
8089 should be accessed using BLKmode. */
8090
8091 static bool
8092 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
8093 {
8094 /* Union with XFmode must be in BLKmode. */
8095 return (mode == XFmode
8096 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8097 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8098 }
8099
8100 rtx
8101 ix86_libcall_value (enum machine_mode mode)
8102 {
8103 return ix86_function_value_1 (NULL, NULL, mode, mode);
8104 }
8105
8106 /* Return true iff type is returned in memory. */
8107
8108 static bool
8109 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8110 {
8111 #ifdef SUBTARGET_RETURN_IN_MEMORY
8112 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8113 #else
8114 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8115 HOST_WIDE_INT size;
8116
8117 if (TARGET_64BIT)
8118 {
8119 if (ix86_function_type_abi (fntype) == MS_ABI)
8120 {
8121 size = int_size_in_bytes (type);
8122
8123 /* __m128 is returned in xmm0. */
8124 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8125 || INTEGRAL_TYPE_P (type)
8126 || VECTOR_FLOAT_TYPE_P (type))
8127 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8128 && !COMPLEX_MODE_P (mode)
8129 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8130 return false;
8131
8132 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
8133 return size != 1 && size != 2 && size != 4 && size != 8;
8134 }
8135 else
8136 {
8137 int needed_intregs, needed_sseregs;
8138
8139 return examine_argument (mode, type, 1,
8140 &needed_intregs, &needed_sseregs);
8141 }
8142 }
8143 else
8144 {
8145 if (mode == BLKmode)
8146 return true;
8147
8148 size = int_size_in_bytes (type);
8149
8150 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8151 return false;
8152
8153 if (VECTOR_MODE_P (mode) || mode == TImode)
8154 {
8155 /* User-created vectors small enough to fit in EAX. */
8156 if (size < 8)
8157 return false;
8158
8159 /* Unless the ABI prescribes otherwise,
8160 MMX/3dNow values are returned in MM0 if available. */
8161
8162 if (size == 8)
8163 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8164
8165 /* SSE values are returned in XMM0 if available. */
8166 if (size == 16)
8167 return !TARGET_SSE;
8168
8169 /* AVX values are returned in YMM0 if available. */
8170 if (size == 32)
8171 return !TARGET_AVX;
8172
8173 /* AVX512F values are returned in ZMM0 if available. */
8174 if (size == 64)
8175 return !TARGET_AVX512F;
8176 }
8177
8178 if (mode == XFmode)
8179 return false;
8180
8181 if (size > 12)
8182 return true;
8183
8184 /* OImode shouldn't be used directly. */
8185 gcc_assert (mode != OImode);
8186
8187 return false;
8188 }
8189 #endif
8190 }
8191
8192 \f
8193 /* Create the va_list data type. */
8194
8195 /* Returns the calling convention specific va_list data type.
8196 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
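/* For the 64-bit SYSV ABI the record built below corresponds to the psABI
   va_list layout, roughly:
     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];  */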
8197
8198 static tree
8199 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8200 {
8201 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8202
8203 /* For i386 we use a plain pointer to the argument area. */
8204 if (!TARGET_64BIT || abi == MS_ABI)
8205 return build_pointer_type (char_type_node);
8206
8207 record = lang_hooks.types.make_type (RECORD_TYPE);
8208 type_decl = build_decl (BUILTINS_LOCATION,
8209 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8210
8211 f_gpr = build_decl (BUILTINS_LOCATION,
8212 FIELD_DECL, get_identifier ("gp_offset"),
8213 unsigned_type_node);
8214 f_fpr = build_decl (BUILTINS_LOCATION,
8215 FIELD_DECL, get_identifier ("fp_offset"),
8216 unsigned_type_node);
8217 f_ovf = build_decl (BUILTINS_LOCATION,
8218 FIELD_DECL, get_identifier ("overflow_arg_area"),
8219 ptr_type_node);
8220 f_sav = build_decl (BUILTINS_LOCATION,
8221 FIELD_DECL, get_identifier ("reg_save_area"),
8222 ptr_type_node);
8223
8224 va_list_gpr_counter_field = f_gpr;
8225 va_list_fpr_counter_field = f_fpr;
8226
8227 DECL_FIELD_CONTEXT (f_gpr) = record;
8228 DECL_FIELD_CONTEXT (f_fpr) = record;
8229 DECL_FIELD_CONTEXT (f_ovf) = record;
8230 DECL_FIELD_CONTEXT (f_sav) = record;
8231
8232 TYPE_STUB_DECL (record) = type_decl;
8233 TYPE_NAME (record) = type_decl;
8234 TYPE_FIELDS (record) = f_gpr;
8235 DECL_CHAIN (f_gpr) = f_fpr;
8236 DECL_CHAIN (f_fpr) = f_ovf;
8237 DECL_CHAIN (f_ovf) = f_sav;
8238
8239 layout_type (record);
8240
8241 /* The correct type is an array type of one element. */
8242 return build_array_type (record, build_index_type (size_zero_node));
8243 }
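/* For reference, the record built above matches the usual SysV x86-64
   va_list layout; a sketch of the equivalent C declaration (not code that
   is compiled here):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  */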
8244
8245 /* Set up the builtin va_list data type and, for 64-bit, the additional
8246 calling convention specific va_list data types. */
8247
8248 static tree
8249 ix86_build_builtin_va_list (void)
8250 {
8251 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8252
8253 /* Initialize abi specific va_list builtin types. */
8254 if (TARGET_64BIT)
8255 {
8256 tree t;
8257 if (ix86_abi == MS_ABI)
8258 {
8259 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8260 if (TREE_CODE (t) != RECORD_TYPE)
8261 t = build_variant_type_copy (t);
8262 sysv_va_list_type_node = t;
8263 }
8264 else
8265 {
8266 t = ret;
8267 if (TREE_CODE (t) != RECORD_TYPE)
8268 t = build_variant_type_copy (t);
8269 sysv_va_list_type_node = t;
8270 }
8271 if (ix86_abi != MS_ABI)
8272 {
8273 t = ix86_build_builtin_va_list_abi (MS_ABI);
8274 if (TREE_CODE (t) != RECORD_TYPE)
8275 t = build_variant_type_copy (t);
8276 ms_va_list_type_node = t;
8277 }
8278 else
8279 {
8280 t = ret;
8281 if (TREE_CODE (t) != RECORD_TYPE)
8282 t = build_variant_type_copy (t);
8283 ms_va_list_type_node = t;
8284 }
8285 }
8286
8287 return ret;
8288 }
8289
8290 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8291
8292 static void
8293 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8294 {
8295 rtx save_area, mem;
8296 alias_set_type set;
8297 int i, max;
8298
8299 /* GPR size of varargs save area. */
8300 if (cfun->va_list_gpr_size)
8301 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8302 else
8303 ix86_varargs_gpr_size = 0;
8304
8305 /* FPR size of varargs save area. We don't need it if we don't pass
8306 anything in SSE registers. */
8307 if (TARGET_SSE && cfun->va_list_fpr_size)
8308 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8309 else
8310 ix86_varargs_fpr_size = 0;
8311
8312 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8313 return;
8314
8315 save_area = frame_pointer_rtx;
8316 set = get_varargs_alias_set ();
8317
8318 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8319 if (max > X86_64_REGPARM_MAX)
8320 max = X86_64_REGPARM_MAX;
8321
8322 for (i = cum->regno; i < max; i++)
8323 {
8324 mem = gen_rtx_MEM (word_mode,
8325 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8326 MEM_NOTRAP_P (mem) = 1;
8327 set_mem_alias_set (mem, set);
8328 emit_move_insn (mem,
8329 gen_rtx_REG (word_mode,
8330 x86_64_int_parameter_registers[i]));
8331 }
8332
8333 if (ix86_varargs_fpr_size)
8334 {
8335 enum machine_mode smode;
8336 rtx_code_label *label;
8337 rtx test;
8338
8339 /* Now emit code to save SSE registers. The AX parameter contains the number
8340 of SSE parameter registers used to call this function, though all we
8341 actually check here is the zero/non-zero status. */
8342
8343 label = gen_label_rtx ();
8344 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8345 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8346 label));
8347
8348 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8349 we used movdqa (i.e. TImode) instead? Perhaps even better would
8350 be if we could determine the real mode of the data, via a hook
8351 into pass_stdarg. Ignore all that for now. */
8352 smode = V4SFmode;
8353 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8354 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8355
8356 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8357 if (max > X86_64_SSE_REGPARM_MAX)
8358 max = X86_64_SSE_REGPARM_MAX;
8359
8360 for (i = cum->sse_regno; i < max; ++i)
8361 {
8362 mem = plus_constant (Pmode, save_area,
8363 i * 16 + ix86_varargs_gpr_size);
8364 mem = gen_rtx_MEM (smode, mem);
8365 MEM_NOTRAP_P (mem) = 1;
8366 set_mem_alias_set (mem, set);
8367 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8368
8369 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8370 }
8371
8372 emit_label (label);
8373 }
8374 }
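/* An informal picture of the save area laid out above, assuming the usual
   SysV values X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8:
   bytes 0..47 hold the incoming %rdi, %rsi, %rdx, %rcx, %r8 and %r9 (one
   word each), and bytes 48..175 hold %xmm0..%xmm7 (16 bytes each).  The
   gp_offset and fp_offset fields of the va_list index into this block.  */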
8375
8376 static void
8377 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8378 {
8379 alias_set_type set = get_varargs_alias_set ();
8380 int i;
8381
8382 /* Reset to zero, as a SysV va_arg may have been used
8383 before. */
8384 ix86_varargs_gpr_size = 0;
8385 ix86_varargs_fpr_size = 0;
8386
8387 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8388 {
8389 rtx reg, mem;
8390
8391 mem = gen_rtx_MEM (Pmode,
8392 plus_constant (Pmode, virtual_incoming_args_rtx,
8393 i * UNITS_PER_WORD));
8394 MEM_NOTRAP_P (mem) = 1;
8395 set_mem_alias_set (mem, set);
8396
8397 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8398 emit_move_insn (mem, reg);
8399 }
8400 }
8401
8402 static void
8403 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8404 tree type, int *, int no_rtl)
8405 {
8406 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8407 CUMULATIVE_ARGS next_cum;
8408 tree fntype;
8409
8410 /* This argument doesn't appear to be used anymore. Which is good,
8411 because the old code here didn't suppress rtl generation. */
8412 gcc_assert (!no_rtl);
8413
8414 if (!TARGET_64BIT)
8415 return;
8416
8417 fntype = TREE_TYPE (current_function_decl);
8418
8419 /* For varargs, we do not want to skip the dummy va_dcl argument.
8420 For stdargs, we do want to skip the last named argument. */
8421 next_cum = *cum;
8422 if (stdarg_p (fntype))
8423 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8424 true);
8425
8426 if (cum->call_abi == MS_ABI)
8427 setup_incoming_varargs_ms_64 (&next_cum);
8428 else
8429 setup_incoming_varargs_64 (&next_cum);
8430 }
8431
8432 /* Return true if TYPE is the char-pointer kind of va_list. */
8433
8434 static bool
8435 is_va_list_char_pointer (tree type)
8436 {
8437 tree canonic;
8438
8439 /* For 32-bit it is always true. */
8440 if (!TARGET_64BIT)
8441 return true;
8442 canonic = ix86_canonical_va_list_type (type);
8443 return (canonic == ms_va_list_type_node
8444 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8445 }
8446
8447 /* Implement va_start. */
8448
8449 static void
8450 ix86_va_start (tree valist, rtx nextarg)
8451 {
8452 HOST_WIDE_INT words, n_gpr, n_fpr;
8453 tree f_gpr, f_fpr, f_ovf, f_sav;
8454 tree gpr, fpr, ovf, sav, t;
8455 tree type;
8456 rtx ovf_rtx;
8457
8458 if (flag_split_stack
8459 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8460 {
8461 unsigned int scratch_regno;
8462
8463 /* When we are splitting the stack, we can't refer to the stack
8464 arguments using internal_arg_pointer, because they may be on
8465 the old stack. The split stack prologue will arrange to
8466 leave a pointer to the old stack arguments in a scratch
8467 register, which we here copy to a pseudo-register. The split
8468 stack prologue can't set the pseudo-register directly because
8469 it (the prologue) runs before any registers have been saved. */
8470
8471 scratch_regno = split_stack_prologue_scratch_regno ();
8472 if (scratch_regno != INVALID_REGNUM)
8473 {
8474 rtx reg;
8475 rtx_insn *seq;
8476
8477 reg = gen_reg_rtx (Pmode);
8478 cfun->machine->split_stack_varargs_pointer = reg;
8479
8480 start_sequence ();
8481 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8482 seq = get_insns ();
8483 end_sequence ();
8484
8485 push_topmost_sequence ();
8486 emit_insn_after (seq, entry_of_function ());
8487 pop_topmost_sequence ();
8488 }
8489 }
8490
8491 /* Only 64-bit targets need something special. */
8492 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8493 {
8494 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8495 std_expand_builtin_va_start (valist, nextarg);
8496 else
8497 {
8498 rtx va_r, next;
8499
8500 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8501 next = expand_binop (ptr_mode, add_optab,
8502 cfun->machine->split_stack_varargs_pointer,
8503 crtl->args.arg_offset_rtx,
8504 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8505 convert_move (va_r, next, 0);
8506 }
8507 return;
8508 }
8509
8510 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8511 f_fpr = DECL_CHAIN (f_gpr);
8512 f_ovf = DECL_CHAIN (f_fpr);
8513 f_sav = DECL_CHAIN (f_ovf);
8514
8515 valist = build_simple_mem_ref (valist);
8516 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8517 /* The following should be folded into the MEM_REF offset. */
8518 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8519 f_gpr, NULL_TREE);
8520 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8521 f_fpr, NULL_TREE);
8522 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8523 f_ovf, NULL_TREE);
8524 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8525 f_sav, NULL_TREE);
8526
8527 /* Count number of gp and fp argument registers used. */
8528 words = crtl->args.info.words;
8529 n_gpr = crtl->args.info.regno;
8530 n_fpr = crtl->args.info.sse_regno;
8531
8532 if (cfun->va_list_gpr_size)
8533 {
8534 type = TREE_TYPE (gpr);
8535 t = build2 (MODIFY_EXPR, type,
8536 gpr, build_int_cst (type, n_gpr * 8));
8537 TREE_SIDE_EFFECTS (t) = 1;
8538 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8539 }
8540
8541 if (TARGET_SSE && cfun->va_list_fpr_size)
8542 {
8543 type = TREE_TYPE (fpr);
8544 t = build2 (MODIFY_EXPR, type, fpr,
8545 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8546 TREE_SIDE_EFFECTS (t) = 1;
8547 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8548 }
8549
8550 /* Find the overflow area. */
8551 type = TREE_TYPE (ovf);
8552 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8553 ovf_rtx = crtl->args.internal_arg_pointer;
8554 else
8555 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8556 t = make_tree (type, ovf_rtx);
8557 if (words != 0)
8558 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8559 t = build2 (MODIFY_EXPR, type, ovf, t);
8560 TREE_SIDE_EFFECTS (t) = 1;
8561 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8562
8563 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8564 {
8565 /* Find the register save area.
8566 The function prologue saves it right above the stack frame. */
8567 type = TREE_TYPE (sav);
8568 t = make_tree (type, frame_pointer_rtx);
8569 if (!ix86_varargs_gpr_size)
8570 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8571 t = build2 (MODIFY_EXPR, type, sav, t);
8572 TREE_SIDE_EFFECTS (t) = 1;
8573 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8574 }
8575 }
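/* A worked example of the field initialization above (a sketch, assuming
   X86_64_REGPARM_MAX == 6): for int f (int a, double b, ...) the named
   arguments consume one GP and one SSE register, so va_start sets
   gp_offset = 1 * 8 = 8, fp_offset = 1 * 16 + 6 * 8 = 64, points
   overflow_arg_area at the first stack-passed argument, and points
   reg_save_area at the block stored by setup_incoming_varargs_64.  */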
8576
8577 /* Implement va_arg. */
8578
8579 static tree
8580 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8581 gimple_seq *post_p)
8582 {
8583 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8584 tree f_gpr, f_fpr, f_ovf, f_sav;
8585 tree gpr, fpr, ovf, sav, t;
8586 int size, rsize;
8587 tree lab_false, lab_over = NULL_TREE;
8588 tree addr, t2;
8589 rtx container;
8590 int indirect_p = 0;
8591 tree ptrtype;
8592 enum machine_mode nat_mode;
8593 unsigned int arg_boundary;
8594
8595 /* Only 64-bit targets need something special. */
8596 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8597 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8598
8599 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8600 f_fpr = DECL_CHAIN (f_gpr);
8601 f_ovf = DECL_CHAIN (f_fpr);
8602 f_sav = DECL_CHAIN (f_ovf);
8603
8604 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8605 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8606 valist = build_va_arg_indirect_ref (valist);
8607 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8608 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8609 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8610
8611 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8612 if (indirect_p)
8613 type = build_pointer_type (type);
8614 size = int_size_in_bytes (type);
8615 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8616
8617 nat_mode = type_natural_mode (type, NULL, false);
8618 switch (nat_mode)
8619 {
8620 case V8SFmode:
8621 case V8SImode:
8622 case V32QImode:
8623 case V16HImode:
8624 case V4DFmode:
8625 case V4DImode:
8626 case V16SFmode:
8627 case V16SImode:
8628 case V64QImode:
8629 case V32HImode:
8630 case V8DFmode:
8631 case V8DImode:
8632 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8633 if (!TARGET_64BIT_MS_ABI)
8634 {
8635 container = NULL;
8636 break;
8637 }
8638
8639 default:
8640 container = construct_container (nat_mode, TYPE_MODE (type),
8641 type, 0, X86_64_REGPARM_MAX,
8642 X86_64_SSE_REGPARM_MAX, intreg,
8643 0);
8644 break;
8645 }
8646
8647 /* Pull the value out of the saved registers. */
8648
8649 addr = create_tmp_var (ptr_type_node, "addr");
8650
8651 if (container)
8652 {
8653 int needed_intregs, needed_sseregs;
8654 bool need_temp;
8655 tree int_addr, sse_addr;
8656
8657 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8658 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8659
8660 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8661
8662 need_temp = (!REG_P (container)
8663 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8664 || TYPE_ALIGN (type) > 128));
8665
8666 /* In case we are passing a structure, verify that it is a consecutive block
8667 in the register save area. If not, we need to do moves. */
8668 if (!need_temp && !REG_P (container))
8669 {
8670 /* Verify that all registers are strictly consecutive */
8671 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8672 {
8673 int i;
8674
8675 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8676 {
8677 rtx slot = XVECEXP (container, 0, i);
8678 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8679 || INTVAL (XEXP (slot, 1)) != i * 16)
8680 need_temp = 1;
8681 }
8682 }
8683 else
8684 {
8685 int i;
8686
8687 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8688 {
8689 rtx slot = XVECEXP (container, 0, i);
8690 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8691 || INTVAL (XEXP (slot, 1)) != i * 8)
8692 need_temp = 1;
8693 }
8694 }
8695 }
8696 if (!need_temp)
8697 {
8698 int_addr = addr;
8699 sse_addr = addr;
8700 }
8701 else
8702 {
8703 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8704 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8705 }
8706
8707 /* First ensure that we fit completely in registers. */
8708 if (needed_intregs)
8709 {
8710 t = build_int_cst (TREE_TYPE (gpr),
8711 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8712 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8713 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8714 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8715 gimplify_and_add (t, pre_p);
8716 }
8717 if (needed_sseregs)
8718 {
8719 t = build_int_cst (TREE_TYPE (fpr),
8720 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8721 + X86_64_REGPARM_MAX * 8);
8722 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8723 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8724 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8725 gimplify_and_add (t, pre_p);
8726 }
8727
8728 /* Compute index to start of area used for integer regs. */
8729 if (needed_intregs)
8730 {
8731 /* int_addr = gpr + sav; */
8732 t = fold_build_pointer_plus (sav, gpr);
8733 gimplify_assign (int_addr, t, pre_p);
8734 }
8735 if (needed_sseregs)
8736 {
8737 /* sse_addr = fpr + sav; */
8738 t = fold_build_pointer_plus (sav, fpr);
8739 gimplify_assign (sse_addr, t, pre_p);
8740 }
8741 if (need_temp)
8742 {
8743 int i, prev_size = 0;
8744 tree temp = create_tmp_var (type, "va_arg_tmp");
8745
8746 /* addr = &temp; */
8747 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8748 gimplify_assign (addr, t, pre_p);
8749
8750 for (i = 0; i < XVECLEN (container, 0); i++)
8751 {
8752 rtx slot = XVECEXP (container, 0, i);
8753 rtx reg = XEXP (slot, 0);
8754 enum machine_mode mode = GET_MODE (reg);
8755 tree piece_type;
8756 tree addr_type;
8757 tree daddr_type;
8758 tree src_addr, src;
8759 int src_offset;
8760 tree dest_addr, dest;
8761 int cur_size = GET_MODE_SIZE (mode);
8762
8763 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8764 prev_size = INTVAL (XEXP (slot, 1));
8765 if (prev_size + cur_size > size)
8766 {
8767 cur_size = size - prev_size;
8768 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8769 if (mode == BLKmode)
8770 mode = QImode;
8771 }
8772 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8773 if (mode == GET_MODE (reg))
8774 addr_type = build_pointer_type (piece_type);
8775 else
8776 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8777 true);
8778 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8779 true);
8780
8781 if (SSE_REGNO_P (REGNO (reg)))
8782 {
8783 src_addr = sse_addr;
8784 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8785 }
8786 else
8787 {
8788 src_addr = int_addr;
8789 src_offset = REGNO (reg) * 8;
8790 }
8791 src_addr = fold_convert (addr_type, src_addr);
8792 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8793
8794 dest_addr = fold_convert (daddr_type, addr);
8795 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8796 if (cur_size == GET_MODE_SIZE (mode))
8797 {
8798 src = build_va_arg_indirect_ref (src_addr);
8799 dest = build_va_arg_indirect_ref (dest_addr);
8800
8801 gimplify_assign (dest, src, pre_p);
8802 }
8803 else
8804 {
8805 tree copy
8806 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8807 3, dest_addr, src_addr,
8808 size_int (cur_size));
8809 gimplify_and_add (copy, pre_p);
8810 }
8811 prev_size += cur_size;
8812 }
8813 }
8814
8815 if (needed_intregs)
8816 {
8817 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8818 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8819 gimplify_assign (gpr, t, pre_p);
8820 }
8821
8822 if (needed_sseregs)
8823 {
8824 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8825 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8826 gimplify_assign (fpr, t, pre_p);
8827 }
8828
8829 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8830
8831 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8832 }
8833
8834 /* ... otherwise out of the overflow area. */
8835
8836 /* When the caller aligns a parameter on the stack, a parameter whose
8837 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will be
8838 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
8839 here with the caller. */
8840 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8841 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8842 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8843
8844 /* Care for on-stack alignment if needed. */
8845 if (arg_boundary <= 64 || size == 0)
8846 t = ovf;
8847 else
8848 {
8849 HOST_WIDE_INT align = arg_boundary / 8;
8850 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8851 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8852 build_int_cst (TREE_TYPE (t), -align));
8853 }
8854
8855 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8856 gimplify_assign (addr, t, pre_p);
8857
8858 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8859 gimplify_assign (unshare_expr (ovf), t, pre_p);
8860
8861 if (container)
8862 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8863
8864 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8865 addr = fold_convert (ptrtype, addr);
8866
8867 if (indirect_p)
8868 addr = build_va_arg_indirect_ref (addr);
8869 return build_va_arg_indirect_ref (addr);
8870 }
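/* For a simple case such as va_arg (ap, int) the sequence gimplified above
   amounts to roughly the following (a sketch, not the literal gimple; the
   overflow path also aligns addr first when the type requires it):

     if (ap->gp_offset >= 6 * 8) goto lab_false;
     addr = ap->reg_save_area + ap->gp_offset;
     ap->gp_offset += 8;
     goto lab_over;
   lab_false:
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area = addr + 8;
   lab_over:
     result = *(int *) addr;  */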
8871 \f
8872 /* Return true if OPNUM's MEM should be matched
8873 in movabs* patterns. */
8874
8875 bool
8876 ix86_check_movabs (rtx insn, int opnum)
8877 {
8878 rtx set, mem;
8879
8880 set = PATTERN (insn);
8881 if (GET_CODE (set) == PARALLEL)
8882 set = XVECEXP (set, 0, 0);
8883 gcc_assert (GET_CODE (set) == SET);
8884 mem = XEXP (set, opnum);
8885 while (GET_CODE (mem) == SUBREG)
8886 mem = SUBREG_REG (mem);
8887 gcc_assert (MEM_P (mem));
8888 return volatile_ok || !MEM_VOLATILE_P (mem);
8889 }
8890 \f
8891 /* Initialize the table of extra 80387 mathematical constants. */
8892
8893 static void
8894 init_ext_80387_constants (void)
8895 {
8896 static const char * cst[5] =
8897 {
8898 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8899 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8900 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8901 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8902 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8903 };
8904 int i;
8905
8906 for (i = 0; i < 5; i++)
8907 {
8908 real_from_string (&ext_80387_constants_table[i], cst[i]);
8909 /* Ensure each constant is rounded to XFmode precision. */
8910 real_convert (&ext_80387_constants_table[i],
8911 XFmode, &ext_80387_constants_table[i]);
8912 }
8913
8914 ext_80387_constants_init = 1;
8915 }
8916
8917 /* Return non-zero if the constant is something that
8918 can be loaded with a special instruction. */
8919
8920 int
8921 standard_80387_constant_p (rtx x)
8922 {
8923 enum machine_mode mode = GET_MODE (x);
8924
8925 REAL_VALUE_TYPE r;
8926
8927 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8928 return -1;
8929
8930 if (x == CONST0_RTX (mode))
8931 return 1;
8932 if (x == CONST1_RTX (mode))
8933 return 2;
8934
8935 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8936
8937 /* For XFmode constants, try to find a special 80387 instruction when
8938 optimizing for size or on those CPUs that benefit from them. */
8939 if (mode == XFmode
8940 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8941 {
8942 int i;
8943
8944 if (! ext_80387_constants_init)
8945 init_ext_80387_constants ();
8946
8947 for (i = 0; i < 5; i++)
8948 if (real_identical (&r, &ext_80387_constants_table[i]))
8949 return i + 3;
8950 }
8951
8952 /* A load of the constant -0.0 or -1.0 will be split into a
8953 fldz;fchs or fld1;fchs sequence. */
8954 if (real_isnegzero (&r))
8955 return 8;
8956 if (real_identical (&r, &dconstm1))
8957 return 9;
8958
8959 return 0;
8960 }
8961
8962 /* Return the opcode of the special instruction to be used to load
8963 the constant X. */
8964
8965 const char *
8966 standard_80387_constant_opcode (rtx x)
8967 {
8968 switch (standard_80387_constant_p (x))
8969 {
8970 case 1:
8971 return "fldz";
8972 case 2:
8973 return "fld1";
8974 case 3:
8975 return "fldlg2";
8976 case 4:
8977 return "fldln2";
8978 case 5:
8979 return "fldl2e";
8980 case 6:
8981 return "fldl2t";
8982 case 7:
8983 return "fldpi";
8984 case 8:
8985 case 9:
8986 return "#";
8987 default:
8988 gcc_unreachable ();
8989 }
8990 }
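/* Example of how the two routines above cooperate (a sketch): for an XFmode
   CONST_DOUBLE equal to pi, standard_80387_constant_p finds index 4 in the
   table and returns 4 + 3 == 7, which standard_80387_constant_opcode maps to
   "fldpi"; for -1.0 it returns 9 and the load is later split into an
   fld1;fchs sequence.  */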
8991
8992 /* Return the CONST_DOUBLE representing the 80387 constant that is
8993 loaded by the specified special instruction. The argument IDX
8994 matches the return value from standard_80387_constant_p. */
8995
8996 rtx
8997 standard_80387_constant_rtx (int idx)
8998 {
8999 int i;
9000
9001 if (! ext_80387_constants_init)
9002 init_ext_80387_constants ();
9003
9004 switch (idx)
9005 {
9006 case 3:
9007 case 4:
9008 case 5:
9009 case 6:
9010 case 7:
9011 i = idx - 3;
9012 break;
9013
9014 default:
9015 gcc_unreachable ();
9016 }
9017
9018 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9019 XFmode);
9020 }
9021
9022 /* Return 1 if X is all 0s and 2 if X is all 1s
9023 in a supported SSE/AVX vector mode. */
9024
9025 int
9026 standard_sse_constant_p (rtx x)
9027 {
9028 enum machine_mode mode = GET_MODE (x);
9029
9030 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9031 return 1;
9032 if (vector_all_ones_operand (x, mode))
9033 switch (mode)
9034 {
9035 case V16QImode:
9036 case V8HImode:
9037 case V4SImode:
9038 case V2DImode:
9039 if (TARGET_SSE2)
9040 return 2;
9041 case V32QImode:
9042 case V16HImode:
9043 case V8SImode:
9044 case V4DImode:
9045 if (TARGET_AVX2)
9046 return 2;
9047 case V64QImode:
9048 case V32HImode:
9049 case V16SImode:
9050 case V8DImode:
9051 if (TARGET_AVX512F)
9052 return 2;
9053 default:
9054 break;
9055 }
9056
9057 return 0;
9058 }
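/* For example (a sketch): CONST0_RTX (V4SFmode) yields 1 and can be cleared
   with a self-xor, while an all-ones V4SImode constant yields 2 on
   TARGET_SSE2 and is materialized with pcmpeqd; see
   standard_sse_constant_opcode below.  */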
9059
9060 /* Return the opcode of the special instruction to be used to load
9061 the constant X. */
9062
9063 const char *
9064 standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9065 {
9066 switch (standard_sse_constant_p (x))
9067 {
9068 case 1:
9069 switch (get_attr_mode (insn))
9070 {
9071 case MODE_XI:
9072 return "vpxord\t%g0, %g0, %g0";
9073 case MODE_V16SF:
9074 return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9075 : "vpxord\t%g0, %g0, %g0";
9076 case MODE_V8DF:
9077 return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9078 : "vpxorq\t%g0, %g0, %g0";
9079 case MODE_TI:
9080 return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9081 : "%vpxor\t%0, %d0";
9082 case MODE_V2DF:
9083 return "%vxorpd\t%0, %d0";
9084 case MODE_V4SF:
9085 return "%vxorps\t%0, %d0";
9086
9087 case MODE_OI:
9088 return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9089 : "vpxor\t%x0, %x0, %x0";
9090 case MODE_V4DF:
9091 return "vxorpd\t%x0, %x0, %x0";
9092 case MODE_V8SF:
9093 return "vxorps\t%x0, %x0, %x0";
9094
9095 default:
9096 break;
9097 }
9098
9099 case 2:
9100 if (TARGET_AVX512VL
9101 || get_attr_mode (insn) == MODE_XI
9102 || get_attr_mode (insn) == MODE_V8DF
9103 || get_attr_mode (insn) == MODE_V16SF)
9104 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9105 if (TARGET_AVX)
9106 return "vpcmpeqd\t%0, %0, %0";
9107 else
9108 return "pcmpeqd\t%0, %0";
9109
9110 default:
9111 break;
9112 }
9113 gcc_unreachable ();
9114 }
9115
9116 /* Return true if OP contains a symbol reference. */
9117
9118 bool
9119 symbolic_reference_mentioned_p (rtx op)
9120 {
9121 const char *fmt;
9122 int i;
9123
9124 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9125 return true;
9126
9127 fmt = GET_RTX_FORMAT (GET_CODE (op));
9128 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9129 {
9130 if (fmt[i] == 'E')
9131 {
9132 int j;
9133
9134 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9135 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9136 return true;
9137 }
9138
9139 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9140 return true;
9141 }
9142
9143 return false;
9144 }
9145
9146 /* Return true if it is appropriate to emit `ret' instructions in the
9147 body of a function. Do this only if the epilogue is simple, needing a
9148 couple of insns. Prior to reloading, we can't tell how many registers
9149 must be saved, so return false then. Return false if there is no frame
9150 marker to de-allocate. */
9151
9152 bool
9153 ix86_can_use_return_insn_p (void)
9154 {
9155 struct ix86_frame frame;
9156
9157 if (! reload_completed || frame_pointer_needed)
9158 return 0;
9159
9160 /* Don't allow more than 32k pop, since that's all we can do
9161 with one instruction. */
9162 if (crtl->args.pops_args && crtl->args.size >= 32768)
9163 return 0;
9164
9165 ix86_compute_frame_layout (&frame);
9166 return (frame.stack_pointer_offset == UNITS_PER_WORD
9167 && (frame.nregs + frame.nsseregs) == 0);
9168 }
9169 \f
9170 /* Value should be nonzero if functions must have frame pointers.
9171 Zero means the frame pointer need not be set up (and parms may
9172 be accessed via the stack pointer) in functions that seem suitable. */
9173
9174 static bool
9175 ix86_frame_pointer_required (void)
9176 {
9177 /* If we accessed previous frames, then the generated code expects
9178 to be able to access the saved ebp value in our frame. */
9179 if (cfun->machine->accesses_prev_frame)
9180 return true;
9181
9182 /* Several x86 OSes need a frame pointer for other reasons,
9183 usually pertaining to setjmp. */
9184 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9185 return true;
9186
9187 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9188 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9189 return true;
9190
9191 /* With Win64 SEH, very large frames need a frame pointer, as the maximum
9192 stack allocation is 4GB. */
9193 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9194 return true;
9195
9196 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9197 turns off the frame pointer by default. Turn it back on now if
9198 we've not got a leaf function. */
9199 if (TARGET_OMIT_LEAF_FRAME_POINTER
9200 && (!crtl->is_leaf
9201 || ix86_current_function_calls_tls_descriptor))
9202 return true;
9203
9204 if (crtl->profile && !flag_fentry)
9205 return true;
9206
9207 return false;
9208 }
9209
9210 /* Record that the current function accesses previous call frames. */
9211
9212 void
9213 ix86_setup_frame_addresses (void)
9214 {
9215 cfun->machine->accesses_prev_frame = 1;
9216 }
9217 \f
9218 #ifndef USE_HIDDEN_LINKONCE
9219 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9220 # define USE_HIDDEN_LINKONCE 1
9221 # else
9222 # define USE_HIDDEN_LINKONCE 0
9223 # endif
9224 #endif
9225
9226 static int pic_labels_used;
9227
9228 /* Fills in the label name that should be used for a pc thunk for
9229 the given register. */
9230
9231 static void
9232 get_pc_thunk_name (char name[32], unsigned int regno)
9233 {
9234 gcc_assert (!TARGET_64BIT);
9235
9236 if (USE_HIDDEN_LINKONCE)
9237 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9238 else
9239 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9240 }
9241
9242
9243 /* This function generates code for -fpic that loads %ebx with
9244 the return address of the caller and then returns. */
9245
9246 static void
9247 ix86_code_end (void)
9248 {
9249 rtx xops[2];
9250 int regno;
9251
9252 for (regno = AX_REG; regno <= SP_REG; regno++)
9253 {
9254 char name[32];
9255 tree decl;
9256
9257 if (!(pic_labels_used & (1 << regno)))
9258 continue;
9259
9260 get_pc_thunk_name (name, regno);
9261
9262 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9263 get_identifier (name),
9264 build_function_type_list (void_type_node, NULL_TREE));
9265 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9266 NULL_TREE, void_type_node);
9267 TREE_PUBLIC (decl) = 1;
9268 TREE_STATIC (decl) = 1;
9269 DECL_IGNORED_P (decl) = 1;
9270
9271 #if TARGET_MACHO
9272 if (TARGET_MACHO)
9273 {
9274 switch_to_section (darwin_sections[text_coal_section]);
9275 fputs ("\t.weak_definition\t", asm_out_file);
9276 assemble_name (asm_out_file, name);
9277 fputs ("\n\t.private_extern\t", asm_out_file);
9278 assemble_name (asm_out_file, name);
9279 putc ('\n', asm_out_file);
9280 ASM_OUTPUT_LABEL (asm_out_file, name);
9281 DECL_WEAK (decl) = 1;
9282 }
9283 else
9284 #endif
9285 if (USE_HIDDEN_LINKONCE)
9286 {
9287 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9288
9289 targetm.asm_out.unique_section (decl, 0);
9290 switch_to_section (get_named_section (decl, NULL, 0));
9291
9292 targetm.asm_out.globalize_label (asm_out_file, name);
9293 fputs ("\t.hidden\t", asm_out_file);
9294 assemble_name (asm_out_file, name);
9295 putc ('\n', asm_out_file);
9296 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9297 }
9298 else
9299 {
9300 switch_to_section (text_section);
9301 ASM_OUTPUT_LABEL (asm_out_file, name);
9302 }
9303
9304 DECL_INITIAL (decl) = make_node (BLOCK);
9305 current_function_decl = decl;
9306 init_function_start (decl);
9307 first_function_block_is_cold = false;
9308 /* Make sure unwind info is emitted for the thunk if needed. */
9309 final_start_function (emit_barrier (), asm_out_file, 1);
9310
9311 /* Pad stack IP move with 4 instructions (two NOPs count
9312 as one instruction). */
9313 if (TARGET_PAD_SHORT_FUNCTION)
9314 {
9315 int i = 8;
9316
9317 while (i--)
9318 fputs ("\tnop\n", asm_out_file);
9319 }
9320
9321 xops[0] = gen_rtx_REG (Pmode, regno);
9322 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9323 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9324 fputs ("\tret\n", asm_out_file);
9325 final_end_function ();
9326 init_insn_lengths ();
9327 free_after_compilation (cfun);
9328 set_cfun (NULL);
9329 current_function_decl = NULL;
9330 }
9331
9332 if (flag_split_stack)
9333 file_end_indicate_split_stack ();
9334 }
9335
9336 /* Emit code for the SET_GOT patterns. */
9337
9338 const char *
9339 output_set_got (rtx dest, rtx label)
9340 {
9341 rtx xops[3];
9342
9343 xops[0] = dest;
9344
9345 if (TARGET_VXWORKS_RTP && flag_pic)
9346 {
9347 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9348 xops[2] = gen_rtx_MEM (Pmode,
9349 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9350 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9351
9352 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9353 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9354 an unadorned address. */
9355 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9356 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9357 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9358 return "";
9359 }
9360
9361 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9362
9363 if (!flag_pic)
9364 {
9365 if (TARGET_MACHO)
9366 /* We don't need a pic base, we're not producing pic. */
9367 gcc_unreachable ();
9368
9369 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9370 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9371 targetm.asm_out.internal_label (asm_out_file, "L",
9372 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9373 }
9374 else
9375 {
9376 char name[32];
9377 get_pc_thunk_name (name, REGNO (dest));
9378 pic_labels_used |= 1 << REGNO (dest);
9379
9380 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9381 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9382 output_asm_insn ("call\t%X2", xops);
9383
9384 #if TARGET_MACHO
9385 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9386 This is what will be referenced by the Mach-O PIC subsystem. */
9387 if (machopic_should_output_picbase_label () || !label)
9388 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9389
9390 /* When we are restoring the pic base at the site of a nonlocal label,
9391 and we decided to emit the pic base above, we will still output a
9392 local label used for calculating the correction offset (even though
9393 the offset will be 0 in that case). */
9394 if (label)
9395 targetm.asm_out.internal_label (asm_out_file, "L",
9396 CODE_LABEL_NUMBER (label));
9397 #endif
9398 }
9399
9400 if (!TARGET_MACHO)
9401 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9402
9403 return "";
9404 }
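/* In the common 32-bit -fpic case the code emitted above is roughly the
   following (a sketch for %ebx as the PIC register):

     call  __x86.get_pc_thunk.bx
     addl  $_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk generated by ix86_code_end is just
   "movl (%esp), %ebx; ret".  */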
9405
9406 /* Generate a "push" pattern for input ARG. */
9407
9408 static rtx
9409 gen_push (rtx arg)
9410 {
9411 struct machine_function *m = cfun->machine;
9412
9413 if (m->fs.cfa_reg == stack_pointer_rtx)
9414 m->fs.cfa_offset += UNITS_PER_WORD;
9415 m->fs.sp_offset += UNITS_PER_WORD;
9416
9417 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9418 arg = gen_rtx_REG (word_mode, REGNO (arg));
9419
9420 return gen_rtx_SET (VOIDmode,
9421 gen_rtx_MEM (word_mode,
9422 gen_rtx_PRE_DEC (Pmode,
9423 stack_pointer_rtx)),
9424 arg);
9425 }
9426
9427 /* Generate a "pop" pattern for input ARG. */
9428
9429 static rtx
9430 gen_pop (rtx arg)
9431 {
9432 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9433 arg = gen_rtx_REG (word_mode, REGNO (arg));
9434
9435 return gen_rtx_SET (VOIDmode,
9436 arg,
9437 gen_rtx_MEM (word_mode,
9438 gen_rtx_POST_INC (Pmode,
9439 stack_pointer_rtx)));
9440 }
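/* For illustration, on a 64-bit target gen_push (di) yields RTL of the shape
   (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di)) and gen_pop (di) the
   mirror image with post_inc, matching the usual pushq/popq forms (an
   informal sketch; the helpers above also normalize the register mode).  */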
9441
9442 /* Return >= 0 if there is an unused call-clobbered register available
9443 for the entire function. */
9444
9445 static unsigned int
9446 ix86_select_alt_pic_regnum (void)
9447 {
9448 if (ix86_use_pseudo_pic_reg ())
9449 return INVALID_REGNUM;
9450
9451 if (crtl->is_leaf
9452 && !crtl->profile
9453 && !ix86_current_function_calls_tls_descriptor)
9454 {
9455 int i, drap;
9456 /* Can't use the same register for both PIC and DRAP. */
9457 if (crtl->drap_reg)
9458 drap = REGNO (crtl->drap_reg);
9459 else
9460 drap = -1;
9461 for (i = 2; i >= 0; --i)
9462 if (i != drap && !df_regs_ever_live_p (i))
9463 return i;
9464 }
9465
9466 return INVALID_REGNUM;
9467 }
9468
9469 /* Return TRUE if we need to save REGNO. */
9470
9471 static bool
9472 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9473 {
9474 if (pic_offset_table_rtx
9475 && !ix86_use_pseudo_pic_reg ()
9476 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9477 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9478 || crtl->profile
9479 || crtl->calls_eh_return
9480 || crtl->uses_const_pool
9481 || cfun->has_nonlocal_label))
9482 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9483
9484 if (crtl->calls_eh_return && maybe_eh_return)
9485 {
9486 unsigned i;
9487 for (i = 0; ; i++)
9488 {
9489 unsigned test = EH_RETURN_DATA_REGNO (i);
9490 if (test == INVALID_REGNUM)
9491 break;
9492 if (test == regno)
9493 return true;
9494 }
9495 }
9496
9497 if (crtl->drap_reg
9498 && regno == REGNO (crtl->drap_reg)
9499 && !cfun->machine->no_drap_save_restore)
9500 return true;
9501
9502 return (df_regs_ever_live_p (regno)
9503 && !call_used_regs[regno]
9504 && !fixed_regs[regno]
9505 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9506 }
9507
9508 /* Return the number of saved general purpose registers. */
9509
9510 static int
9511 ix86_nsaved_regs (void)
9512 {
9513 int nregs = 0;
9514 int regno;
9515
9516 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9517 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9518 nregs ++;
9519 return nregs;
9520 }
9521
9522 /* Return the number of saved SSE registers. */
9523
9524 static int
9525 ix86_nsaved_sseregs (void)
9526 {
9527 int nregs = 0;
9528 int regno;
9529
9530 if (!TARGET_64BIT_MS_ABI)
9531 return 0;
9532 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9533 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9534 nregs ++;
9535 return nregs;
9536 }
9537
9538 /* Given FROM and TO register numbers, say whether this elimination is
9539 allowed. If stack alignment is needed, we can only replace argument
9540 pointer with hard frame pointer, or replace frame pointer with stack
9541 pointer. Otherwise, frame pointer elimination is automatically
9542 handled and all other eliminations are valid. */
9543
9544 static bool
9545 ix86_can_eliminate (const int from, const int to)
9546 {
9547 if (stack_realign_fp)
9548 return ((from == ARG_POINTER_REGNUM
9549 && to == HARD_FRAME_POINTER_REGNUM)
9550 || (from == FRAME_POINTER_REGNUM
9551 && to == STACK_POINTER_REGNUM));
9552 else
9553 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9554 }
9555
9556 /* Return the offset between two registers, one to be eliminated, and the other
9557 its replacement, at the start of a routine. */
9558
9559 HOST_WIDE_INT
9560 ix86_initial_elimination_offset (int from, int to)
9561 {
9562 struct ix86_frame frame;
9563 ix86_compute_frame_layout (&frame);
9564
9565 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9566 return frame.hard_frame_pointer_offset;
9567 else if (from == FRAME_POINTER_REGNUM
9568 && to == HARD_FRAME_POINTER_REGNUM)
9569 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9570 else
9571 {
9572 gcc_assert (to == STACK_POINTER_REGNUM);
9573
9574 if (from == ARG_POINTER_REGNUM)
9575 return frame.stack_pointer_offset;
9576
9577 gcc_assert (from == FRAME_POINTER_REGNUM);
9578 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9579 }
9580 }
9581
9582 /* In a dynamically-aligned function, we can't know the offset from
9583 stack pointer to frame pointer, so we must ensure that setjmp
9584 eliminates fp against the hard fp (%ebp) rather than trying to
9585 index from %esp up to the top of the frame across a gap that is
9586 of unknown (at compile-time) size. */
9587 static rtx
9588 ix86_builtin_setjmp_frame_value (void)
9589 {
9590 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9591 }
9592
9593 /* When using -fsplit-stack, the allocation routines set a field in
9594 the TCB to the bottom of the stack plus this much space, measured
9595 in bytes. */
9596
9597 #define SPLIT_STACK_AVAILABLE 256
9598
9599 /* Fill in structure ix86_frame describing the frame of the current function. */
9600
9601 static void
9602 ix86_compute_frame_layout (struct ix86_frame *frame)
9603 {
9604 unsigned HOST_WIDE_INT stack_alignment_needed;
9605 HOST_WIDE_INT offset;
9606 unsigned HOST_WIDE_INT preferred_alignment;
9607 HOST_WIDE_INT size = get_frame_size ();
9608 HOST_WIDE_INT to_allocate;
9609
9610 frame->nregs = ix86_nsaved_regs ();
9611 frame->nsseregs = ix86_nsaved_sseregs ();
9612
9613 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9614 for function prologues and leaf functions. */
9615 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9616 && (!crtl->is_leaf || cfun->calls_alloca != 0
9617 || ix86_current_function_calls_tls_descriptor))
9618 {
9619 crtl->preferred_stack_boundary = 128;
9620 crtl->stack_alignment_needed = 128;
9621 }
9622 /* preferred_stack_boundary is never updated for calls
9623 expanded from a TLS descriptor. Update it here. We don't update it in
9624 the expand stage because, according to the comments before
9625 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9626 away. */
9627 else if (ix86_current_function_calls_tls_descriptor
9628 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9629 {
9630 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9631 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9632 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9633 }
9634
9635 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9636 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9637
9638 gcc_assert (!size || stack_alignment_needed);
9639 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9640 gcc_assert (preferred_alignment <= stack_alignment_needed);
9641
9642 /* For SEH we have to limit the amount of code movement into the prologue.
9643 At present we do this via a BLOCKAGE, at which point there's very little
9644 scheduling that can be done, which means that there's very little point
9645 in doing anything except PUSHs. */
9646 if (TARGET_SEH)
9647 cfun->machine->use_fast_prologue_epilogue = false;
9648
9649 /* During reload iterations the number of registers saved can change.
9650 Recompute the value as needed. Do not recompute when the number of registers
9651 didn't change, as reload makes multiple calls to this function and does not
9652 expect the decision to change within a single iteration. */
9653 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9654 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9655 {
9656 int count = frame->nregs;
9657 struct cgraph_node *node = cgraph_node::get (current_function_decl);
9658
9659 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9660
9661 /* The fast prologue uses move instead of push to save registers. This
9662 is significantly longer, but also executes faster as modern hardware
9663 can execute the moves in parallel, but can't do that for push/pop.
9664
9665 Be careful about choosing which prologue to emit: when the function takes
9666 many instructions to execute, we may use the slow version, and likewise when
9667 the function is known to be outside a hot spot (which is known only with
9668 feedback). Weight the size of the function by the number of registers
9669 to save, as it is cheap to use one or two push instructions but very
9670 slow to use many of them. */
9671 if (count)
9672 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9673 if (node->frequency < NODE_FREQUENCY_NORMAL
9674 || (flag_branch_probabilities
9675 && node->frequency < NODE_FREQUENCY_HOT))
9676 cfun->machine->use_fast_prologue_epilogue = false;
9677 else
9678 cfun->machine->use_fast_prologue_epilogue
9679 = !expensive_function_p (count);
9680 }
9681
9682 frame->save_regs_using_mov
9683 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9684 /* If static stack checking is enabled and done with probes,
9685 the registers need to be saved before allocating the frame. */
9686 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9687
9688 /* Skip return address. */
9689 offset = UNITS_PER_WORD;
9690
9691 /* Skip pushed static chain. */
9692 if (ix86_static_chain_on_stack)
9693 offset += UNITS_PER_WORD;
9694
9695 /* Skip saved base pointer. */
9696 if (frame_pointer_needed)
9697 offset += UNITS_PER_WORD;
9698 frame->hfp_save_offset = offset;
9699
9700 /* The traditional frame pointer location is at the top of the frame. */
9701 frame->hard_frame_pointer_offset = offset;
9702
9703 /* Register save area */
9704 offset += frame->nregs * UNITS_PER_WORD;
9705 frame->reg_save_offset = offset;
9706
9707 /* On SEH targets, registers are pushed just before the frame pointer
9708 location. */
9709 if (TARGET_SEH)
9710 frame->hard_frame_pointer_offset = offset;
9711
9712 /* Align and set SSE register save area. */
9713 if (frame->nsseregs)
9714 {
9715 /* The only ABI that has saved SSE registers (Win64) also has a
9716 16-byte aligned default stack, and thus we don't need to be
9717 within the re-aligned local stack frame to save them. */
9718 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9719 offset = (offset + 16 - 1) & -16;
9720 offset += frame->nsseregs * 16;
9721 }
9722 frame->sse_reg_save_offset = offset;
9723
9724 /* The re-aligned stack starts here. Values before this point are not
9725 directly comparable with values below this point. In order to make
9726 sure that no value happens to be the same before and after, force
9727 the alignment computation below to add a non-zero value. */
9728 if (stack_realign_fp)
9729 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9730
9731 /* Va-arg area */
9732 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9733 offset += frame->va_arg_size;
9734
9735 /* Align start of frame for local function. */
9736 if (stack_realign_fp
9737 || offset != frame->sse_reg_save_offset
9738 || size != 0
9739 || !crtl->is_leaf
9740 || cfun->calls_alloca
9741 || ix86_current_function_calls_tls_descriptor)
9742 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9743
9744 /* Frame pointer points here. */
9745 frame->frame_pointer_offset = offset;
9746
9747 offset += size;
9748
9749 /* Add the outgoing arguments area. It can be skipped if we eliminated
9750 all the function calls as dead code.
9751 Skipping is however impossible when the function calls alloca. The alloca
9752 expander assumes that the last crtl->outgoing_args_size bytes
9753 of the stack frame are unused. */
9754 if (ACCUMULATE_OUTGOING_ARGS
9755 && (!crtl->is_leaf || cfun->calls_alloca
9756 || ix86_current_function_calls_tls_descriptor))
9757 {
9758 offset += crtl->outgoing_args_size;
9759 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9760 }
9761 else
9762 frame->outgoing_arguments_size = 0;
9763
9764 /* Align stack boundary. Only needed if we're calling another function
9765 or using alloca. */
9766 if (!crtl->is_leaf || cfun->calls_alloca
9767 || ix86_current_function_calls_tls_descriptor)
9768 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9769
9770 /* We've reached end of stack frame. */
9771 frame->stack_pointer_offset = offset;
9772
9773 /* Size prologue needs to allocate. */
9774 to_allocate = offset - frame->sse_reg_save_offset;
9775
9776 if ((!to_allocate && frame->nregs <= 1)
9777 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9778 frame->save_regs_using_mov = false;
9779
9780 if (ix86_using_red_zone ()
9781 && crtl->sp_is_unchanging
9782 && crtl->is_leaf
9783 && !ix86_current_function_calls_tls_descriptor)
9784 {
9785 frame->red_zone_size = to_allocate;
9786 if (frame->save_regs_using_mov)
9787 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9788 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9789 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9790 }
9791 else
9792 frame->red_zone_size = 0;
9793 frame->stack_pointer_offset -= frame->red_zone_size;
9794
9795 /* The SEH frame pointer location is near the bottom of the frame.
9796 This is enforced by the fact that the difference between the
9797 stack pointer and the frame pointer is limited to 240 bytes in
9798 the unwind data structure. */
9799 if (TARGET_SEH)
9800 {
9801 HOST_WIDE_INT diff;
9802
9803 /* If we can leave the frame pointer where it is, do so. Also, returns
9804 the establisher frame for __builtin_frame_address (0). */
9805 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9806 if (diff <= SEH_MAX_FRAME_SIZE
9807 && (diff > 240 || (diff & 15) != 0)
9808 && !crtl->accesses_prior_frames)
9809 {
9810 /* Ideally we'd determine what portion of the local stack frame
9811 (within the constraint of the lowest 240) is most heavily used.
9812 But without that complication, simply bias the frame pointer
9813 by 128 bytes so as to maximize the amount of the local stack
9814 frame that is addressable with 8-bit offsets. */
9815 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9816 }
9817 }
9818 }
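/* Summarizing the layout computed above, from the CFA downwards (an informal
   sketch): return address, optional pushed static chain, saved frame
   pointer, saved general registers, the 16-byte aligned SSE register save
   area, the va_arg save area, realignment padding, local variables
   (get_frame_size), and finally the outgoing argument area at the stack
   pointer; when the red zone is usable it trims the amount the prologue
   actually allocates.  */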
9819
9820 /* This is semi-inlined memory_address_length, but simplified
9821 since we know that we're always dealing with reg+offset, and
9822 to avoid having to create and discard all that rtl. */
9823
9824 static inline int
9825 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9826 {
9827 int len = 4;
9828
9829 if (offset == 0)
9830 {
9831 /* EBP and R13 cannot be encoded without an offset. */
9832 len = (regno == BP_REG || regno == R13_REG);
9833 }
9834 else if (IN_RANGE (offset, -128, 127))
9835 len = 1;
9836
9837 /* ESP and R12 must be encoded with a SIB byte. */
9838 if (regno == SP_REG || regno == R12_REG)
9839 len++;
9840
9841 return len;
9842 }
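/* Some illustrative lengths from the computation above (a sketch):
   (%eax) with a zero offset needs no displacement, giving 0; (%ebp) needs a
   disp8 even for offset 0, giving 1; 8(%esp) needs a disp8 plus a SIB byte,
   giving 2; and 0x1000(%ecx) needs a disp32, giving 4.  */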
9843
9844 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9845 The valid base registers are taken from CFUN->MACHINE->FS. */
9846
9847 static rtx
9848 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9849 {
9850 const struct machine_function *m = cfun->machine;
9851 rtx base_reg = NULL;
9852 HOST_WIDE_INT base_offset = 0;
9853
9854 if (m->use_fast_prologue_epilogue)
9855 {
9856 /* Choose the base register most likely to allow the most scheduling
9857 opportunities. Generally FP is valid throughout the function,
9858 while DRAP must be reloaded within the epilogue. But choose either
9859 over the SP due to increased encoding size. */
9860
9861 if (m->fs.fp_valid)
9862 {
9863 base_reg = hard_frame_pointer_rtx;
9864 base_offset = m->fs.fp_offset - cfa_offset;
9865 }
9866 else if (m->fs.drap_valid)
9867 {
9868 base_reg = crtl->drap_reg;
9869 base_offset = 0 - cfa_offset;
9870 }
9871 else if (m->fs.sp_valid)
9872 {
9873 base_reg = stack_pointer_rtx;
9874 base_offset = m->fs.sp_offset - cfa_offset;
9875 }
9876 }
9877 else
9878 {
9879 HOST_WIDE_INT toffset;
9880 int len = 16, tlen;
9881
9882 /* Choose the base register with the smallest address encoding.
9883 With a tie, choose FP > DRAP > SP. */
9884 if (m->fs.sp_valid)
9885 {
9886 base_reg = stack_pointer_rtx;
9887 base_offset = m->fs.sp_offset - cfa_offset;
9888 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9889 }
9890 if (m->fs.drap_valid)
9891 {
9892 toffset = 0 - cfa_offset;
9893 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9894 if (tlen <= len)
9895 {
9896 base_reg = crtl->drap_reg;
9897 base_offset = toffset;
9898 len = tlen;
9899 }
9900 }
9901 if (m->fs.fp_valid)
9902 {
9903 toffset = m->fs.fp_offset - cfa_offset;
9904 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9905 if (tlen <= len)
9906 {
9907 base_reg = hard_frame_pointer_rtx;
9908 base_offset = toffset;
9909 len = tlen;
9910 }
9911 }
9912 }
9913 gcc_assert (base_reg != NULL);
9914
9915 return plus_constant (Pmode, base_reg, base_offset);
9916 }
9917
9918 /* Emit code to save registers in the prologue. */
9919
9920 static void
9921 ix86_emit_save_regs (void)
9922 {
9923 unsigned int regno;
9924 rtx insn;
9925
9926 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9927 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9928 {
9929 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9930 RTX_FRAME_RELATED_P (insn) = 1;
9931 }
9932 }
9933
9934 /* Emit a single register save at CFA - CFA_OFFSET. */
9935
9936 static void
9937 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9938 HOST_WIDE_INT cfa_offset)
9939 {
9940 struct machine_function *m = cfun->machine;
9941 rtx reg = gen_rtx_REG (mode, regno);
9942 rtx mem, addr, base, insn;
9943
9944 addr = choose_baseaddr (cfa_offset);
9945 mem = gen_frame_mem (mode, addr);
9946
9947 /* For SSE saves, we need to indicate the 128-bit alignment. */
9948 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9949
9950 insn = emit_move_insn (mem, reg);
9951 RTX_FRAME_RELATED_P (insn) = 1;
9952
9953 base = addr;
9954 if (GET_CODE (base) == PLUS)
9955 base = XEXP (base, 0);
9956 gcc_checking_assert (REG_P (base));
9957
9958 /* When saving registers into a re-aligned local stack frame, avoid
9959 any tricky guessing by dwarf2out. */
9960 if (m->fs.realigned)
9961 {
9962 gcc_checking_assert (stack_realign_drap);
9963
9964 if (regno == REGNO (crtl->drap_reg))
9965 {
9966 /* A bit of a hack. We force the DRAP register to be saved in
9967 the re-aligned stack frame, which provides us with a copy
9968 of the CFA that will last past the prologue. Install it. */
9969 gcc_checking_assert (cfun->machine->fs.fp_valid);
9970 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9971 cfun->machine->fs.fp_offset - cfa_offset);
9972 mem = gen_rtx_MEM (mode, addr);
9973 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9974 }
9975 else
9976 {
9977 /* The frame pointer is a stable reference within the
9978 aligned frame. Use it. */
9979 gcc_checking_assert (cfun->machine->fs.fp_valid);
9980 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9981 cfun->machine->fs.fp_offset - cfa_offset);
9982 mem = gen_rtx_MEM (mode, addr);
9983 add_reg_note (insn, REG_CFA_EXPRESSION,
9984 gen_rtx_SET (VOIDmode, mem, reg));
9985 }
9986 }
9987
9988 /* The memory may not be relative to the current CFA register,
9989 which means that we may need to generate a new pattern for
9990 use by the unwind info. */
9991 else if (base != m->fs.cfa_reg)
9992 {
9993 addr = plus_constant (Pmode, m->fs.cfa_reg,
9994 m->fs.cfa_offset - cfa_offset);
9995 mem = gen_rtx_MEM (mode, addr);
9996 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9997 }
9998 }
9999
10000 /* Emit code to save registers using MOV insns.
10001 First register is stored at CFA - CFA_OFFSET. */
10002 static void
10003 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10004 {
10005 unsigned int regno;
10006
10007 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10008 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10009 {
10010 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10011 cfa_offset -= UNITS_PER_WORD;
10012 }
10013 }
10014
10015 /* Emit code to save SSE registers using MOV insns.
10016 First register is stored at CFA - CFA_OFFSET. */
10017 static void
10018 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10019 {
10020 unsigned int regno;
10021
10022 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10023 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10024 {
10025 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10026 cfa_offset -= 16;
10027 }
10028 }
10029
10030 static GTY(()) rtx queued_cfa_restores;
10031
10032 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
10033 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
10034 Don't add the note if the previously saved value will be left untouched
10035 within the stack red zone till return, as unwinders can find the same value
10036 in the register and on the stack. */
10037
10038 static void
10039 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10040 {
10041 if (!crtl->shrink_wrapped
10042 && cfa_offset <= cfun->machine->fs.red_zone_offset)
10043 return;
10044
10045 if (insn)
10046 {
10047 add_reg_note (insn, REG_CFA_RESTORE, reg);
10048 RTX_FRAME_RELATED_P (insn) = 1;
10049 }
10050 else
10051 queued_cfa_restores
10052 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10053 }
10054
10055 /* Add queued REG_CFA_RESTORE notes, if any, to INSN.  */
10056
10057 static void
10058 ix86_add_queued_cfa_restore_notes (rtx insn)
10059 {
10060 rtx last;
10061 if (!queued_cfa_restores)
10062 return;
10063 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10064 ;
10065 XEXP (last, 1) = REG_NOTES (insn);
10066 REG_NOTES (insn) = queued_cfa_restores;
10067 queued_cfa_restores = NULL_RTX;
10068 RTX_FRAME_RELATED_P (insn) = 1;
10069 }
10070
10071 /* Expand prologue or epilogue stack adjustment.
10072 The pattern exists to put a dependency on all ebp-based memory accesses.
10073 STYLE should be negative if instructions should be marked as frame related,
10074 zero if %r11 register is live and cannot be freely used and positive
10075 otherwise. */
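
/* Illustrative only: a typical prologue-side use of this helper, matching
   the call made from ix86_expand_prologue below, is roughly

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				GEN_INT (-allocate), -1,
				m->fs.cfa_reg == stack_pointer_rtx);

   i.e. STYLE is negative so the adjustment is marked frame related, and
   SET_CFA is true only while the stack pointer is still the CFA register.  */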
10076
10077 static void
10078 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10079 int style, bool set_cfa)
10080 {
10081 struct machine_function *m = cfun->machine;
10082 rtx insn;
10083 bool add_frame_related_expr = false;
10084
10085 if (Pmode == SImode)
10086 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10087 else if (x86_64_immediate_operand (offset, DImode))
10088 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10089 else
10090 {
10091 rtx tmp;
10092 /* r11 is used by the indirect sibcall return as well; it is set before
10093 the epilogue and used after the epilogue.  */
10094 if (style)
10095 tmp = gen_rtx_REG (DImode, R11_REG);
10096 else
10097 {
10098 gcc_assert (src != hard_frame_pointer_rtx
10099 && dest != hard_frame_pointer_rtx);
10100 tmp = hard_frame_pointer_rtx;
10101 }
10102 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10103 if (style < 0)
10104 add_frame_related_expr = true;
10105
10106 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10107 }
10108
10109 insn = emit_insn (insn);
10110 if (style >= 0)
10111 ix86_add_queued_cfa_restore_notes (insn);
10112
10113 if (set_cfa)
10114 {
10115 rtx r;
10116
10117 gcc_assert (m->fs.cfa_reg == src);
10118 m->fs.cfa_offset += INTVAL (offset);
10119 m->fs.cfa_reg = dest;
10120
10121 r = gen_rtx_PLUS (Pmode, src, offset);
10122 r = gen_rtx_SET (VOIDmode, dest, r);
10123 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10124 RTX_FRAME_RELATED_P (insn) = 1;
10125 }
10126 else if (style < 0)
10127 {
10128 RTX_FRAME_RELATED_P (insn) = 1;
10129 if (add_frame_related_expr)
10130 {
10131 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10132 r = gen_rtx_SET (VOIDmode, dest, r);
10133 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10134 }
10135 }
10136
10137 if (dest == stack_pointer_rtx)
10138 {
10139 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10140 bool valid = m->fs.sp_valid;
10141
10142 if (src == hard_frame_pointer_rtx)
10143 {
10144 valid = m->fs.fp_valid;
10145 ooffset = m->fs.fp_offset;
10146 }
10147 else if (src == crtl->drap_reg)
10148 {
10149 valid = m->fs.drap_valid;
10150 ooffset = 0;
10151 }
10152 else
10153 {
10154 /* Else there are two possibilities: SP itself, which we set
10155 up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
10156 taken care of by hand along the eh_return path.  */
10157 gcc_checking_assert (src == stack_pointer_rtx
10158 || offset == const0_rtx);
10159 }
10160
10161 m->fs.sp_offset = ooffset - INTVAL (offset);
10162 m->fs.sp_valid = valid;
10163 }
10164 }
10165
10166 /* Find an available register to be used as the dynamic realign argument
10167 pointer register.  Such a register will be written in the prologue and
10168 used at the beginning of the body, so it must not be
10169 1. a parameter passing register.
10170 2. the GOT pointer.
10171 We reuse the static-chain register if it is available.  Otherwise, we
10172 use DI for i386 and R13 for x86-64.  We chose R13 since it has a
10173 shorter encoding.
10174
10175 Return: the regno of the chosen register.  */
10176
10177 static unsigned int
10178 find_drap_reg (void)
10179 {
10180 tree decl = cfun->decl;
10181
10182 if (TARGET_64BIT)
10183 {
10184 /* Use R13 for a nested function or a function that needs a static chain.
10185 Since a function with a tail call may use any caller-saved
10186 registers in the epilogue, DRAP must not use a caller-saved
10187 register in such a case.  */
10188 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10189 return R13_REG;
10190
10191 return R10_REG;
10192 }
10193 else
10194 {
10195 /* Use DI for a nested function or a function that needs a static chain.
10196 Since a function with a tail call may use any caller-saved
10197 registers in the epilogue, DRAP must not use a caller-saved
10198 register in such a case.  */
10199 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10200 return DI_REG;
10201
10202 /* Reuse static chain register if it isn't used for parameter
10203 passing. */
10204 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10205 {
10206 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10207 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10208 return CX_REG;
10209 }
10210 return DI_REG;
10211 }
10212 }
10213
10214 /* Return minimum incoming stack alignment. */
10215
10216 static unsigned int
10217 ix86_minimum_incoming_stack_boundary (bool sibcall)
10218 {
10219 unsigned int incoming_stack_boundary;
10220
10221 /* Prefer the one specified at command line. */
10222 if (ix86_user_incoming_stack_boundary)
10223 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10224 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10225 if -mstackrealign is used, this isn't a sibcall check, and the
10226 estimated stack alignment is 128 bits.  */
10227 else if (!sibcall
10228 && !TARGET_64BIT
10229 && ix86_force_align_arg_pointer
10230 && crtl->stack_alignment_estimated == 128)
10231 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10232 else
10233 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10234
10235 /* Incoming stack alignment can be changed on individual functions
10236 via force_align_arg_pointer attribute. We use the smallest
10237 incoming stack boundary. */
10238 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10239 && lookup_attribute (ix86_force_align_arg_pointer_string,
10240 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10241 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10242
10243 /* The incoming stack frame has to be aligned at least at
10244 parm_stack_boundary. */
10245 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10246 incoming_stack_boundary = crtl->parm_stack_boundary;
10247
10248 /* The stack at the entrance of main is aligned by the runtime.  We use
10249 the smallest incoming stack boundary.  */
10250 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10251 && DECL_NAME (current_function_decl)
10252 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10253 && DECL_FILE_SCOPE_P (current_function_decl))
10254 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10255
10256 return incoming_stack_boundary;
10257 }
10258
10259 /* Update incoming stack boundary and estimated stack alignment. */
10260
10261 static void
10262 ix86_update_stack_boundary (void)
10263 {
10264 ix86_incoming_stack_boundary
10265 = ix86_minimum_incoming_stack_boundary (false);
10266
10267 /* x86_64 vararg needs 16byte stack alignment for register save
10268 area. */
10269 if (TARGET_64BIT
10270 && cfun->stdarg
10271 && crtl->stack_alignment_estimated < 128)
10272 crtl->stack_alignment_estimated = 128;
10273 }
10274
10275 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10276 needed or an rtx for DRAP otherwise. */
10277
10278 static rtx
10279 ix86_get_drap_rtx (void)
10280 {
10281 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10282 crtl->need_drap = true;
10283
10284 if (stack_realign_drap)
10285 {
10286 /* Assign DRAP to vDRAP and return vDRAP.  */
10287 unsigned int regno = find_drap_reg ();
10288 rtx drap_vreg;
10289 rtx arg_ptr;
10290 rtx_insn *seq, *insn;
10291
10292 arg_ptr = gen_rtx_REG (Pmode, regno);
10293 crtl->drap_reg = arg_ptr;
10294
10295 start_sequence ();
10296 drap_vreg = copy_to_reg (arg_ptr);
10297 seq = get_insns ();
10298 end_sequence ();
10299
10300 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10301 if (!optimize)
10302 {
10303 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10304 RTX_FRAME_RELATED_P (insn) = 1;
10305 }
10306 return drap_vreg;
10307 }
10308 else
10309 return NULL;
10310 }
10311
10312 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10313
10314 static rtx
10315 ix86_internal_arg_pointer (void)
10316 {
10317 return virtual_incoming_args_rtx;
10318 }
10319
10320 struct scratch_reg {
10321 rtx reg;
10322 bool saved;
10323 };
10324
10325 /* Return a short-lived scratch register for use on function entry.
10326 In 32-bit mode, it is valid only after the registers are saved
10327 in the prologue. This register must be released by means of
10328 release_scratch_register_on_entry once it is dead. */
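
/* Illustrative usage, as in ix86_adjust_stack_and_probe below:

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ...emit code that uses sr.reg...
     release_scratch_register_on_entry (&sr);  */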
10329
10330 static void
10331 get_scratch_register_on_entry (struct scratch_reg *sr)
10332 {
10333 int regno;
10334
10335 sr->saved = false;
10336
10337 if (TARGET_64BIT)
10338 {
10339 /* We always use R11 in 64-bit mode. */
10340 regno = R11_REG;
10341 }
10342 else
10343 {
10344 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10345 bool fastcall_p
10346 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10347 bool thiscall_p
10348 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10349 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10350 int regparm = ix86_function_regparm (fntype, decl);
10351 int drap_regno
10352 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10353
10354 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10355 for the static chain register. */
10356 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10357 && drap_regno != AX_REG)
10358 regno = AX_REG;
10359 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10360 for the static chain register. */
10361 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10362 regno = AX_REG;
10363 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10364 regno = DX_REG;
10365 /* ecx is the static chain register. */
10366 else if (regparm < 3 && !fastcall_p && !thiscall_p
10367 && !static_chain_p
10368 && drap_regno != CX_REG)
10369 regno = CX_REG;
10370 else if (ix86_save_reg (BX_REG, true))
10371 regno = BX_REG;
10372 /* esi is the static chain register. */
10373 else if (!(regparm == 3 && static_chain_p)
10374 && ix86_save_reg (SI_REG, true))
10375 regno = SI_REG;
10376 else if (ix86_save_reg (DI_REG, true))
10377 regno = DI_REG;
10378 else
10379 {
10380 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10381 sr->saved = true;
10382 }
10383 }
10384
10385 sr->reg = gen_rtx_REG (Pmode, regno);
10386 if (sr->saved)
10387 {
10388 rtx insn = emit_insn (gen_push (sr->reg));
10389 RTX_FRAME_RELATED_P (insn) = 1;
10390 }
10391 }
10392
10393 /* Release a scratch register obtained from the preceding function. */
10394
10395 static void
10396 release_scratch_register_on_entry (struct scratch_reg *sr)
10397 {
10398 if (sr->saved)
10399 {
10400 struct machine_function *m = cfun->machine;
10401 rtx x, insn = emit_insn (gen_pop (sr->reg));
10402
10403 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10404 RTX_FRAME_RELATED_P (insn) = 1;
10405 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10406 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10407 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10408 m->fs.sp_offset -= UNITS_PER_WORD;
10409 }
10410 }
10411
10412 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
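/* With the default STACK_CHECK_PROBE_INTERVAL_EXP (typically 12) this is
   4096 bytes, i.e. one probe per page; the exact value is configured by the
   target headers, so treat 4096 in the sketches below as illustrative only.  */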
10413
10414 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10415
10416 static void
10417 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10418 {
10419 /* We skip the probe for the first interval + a small dope of 4 words and
10420 probe that many bytes past the specified size to maintain a protection
10421 area at the bottom of the stack.  */
10422 const int dope = 4 * UNITS_PER_WORD;
10423 rtx size_rtx = GEN_INT (size), last;
10424
10425 /* See if we have a constant small number of probes to generate. If so,
10426 that's the easy case. The run-time loop is made up of 11 insns in the
10427 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10428 for n # of intervals. */
10429 if (size <= 5 * PROBE_INTERVAL)
10430 {
10431 HOST_WIDE_INT i, adjust;
10432 bool first_probe = true;
10433
10434 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10435 values of N from 1 until it exceeds SIZE. If only one probe is
10436 needed, this will not generate any code. Then adjust and probe
10437 to PROBE_INTERVAL + SIZE. */
10438 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10439 {
10440 if (first_probe)
10441 {
10442 adjust = 2 * PROBE_INTERVAL + dope;
10443 first_probe = false;
10444 }
10445 else
10446 adjust = PROBE_INTERVAL;
10447
10448 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10449 plus_constant (Pmode, stack_pointer_rtx,
10450 -adjust)));
10451 emit_stack_probe (stack_pointer_rtx);
10452 }
10453
10454 if (first_probe)
10455 adjust = size + PROBE_INTERVAL + dope;
10456 else
10457 adjust = size + PROBE_INTERVAL - i;
10458
10459 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10460 plus_constant (Pmode, stack_pointer_rtx,
10461 -adjust)));
10462 emit_stack_probe (stack_pointer_rtx);
10463
10464 /* Adjust back to account for the additional first interval. */
10465 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10466 plus_constant (Pmode, stack_pointer_rtx,
10467 PROBE_INTERVAL + dope)));
10468 }
10469
10470 /* Otherwise, do the same as above, but in a loop. Note that we must be
10471 extra careful with variables wrapping around because we might be at
10472 the very top (or the very bottom) of the address space and we have
10473 to be able to handle this case properly; in particular, we use an
10474 equality test for the loop condition. */
10475 else
10476 {
10477 HOST_WIDE_INT rounded_size;
10478 struct scratch_reg sr;
10479
10480 get_scratch_register_on_entry (&sr);
10481
10482
10483 /* Step 1: round SIZE to the previous multiple of the interval. */
10484
10485 rounded_size = size & -PROBE_INTERVAL;
10486
10487
10488 /* Step 2: compute initial and final value of the loop counter. */
10489
10490 /* SP = SP_0 + PROBE_INTERVAL. */
10491 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10492 plus_constant (Pmode, stack_pointer_rtx,
10493 - (PROBE_INTERVAL + dope))));
10494
10495 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10496 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10497 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10498 gen_rtx_PLUS (Pmode, sr.reg,
10499 stack_pointer_rtx)));
10500
10501
10502 /* Step 3: the loop
10503
10504 while (SP != LAST_ADDR)
10505 {
10506 SP = SP + PROBE_INTERVAL
10507 probe at SP
10508 }
10509
10510 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10511 values of N from 1 until it is equal to ROUNDED_SIZE. */
10512
10513 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10514
10515
10516 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10517 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10518
10519 if (size != rounded_size)
10520 {
10521 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10522 plus_constant (Pmode, stack_pointer_rtx,
10523 rounded_size - size)));
10524 emit_stack_probe (stack_pointer_rtx);
10525 }
10526
10527 /* Adjust back to account for the additional first interval. */
10528 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10529 plus_constant (Pmode, stack_pointer_rtx,
10530 PROBE_INTERVAL + dope)));
10531
10532 release_scratch_register_on_entry (&sr);
10533 }
10534
10535 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10536
10537 /* Even if the stack pointer isn't the CFA register, we need to correctly
10538 describe the adjustments made to it, in particular differentiate the
10539 frame-related ones from the frame-unrelated ones. */
10540 if (size > 0)
10541 {
10542 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10543 XVECEXP (expr, 0, 0)
10544 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10545 plus_constant (Pmode, stack_pointer_rtx, -size));
10546 XVECEXP (expr, 0, 1)
10547 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10548 plus_constant (Pmode, stack_pointer_rtx,
10549 PROBE_INTERVAL + dope + size));
10550 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10551 RTX_FRAME_RELATED_P (last) = 1;
10552
10553 cfun->machine->fs.sp_offset += size;
10554 }
10555
10556 /* Make sure nothing is scheduled before we are done. */
10557 emit_insn (gen_blockage ());
10558 }
10559
10560 /* Adjust the stack pointer up to REG while probing it. */
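
/* Illustrative only: assuming the default 4096-byte probe interval and a
   32-bit target using %eax as the scratch register, the loop emitted by
   this function looks roughly like

	.LPSRL0:
		cmpl	%eax, %esp
		je	.LPSRE0
		subl	$4096, %esp
		orl	$0, (%esp)
		jmp	.LPSRL0
	.LPSRE0:

   The actual register, operand-size suffix and interval depend on the
   target configuration.  */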
10561
10562 const char *
10563 output_adjust_stack_and_probe (rtx reg)
10564 {
10565 static int labelno = 0;
10566 char loop_lab[32], end_lab[32];
10567 rtx xops[2];
10568
10569 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10570 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10571
10572 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10573
10574 /* Jump to END_LAB if SP == LAST_ADDR. */
10575 xops[0] = stack_pointer_rtx;
10576 xops[1] = reg;
10577 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10578 fputs ("\tje\t", asm_out_file);
10579 assemble_name_raw (asm_out_file, end_lab);
10580 fputc ('\n', asm_out_file);
10581
10582 /* SP = SP + PROBE_INTERVAL. */
10583 xops[1] = GEN_INT (PROBE_INTERVAL);
10584 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10585
10586 /* Probe at SP. */
10587 xops[1] = const0_rtx;
10588 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10589
10590 fprintf (asm_out_file, "\tjmp\t");
10591 assemble_name_raw (asm_out_file, loop_lab);
10592 fputc ('\n', asm_out_file);
10593
10594 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10595
10596 return "";
10597 }
10598
10599 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10600 inclusive. These are offsets from the current stack pointer. */
10601
10602 static void
10603 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10604 {
10605 /* See if we have a constant small number of probes to generate. If so,
10606 that's the easy case. The run-time loop is made up of 7 insns in the
10607 generic case while the compile-time loop is made up of n insns for n #
10608 of intervals. */
10609 if (size <= 7 * PROBE_INTERVAL)
10610 {
10611 HOST_WIDE_INT i;
10612
10613 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10614 it exceeds SIZE. If only one probe is needed, this will not
10615 generate any code. Then probe at FIRST + SIZE. */
10616 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10617 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10618 -(first + i)));
10619
10620 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10621 -(first + size)));
10622 }
10623
10624 /* Otherwise, do the same as above, but in a loop. Note that we must be
10625 extra careful with variables wrapping around because we might be at
10626 the very top (or the very bottom) of the address space and we have
10627 to be able to handle this case properly; in particular, we use an
10628 equality test for the loop condition. */
10629 else
10630 {
10631 HOST_WIDE_INT rounded_size, last;
10632 struct scratch_reg sr;
10633
10634 get_scratch_register_on_entry (&sr);
10635
10636
10637 /* Step 1: round SIZE to the previous multiple of the interval. */
10638
10639 rounded_size = size & -PROBE_INTERVAL;
10640
10641
10642 /* Step 2: compute initial and final value of the loop counter. */
10643
10644 /* TEST_OFFSET = FIRST. */
10645 emit_move_insn (sr.reg, GEN_INT (-first));
10646
10647 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10648 last = first + rounded_size;
10649
10650
10651 /* Step 3: the loop
10652
10653 while (TEST_ADDR != LAST_ADDR)
10654 {
10655 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10656 probe at TEST_ADDR
10657 }
10658
10659 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10660 until it is equal to ROUNDED_SIZE. */
10661
10662 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10663
10664
10665 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10666 that SIZE is equal to ROUNDED_SIZE. */
10667
10668 if (size != rounded_size)
10669 emit_stack_probe (plus_constant (Pmode,
10670 gen_rtx_PLUS (Pmode,
10671 stack_pointer_rtx,
10672 sr.reg),
10673 rounded_size - size));
10674
10675 release_scratch_register_on_entry (&sr);
10676 }
10677
10678 /* Make sure nothing is scheduled before we are done. */
10679 emit_insn (gen_blockage ());
10680 }
10681
10682 /* Probe a range of stack addresses from REG to END, inclusive. These are
10683 offsets from the current stack pointer. */
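
/* Illustrative only: assuming the default 4096-byte probe interval and a
   32-bit target with %eax holding the negative test offset, the loop
   emitted by this function looks roughly like

	.LPSRL1:
		cmpl	$-LAST_OFFSET, %eax
		je	.LPSRE1
		subl	$4096, %eax
		orl	$0, (%esp,%eax)
		jmp	.LPSRL1
	.LPSRE1:

   where LAST_OFFSET stands for FIRST + ROUNDED_SIZE as computed by the
   caller.  */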
10684
10685 const char *
10686 output_probe_stack_range (rtx reg, rtx end)
10687 {
10688 static int labelno = 0;
10689 char loop_lab[32], end_lab[32];
10690 rtx xops[3];
10691
10692 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10693 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10694
10695 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10696
10697 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10698 xops[0] = reg;
10699 xops[1] = end;
10700 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10701 fputs ("\tje\t", asm_out_file);
10702 assemble_name_raw (asm_out_file, end_lab);
10703 fputc ('\n', asm_out_file);
10704
10705 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10706 xops[1] = GEN_INT (PROBE_INTERVAL);
10707 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10708
10709 /* Probe at TEST_ADDR. */
10710 xops[0] = stack_pointer_rtx;
10711 xops[1] = reg;
10712 xops[2] = const0_rtx;
10713 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10714
10715 fprintf (asm_out_file, "\tjmp\t");
10716 assemble_name_raw (asm_out_file, loop_lab);
10717 fputc ('\n', asm_out_file);
10718
10719 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10720
10721 return "";
10722 }
10723
10724 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10725 to be generated in correct form. */
10726 static void
10727 ix86_finalize_stack_realign_flags (void)
10728 {
10729 /* Check if stack realignment is really needed after reload, and
10730 store the result in cfun.  */
10731 unsigned int incoming_stack_boundary
10732 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10733 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10734 unsigned int stack_realign = (incoming_stack_boundary
10735 < (crtl->is_leaf
10736 ? crtl->max_used_stack_slot_alignment
10737 : crtl->stack_alignment_needed));
10738
10739 if (crtl->stack_realign_finalized)
10740 {
10741 /* After stack_realign_needed is finalized, we can no longer
10742 change it.  */
10743 gcc_assert (crtl->stack_realign_needed == stack_realign);
10744 return;
10745 }
10746
10747 /* If the only reason for frame_pointer_needed is that we conservatively
10748 assumed stack realignment might be needed, but in the end nothing that
10749 needed the stack alignment had been spilled, clear frame_pointer_needed
10750 and say we don't need stack realignment. */
10751 if (stack_realign
10752 && frame_pointer_needed
10753 && crtl->is_leaf
10754 && flag_omit_frame_pointer
10755 && crtl->sp_is_unchanging
10756 && !ix86_current_function_calls_tls_descriptor
10757 && !crtl->accesses_prior_frames
10758 && !cfun->calls_alloca
10759 && !crtl->calls_eh_return
10760 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10761 && !ix86_frame_pointer_required ()
10762 && get_frame_size () == 0
10763 && ix86_nsaved_sseregs () == 0
10764 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10765 {
10766 HARD_REG_SET set_up_by_prologue, prologue_used;
10767 basic_block bb;
10768
10769 CLEAR_HARD_REG_SET (prologue_used);
10770 CLEAR_HARD_REG_SET (set_up_by_prologue);
10771 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10772 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10773 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10774 HARD_FRAME_POINTER_REGNUM);
10775 FOR_EACH_BB_FN (bb, cfun)
10776 {
10777 rtx_insn *insn;
10778 FOR_BB_INSNS (bb, insn)
10779 if (NONDEBUG_INSN_P (insn)
10780 && requires_stack_frame_p (insn, prologue_used,
10781 set_up_by_prologue))
10782 {
10783 crtl->stack_realign_needed = stack_realign;
10784 crtl->stack_realign_finalized = true;
10785 return;
10786 }
10787 }
10788
10789 /* If drap has been set, but it actually isn't live at the start
10790 of the function, there is no reason to set it up. */
10791 if (crtl->drap_reg)
10792 {
10793 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10794 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10795 {
10796 crtl->drap_reg = NULL_RTX;
10797 crtl->need_drap = false;
10798 }
10799 }
10800 else
10801 cfun->machine->no_drap_save_restore = true;
10802
10803 frame_pointer_needed = false;
10804 stack_realign = false;
10805 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10806 crtl->stack_alignment_needed = incoming_stack_boundary;
10807 crtl->stack_alignment_estimated = incoming_stack_boundary;
10808 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10809 crtl->preferred_stack_boundary = incoming_stack_boundary;
10810 df_finish_pass (true);
10811 df_scan_alloc (NULL);
10812 df_scan_blocks ();
10813 df_compute_regs_ever_live (true);
10814 df_analyze ();
10815 }
10816
10817 crtl->stack_realign_needed = stack_realign;
10818 crtl->stack_realign_finalized = true;
10819 }
10820
10821 /* Expand the prologue into a bunch of separate insns. */
10822
10823 void
10824 ix86_expand_prologue (void)
10825 {
10826 struct machine_function *m = cfun->machine;
10827 rtx insn, t;
10828 struct ix86_frame frame;
10829 HOST_WIDE_INT allocate;
10830 bool int_registers_saved;
10831 bool sse_registers_saved;
10832
10833 ix86_finalize_stack_realign_flags ();
10834
10835 /* DRAP should not coexist with stack_realign_fp */
10836 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10837
10838 memset (&m->fs, 0, sizeof (m->fs));
10839
10840 /* Initialize CFA state for before the prologue. */
10841 m->fs.cfa_reg = stack_pointer_rtx;
10842 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10843
10844 /* Track SP offset to the CFA. We continue tracking this after we've
10845 swapped the CFA register away from SP. In the case of re-alignment
10846 this is fudged; we're interested in offsets within the local frame.  */
10847 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10848 m->fs.sp_valid = true;
10849
10850 ix86_compute_frame_layout (&frame);
10851
10852 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10853 {
10854 /* We should have already generated an error for any use of
10855 ms_hook on a nested function. */
10856 gcc_checking_assert (!ix86_static_chain_on_stack);
10857
10858 /* Check if profiling is active and we shall use the profiling-before-
10859 prologue variant.  If so, issue a sorry.  */
10860 if (crtl->profile && flag_fentry != 0)
10861 sorry ("ms_hook_prologue attribute isn%'t compatible "
10862 "with -mfentry for 32-bit");
10863
10864 /* In ix86_asm_output_function_label we emitted:
10865 8b ff movl.s %edi,%edi
10866 55 push %ebp
10867 8b ec movl.s %esp,%ebp
10868
10869 This matches the hookable function prologue in Win32 API
10870 functions in Microsoft Windows XP Service Pack 2 and newer.
10871 Wine uses this to enable Windows apps to hook the Win32 API
10872 functions provided by Wine.
10873
10874 What that means is that we've already set up the frame pointer. */
10875
10876 if (frame_pointer_needed
10877 && !(crtl->drap_reg && crtl->stack_realign_needed))
10878 {
10879 rtx push, mov;
10880
10881 /* We've decided to use the frame pointer already set up.
10882 Describe this to the unwinder by pretending that both
10883 push and mov insns happen right here.
10884
10885 Putting the unwind info here at the end of the ms_hook
10886 is done so that we can make absolutely certain we get
10887 the required byte sequence at the start of the function,
10888 rather than relying on an assembler that can produce
10889 the exact encoding required.
10890
10891 However it does mean (in the unpatched case) that we have
10892 a 1 insn window where the asynchronous unwind info is
10893 incorrect. However, if we placed the unwind info at
10894 its correct location we would have incorrect unwind info
10895 in the patched case.  Which is probably all moot since
10896 I don't expect Wine to generate dwarf2 unwind info for the
10897 system libraries that use this feature.  */
10898
10899 insn = emit_insn (gen_blockage ());
10900
10901 push = gen_push (hard_frame_pointer_rtx);
10902 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10903 stack_pointer_rtx);
10904 RTX_FRAME_RELATED_P (push) = 1;
10905 RTX_FRAME_RELATED_P (mov) = 1;
10906
10907 RTX_FRAME_RELATED_P (insn) = 1;
10908 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10909 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10910
10911 /* Note that gen_push incremented m->fs.cfa_offset, even
10912 though we didn't emit the push insn here. */
10913 m->fs.cfa_reg = hard_frame_pointer_rtx;
10914 m->fs.fp_offset = m->fs.cfa_offset;
10915 m->fs.fp_valid = true;
10916 }
10917 else
10918 {
10919 /* The frame pointer is not needed so pop %ebp again.
10920 This leaves us with a pristine state. */
10921 emit_insn (gen_pop (hard_frame_pointer_rtx));
10922 }
10923 }
10924
10925 /* The first insn of a function that accepts its static chain on the
10926 stack is to push the register that would be filled in by a direct
10927 call. This insn will be skipped by the trampoline. */
10928 else if (ix86_static_chain_on_stack)
10929 {
10930 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10931 emit_insn (gen_blockage ());
10932
10933 /* We don't want to interpret this push insn as a register save,
10934 only as a stack adjustment. The real copy of the register as
10935 a save will be done later, if needed. */
10936 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10937 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10938 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10939 RTX_FRAME_RELATED_P (insn) = 1;
10940 }
10941
10942 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10943 DRAP is needed and stack realignment is really needed after reload.  */
10944 if (stack_realign_drap)
10945 {
10946 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10947
10948 /* Only need to push parameter pointer reg if it is caller saved. */
10949 if (!call_used_regs[REGNO (crtl->drap_reg)])
10950 {
10951 /* Push arg pointer reg */
10952 insn = emit_insn (gen_push (crtl->drap_reg));
10953 RTX_FRAME_RELATED_P (insn) = 1;
10954 }
10955
10956 /* Grab the argument pointer. */
10957 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10958 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10959 RTX_FRAME_RELATED_P (insn) = 1;
10960 m->fs.cfa_reg = crtl->drap_reg;
10961 m->fs.cfa_offset = 0;
10962
10963 /* Align the stack. */
10964 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10965 stack_pointer_rtx,
10966 GEN_INT (-align_bytes)));
10967 RTX_FRAME_RELATED_P (insn) = 1;
10968
10969 /* Replicate the return address on the stack so that return
10970 address can be reached via (argp - 1) slot. This is needed
10971 to implement macro RETURN_ADDR_RTX and intrinsic function
10972 expand_builtin_return_addr etc. */
10973 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10974 t = gen_frame_mem (word_mode, t);
10975 insn = emit_insn (gen_push (t));
10976 RTX_FRAME_RELATED_P (insn) = 1;
10977
10978 /* For the purposes of frame and register save area addressing,
10979 we've started over with a new frame. */
10980 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10981 m->fs.realigned = true;
10982 }
10983
10984 int_registers_saved = (frame.nregs == 0);
10985 sse_registers_saved = (frame.nsseregs == 0);
10986
10987 if (frame_pointer_needed && !m->fs.fp_valid)
10988 {
10989 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10990 slower on all targets. Also sdb doesn't like it. */
10991 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10992 RTX_FRAME_RELATED_P (insn) = 1;
10993
10994 /* Push registers now, before setting the frame pointer
10995 on SEH target. */
10996 if (!int_registers_saved
10997 && TARGET_SEH
10998 && !frame.save_regs_using_mov)
10999 {
11000 ix86_emit_save_regs ();
11001 int_registers_saved = true;
11002 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11003 }
11004
11005 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11006 {
11007 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11008 RTX_FRAME_RELATED_P (insn) = 1;
11009
11010 if (m->fs.cfa_reg == stack_pointer_rtx)
11011 m->fs.cfa_reg = hard_frame_pointer_rtx;
11012 m->fs.fp_offset = m->fs.sp_offset;
11013 m->fs.fp_valid = true;
11014 }
11015 }
11016
11017 if (!int_registers_saved)
11018 {
11019 /* If saving registers via PUSH, do so now. */
11020 if (!frame.save_regs_using_mov)
11021 {
11022 ix86_emit_save_regs ();
11023 int_registers_saved = true;
11024 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11025 }
11026
11027 /* When using the red zone we may start register saving before allocating
11028 the stack frame, saving one cycle of the prologue.  However, avoid
11029 doing this if we have to probe the stack; at least on x86_64 the
11030 stack probe can turn into a call that clobbers a red zone location. */
11031 else if (ix86_using_red_zone ()
11032 && (! TARGET_STACK_PROBE
11033 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11034 {
11035 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11036 int_registers_saved = true;
11037 }
11038 }
11039
11040 if (stack_realign_fp)
11041 {
11042 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11043 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11044
11045 /* The computation of the size of the re-aligned stack frame means
11046 that we must allocate the size of the register save area before
11047 performing the actual alignment. Otherwise we cannot guarantee
11048 that there's enough storage above the realignment point. */
11049 if (m->fs.sp_offset != frame.sse_reg_save_offset)
11050 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11051 GEN_INT (m->fs.sp_offset
11052 - frame.sse_reg_save_offset),
11053 -1, false);
11054
11055 /* Align the stack. */
11056 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11057 stack_pointer_rtx,
11058 GEN_INT (-align_bytes)));
11059
11060 /* For the purposes of register save area addressing, the stack
11061 pointer is no longer valid. As for the value of sp_offset,
11062 see ix86_compute_frame_layout, which we need to match in order
11063 to pass verification of stack_pointer_offset at the end. */
11064 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
11065 m->fs.sp_valid = false;
11066 }
11067
11068 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11069
11070 if (flag_stack_usage_info)
11071 {
11072 /* We start to count from ARG_POINTER. */
11073 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11074
11075 /* If it was realigned, take into account the fake frame. */
11076 if (stack_realign_drap)
11077 {
11078 if (ix86_static_chain_on_stack)
11079 stack_size += UNITS_PER_WORD;
11080
11081 if (!call_used_regs[REGNO (crtl->drap_reg)])
11082 stack_size += UNITS_PER_WORD;
11083
11084 /* This over-estimates by 1 minimal-stack-alignment-unit but
11085 mitigates that by counting in the new return address slot. */
11086 current_function_dynamic_stack_size
11087 += crtl->stack_alignment_needed / BITS_PER_UNIT;
11088 }
11089
11090 current_function_static_stack_size = stack_size;
11091 }
11092
11093 /* On SEH target with very large frame size, allocate an area to save
11094 SSE registers (as the very large allocation won't be described). */
11095 if (TARGET_SEH
11096 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11097 && !sse_registers_saved)
11098 {
11099 HOST_WIDE_INT sse_size =
11100 frame.sse_reg_save_offset - frame.reg_save_offset;
11101
11102 gcc_assert (int_registers_saved);
11103
11104 /* No need to do stack checking as the area will be immediately
11105 written. */
11106 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11107 GEN_INT (-sse_size), -1,
11108 m->fs.cfa_reg == stack_pointer_rtx);
11109 allocate -= sse_size;
11110 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11111 sse_registers_saved = true;
11112 }
11113
11114 /* The stack has already been decremented by the instruction calling us
11115 so probe if the size is non-negative to preserve the protection area. */
11116 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11117 {
11118 /* We expect the registers to be saved when probes are used. */
11119 gcc_assert (int_registers_saved);
11120
11121 if (STACK_CHECK_MOVING_SP)
11122 {
11123 if (!(crtl->is_leaf && !cfun->calls_alloca
11124 && allocate <= PROBE_INTERVAL))
11125 {
11126 ix86_adjust_stack_and_probe (allocate);
11127 allocate = 0;
11128 }
11129 }
11130 else
11131 {
11132 HOST_WIDE_INT size = allocate;
11133
11134 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11135 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11136
11137 if (TARGET_STACK_PROBE)
11138 {
11139 if (crtl->is_leaf && !cfun->calls_alloca)
11140 {
11141 if (size > PROBE_INTERVAL)
11142 ix86_emit_probe_stack_range (0, size);
11143 }
11144 else
11145 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11146 }
11147 else
11148 {
11149 if (crtl->is_leaf && !cfun->calls_alloca)
11150 {
11151 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11152 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11153 size - STACK_CHECK_PROTECT);
11154 }
11155 else
11156 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11157 }
11158 }
11159 }
11160
11161 if (allocate == 0)
11162 ;
11163 else if (!ix86_target_stack_probe ()
11164 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11165 {
11166 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11167 GEN_INT (-allocate), -1,
11168 m->fs.cfa_reg == stack_pointer_rtx);
11169 }
11170 else
11171 {
11172 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11173 rtx r10 = NULL;
11174 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11175 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11176 bool eax_live = ix86_eax_live_at_start_p ();
11177 bool r10_live = false;
11178
11179 if (TARGET_64BIT)
11180 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11181
11182 if (eax_live)
11183 {
11184 insn = emit_insn (gen_push (eax));
11185 allocate -= UNITS_PER_WORD;
11186 /* Note that SEH directives need to continue tracking the stack
11187 pointer even after the frame pointer has been set up. */
11188 if (sp_is_cfa_reg || TARGET_SEH)
11189 {
11190 if (sp_is_cfa_reg)
11191 m->fs.cfa_offset += UNITS_PER_WORD;
11192 RTX_FRAME_RELATED_P (insn) = 1;
11193 }
11194 }
11195
11196 if (r10_live)
11197 {
11198 r10 = gen_rtx_REG (Pmode, R10_REG);
11199 insn = emit_insn (gen_push (r10));
11200 allocate -= UNITS_PER_WORD;
11201 if (sp_is_cfa_reg || TARGET_SEH)
11202 {
11203 if (sp_is_cfa_reg)
11204 m->fs.cfa_offset += UNITS_PER_WORD;
11205 RTX_FRAME_RELATED_P (insn) = 1;
11206 }
11207 }
11208
11209 emit_move_insn (eax, GEN_INT (allocate));
11210 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11211
11212 /* Use the fact that AX still contains ALLOCATE. */
11213 adjust_stack_insn = (Pmode == DImode
11214 ? gen_pro_epilogue_adjust_stack_di_sub
11215 : gen_pro_epilogue_adjust_stack_si_sub);
11216
11217 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11218 stack_pointer_rtx, eax));
11219
11220 if (sp_is_cfa_reg || TARGET_SEH)
11221 {
11222 if (sp_is_cfa_reg)
11223 m->fs.cfa_offset += allocate;
11224 RTX_FRAME_RELATED_P (insn) = 1;
11225 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11226 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11227 plus_constant (Pmode, stack_pointer_rtx,
11228 -allocate)));
11229 }
11230 m->fs.sp_offset += allocate;
11231
11232 /* Use stack_pointer_rtx for relative addressing so that code
11233 works for realigned stack, too. */
11234 if (r10_live && eax_live)
11235 {
11236 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11237 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11238 gen_frame_mem (word_mode, t));
11239 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11240 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11241 gen_frame_mem (word_mode, t));
11242 }
11243 else if (eax_live || r10_live)
11244 {
11245 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11246 emit_move_insn (gen_rtx_REG (word_mode,
11247 (eax_live ? AX_REG : R10_REG)),
11248 gen_frame_mem (word_mode, t));
11249 }
11250 }
11251 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11252
11253 /* If we haven't already set up the frame pointer, do so now.  */
11254 if (frame_pointer_needed && !m->fs.fp_valid)
11255 {
11256 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11257 GEN_INT (frame.stack_pointer_offset
11258 - frame.hard_frame_pointer_offset));
11259 insn = emit_insn (insn);
11260 RTX_FRAME_RELATED_P (insn) = 1;
11261 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11262
11263 if (m->fs.cfa_reg == stack_pointer_rtx)
11264 m->fs.cfa_reg = hard_frame_pointer_rtx;
11265 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11266 m->fs.fp_valid = true;
11267 }
11268
11269 if (!int_registers_saved)
11270 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11271 if (!sse_registers_saved)
11272 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11273
11274 if (crtl->drap_reg && !crtl->stack_realign_needed)
11275 {
11276 /* vDRAP was set up, but after reload it turns out stack realignment
11277 isn't necessary; here we emit prologue code to set up DRAP
11278 without the stack realignment adjustment.  */
11279 t = choose_baseaddr (0);
11280 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11281 }
11282
11283 /* Prevent instructions from being scheduled into register save push
11284 sequence when access to the redzone area is done through frame pointer.
11285 The offset between the frame pointer and the stack pointer is calculated
11286 relative to the value of the stack pointer at the end of the function
11287 prologue, and moving instructions that access redzone area via frame
11288 pointer inside push sequence violates this assumption. */
11289 if (frame_pointer_needed && frame.red_zone_size)
11290 emit_insn (gen_memory_blockage ());
11291
11292 /* Emit cld instruction if stringops are used in the function. */
11293 if (TARGET_CLD && ix86_current_function_needs_cld)
11294 emit_insn (gen_cld ());
11295
11296 /* SEH requires that the prologue end within 256 bytes of the start of
11297 the function. Prevent instruction schedules that would extend that.
11298 Further, prevent alloca modifications to the stack pointer from being
11299 combined with prologue modifications. */
11300 if (TARGET_SEH)
11301 emit_insn (gen_prologue_use (stack_pointer_rtx));
11302 }
11303
11304 /* Emit code to restore REG using a POP insn. */
11305
11306 static void
11307 ix86_emit_restore_reg_using_pop (rtx reg)
11308 {
11309 struct machine_function *m = cfun->machine;
11310 rtx insn = emit_insn (gen_pop (reg));
11311
11312 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11313 m->fs.sp_offset -= UNITS_PER_WORD;
11314
11315 if (m->fs.cfa_reg == crtl->drap_reg
11316 && REGNO (reg) == REGNO (crtl->drap_reg))
11317 {
11318 /* Previously we'd represented the CFA as an expression
11319 like *(%ebp - 8). We've just popped that value from
11320 the stack, which means we need to reset the CFA to
11321 the drap register. This will remain until we restore
11322 the stack pointer. */
11323 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11324 RTX_FRAME_RELATED_P (insn) = 1;
11325
11326 /* This means that the DRAP register is valid for addressing too. */
11327 m->fs.drap_valid = true;
11328 return;
11329 }
11330
11331 if (m->fs.cfa_reg == stack_pointer_rtx)
11332 {
11333 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11334 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11335 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11336 RTX_FRAME_RELATED_P (insn) = 1;
11337
11338 m->fs.cfa_offset -= UNITS_PER_WORD;
11339 }
11340
11341 /* When the frame pointer is the CFA, and we pop it, we are
11342 swapping back to the stack pointer as the CFA. This happens
11343 for stack frames that don't allocate other data, so we assume
11344 the stack pointer is now pointing at the return address, i.e.
11345 the function entry state, which makes the offset 1 word.  */
11346 if (reg == hard_frame_pointer_rtx)
11347 {
11348 m->fs.fp_valid = false;
11349 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11350 {
11351 m->fs.cfa_reg = stack_pointer_rtx;
11352 m->fs.cfa_offset -= UNITS_PER_WORD;
11353
11354 add_reg_note (insn, REG_CFA_DEF_CFA,
11355 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11356 GEN_INT (m->fs.cfa_offset)));
11357 RTX_FRAME_RELATED_P (insn) = 1;
11358 }
11359 }
11360 }
11361
11362 /* Emit code to restore saved registers using POP insns. */
11363
11364 static void
11365 ix86_emit_restore_regs_using_pop (void)
11366 {
11367 unsigned int regno;
11368
11369 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11370 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11371 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11372 }
11373
11374 /* Emit code and notes for the LEAVE instruction. */
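/* For clarity: "leave" is equivalent to "mov %ebp, %esp; pop %ebp" (with
   %rbp/%rsp in 64-bit mode), which is why the state update below marks the
   stack pointer valid again at fp_offset - UNITS_PER_WORD and the frame
   pointer invalid.  */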
11375
11376 static void
11377 ix86_emit_leave (void)
11378 {
11379 struct machine_function *m = cfun->machine;
11380 rtx insn = emit_insn (ix86_gen_leave ());
11381
11382 ix86_add_queued_cfa_restore_notes (insn);
11383
11384 gcc_assert (m->fs.fp_valid);
11385 m->fs.sp_valid = true;
11386 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11387 m->fs.fp_valid = false;
11388
11389 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11390 {
11391 m->fs.cfa_reg = stack_pointer_rtx;
11392 m->fs.cfa_offset = m->fs.sp_offset;
11393
11394 add_reg_note (insn, REG_CFA_DEF_CFA,
11395 plus_constant (Pmode, stack_pointer_rtx,
11396 m->fs.sp_offset));
11397 RTX_FRAME_RELATED_P (insn) = 1;
11398 }
11399 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11400 m->fs.fp_offset);
11401 }
11402
11403 /* Emit code to restore saved registers using MOV insns.
11404 First register is restored from CFA - CFA_OFFSET. */
11405 static void
11406 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11407 bool maybe_eh_return)
11408 {
11409 struct machine_function *m = cfun->machine;
11410 unsigned int regno;
11411
11412 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11413 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11414 {
11415 rtx reg = gen_rtx_REG (word_mode, regno);
11416 rtx insn, mem;
11417
11418 mem = choose_baseaddr (cfa_offset);
11419 mem = gen_frame_mem (word_mode, mem);
11420 insn = emit_move_insn (reg, mem);
11421
11422 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11423 {
11424 /* Previously we'd represented the CFA as an expression
11425 like *(%ebp - 8).  We've just reloaded that value from
11426 the stack, which means we need to reset the CFA to
11427 the drap register. This will remain until we restore
11428 the stack pointer. */
11429 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11430 RTX_FRAME_RELATED_P (insn) = 1;
11431
11432 /* This means that the DRAP register is valid for addressing. */
11433 m->fs.drap_valid = true;
11434 }
11435 else
11436 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11437
11438 cfa_offset -= UNITS_PER_WORD;
11439 }
11440 }
11441
11442 /* Emit code to restore saved SSE registers using MOV insns.
11443 First register is restored from CFA - CFA_OFFSET.  */
11444 static void
11445 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11446 bool maybe_eh_return)
11447 {
11448 unsigned int regno;
11449
11450 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11451 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11452 {
11453 rtx reg = gen_rtx_REG (V4SFmode, regno);
11454 rtx mem;
11455
11456 mem = choose_baseaddr (cfa_offset);
11457 mem = gen_rtx_MEM (V4SFmode, mem);
11458 set_mem_align (mem, 128);
11459 emit_move_insn (reg, mem);
11460
11461 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11462
11463 cfa_offset -= 16;
11464 }
11465 }
11466
11467 /* Restore function stack, frame, and registers. */
11468
11469 void
11470 ix86_expand_epilogue (int style)
11471 {
11472 struct machine_function *m = cfun->machine;
11473 struct machine_frame_state frame_state_save = m->fs;
11474 struct ix86_frame frame;
11475 bool restore_regs_via_mov;
11476 bool using_drap;
11477
11478 ix86_finalize_stack_realign_flags ();
11479 ix86_compute_frame_layout (&frame);
11480
11481 m->fs.sp_valid = (!frame_pointer_needed
11482 || (crtl->sp_is_unchanging
11483 && !stack_realign_fp));
11484 gcc_assert (!m->fs.sp_valid
11485 || m->fs.sp_offset == frame.stack_pointer_offset);
11486
11487 /* The FP must be valid if the frame pointer is present. */
11488 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11489 gcc_assert (!m->fs.fp_valid
11490 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11491
11492 /* We must have *some* valid pointer to the stack frame. */
11493 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11494
11495 /* The DRAP is never valid at this point. */
11496 gcc_assert (!m->fs.drap_valid);
11497
11498 /* See the comment about red zone and frame
11499 pointer usage in ix86_expand_prologue. */
11500 if (frame_pointer_needed && frame.red_zone_size)
11501 emit_insn (gen_memory_blockage ());
11502
11503 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11504 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11505
11506 /* Determine the CFA offset of the end of the red-zone. */
11507 m->fs.red_zone_offset = 0;
11508 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11509 {
11510 /* The red-zone begins below the return address. */
11511 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11512
11513 /* When the register save area is in the aligned portion of
11514 the stack, determine the maximum runtime displacement that
11515 matches up with the aligned frame. */
11516 if (stack_realign_drap)
11517 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11518 + UNITS_PER_WORD);
11519 }
11520
11521 /* Special care must be taken for the normal return case of a function
11522 using eh_return: the eax and edx registers are marked as saved, but
11523 not restored along this path. Adjust the save location to match. */
11524 if (crtl->calls_eh_return && style != 2)
11525 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11526
11527 /* EH_RETURN requires the use of moves to function properly. */
11528 if (crtl->calls_eh_return)
11529 restore_regs_via_mov = true;
11530 /* SEH requires the use of pops to identify the epilogue. */
11531 else if (TARGET_SEH)
11532 restore_regs_via_mov = false;
11533 /* If we're only restoring one register and sp is not valid, then
11534 use a move instruction to restore the register, since it's
11535 less work than reloading sp and popping the register.  */
11536 else if (!m->fs.sp_valid && frame.nregs <= 1)
11537 restore_regs_via_mov = true;
11538 else if (TARGET_EPILOGUE_USING_MOVE
11539 && cfun->machine->use_fast_prologue_epilogue
11540 && (frame.nregs > 1
11541 || m->fs.sp_offset != frame.reg_save_offset))
11542 restore_regs_via_mov = true;
11543 else if (frame_pointer_needed
11544 && !frame.nregs
11545 && m->fs.sp_offset != frame.reg_save_offset)
11546 restore_regs_via_mov = true;
11547 else if (frame_pointer_needed
11548 && TARGET_USE_LEAVE
11549 && cfun->machine->use_fast_prologue_epilogue
11550 && frame.nregs == 1)
11551 restore_regs_via_mov = true;
11552 else
11553 restore_regs_via_mov = false;
11554
11555 if (restore_regs_via_mov || frame.nsseregs)
11556 {
11557 /* Ensure that the entire register save area is addressable via
11558 the stack pointer, if we will restore via sp. */
11559 if (TARGET_64BIT
11560 && m->fs.sp_offset > 0x7fffffff
11561 && !(m->fs.fp_valid || m->fs.drap_valid)
11562 && (frame.nsseregs + frame.nregs) != 0)
11563 {
11564 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11565 GEN_INT (m->fs.sp_offset
11566 - frame.sse_reg_save_offset),
11567 style,
11568 m->fs.cfa_reg == stack_pointer_rtx);
11569 }
11570 }
11571
11572 /* If there are any SSE registers to restore, then we have to do it
11573 via moves, since there's obviously no pop for SSE regs. */
11574 if (frame.nsseregs)
11575 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11576 style == 2);
11577
11578 if (restore_regs_via_mov)
11579 {
11580 rtx t;
11581
11582 if (frame.nregs)
11583 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11584
11585 /* eh_return epilogues need %ecx added to the stack pointer. */
11586 if (style == 2)
11587 {
11588 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11589
11590 /* Stack align doesn't work with eh_return. */
11591 gcc_assert (!stack_realign_drap);
11592 /* Neither do regparm nested functions.  */
11593 gcc_assert (!ix86_static_chain_on_stack);
11594
11595 if (frame_pointer_needed)
11596 {
11597 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11598 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11599 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11600
11601 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11602 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11603
11604 /* Note that we use SA as a temporary CFA, as the return
11605 address is at the proper place relative to it. We
11606 pretend this happens at the FP restore insn because
11607 prior to this insn the FP would be stored at the wrong
11608 offset relative to SA, and after this insn we have no
11609 other reasonable register to use for the CFA. We don't
11610 bother resetting the CFA to the SP for the duration of
11611 the return insn. */
11612 add_reg_note (insn, REG_CFA_DEF_CFA,
11613 plus_constant (Pmode, sa, UNITS_PER_WORD));
11614 ix86_add_queued_cfa_restore_notes (insn);
11615 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11616 RTX_FRAME_RELATED_P (insn) = 1;
11617
11618 m->fs.cfa_reg = sa;
11619 m->fs.cfa_offset = UNITS_PER_WORD;
11620 m->fs.fp_valid = false;
11621
11622 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11623 const0_rtx, style, false);
11624 }
11625 else
11626 {
11627 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11628 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11629 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11630 ix86_add_queued_cfa_restore_notes (insn);
11631
11632 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11633 if (m->fs.cfa_offset != UNITS_PER_WORD)
11634 {
11635 m->fs.cfa_offset = UNITS_PER_WORD;
11636 add_reg_note (insn, REG_CFA_DEF_CFA,
11637 plus_constant (Pmode, stack_pointer_rtx,
11638 UNITS_PER_WORD));
11639 RTX_FRAME_RELATED_P (insn) = 1;
11640 }
11641 }
11642 m->fs.sp_offset = UNITS_PER_WORD;
11643 m->fs.sp_valid = true;
11644 }
11645 }
11646 else
11647 {
11648 /* SEH requires that the function end with (1) a stack adjustment
11649 if necessary, (2) a sequence of pops, and (3) a return or
11650 jump instruction. Prevent insns from the function body from
11651 being scheduled into this sequence. */
11652 if (TARGET_SEH)
11653 {
11654 /* Prevent a catch region from being adjacent to the standard
11655 epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
11656 several other flags that would be interesting to test are
11657 set up yet.  */
11658 if (flag_non_call_exceptions)
11659 emit_insn (gen_nops (const1_rtx));
11660 else
11661 emit_insn (gen_blockage ());
11662 }
11663
11664 /* The first step is to deallocate the stack frame so that we can
11665 pop the registers.  Also do it on SEH targets for very large
11666 frames, as the emitted instructions aren't allowed by the ABI in
11667 epilogues.  */
11668 if (!m->fs.sp_valid
11669 || (TARGET_SEH
11670 && (m->fs.sp_offset - frame.reg_save_offset
11671 >= SEH_MAX_FRAME_SIZE)))
11672 {
11673 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11674 GEN_INT (m->fs.fp_offset
11675 - frame.reg_save_offset),
11676 style, false);
11677 }
11678 else if (m->fs.sp_offset != frame.reg_save_offset)
11679 {
11680 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11681 GEN_INT (m->fs.sp_offset
11682 - frame.reg_save_offset),
11683 style,
11684 m->fs.cfa_reg == stack_pointer_rtx);
11685 }
11686
11687 ix86_emit_restore_regs_using_pop ();
11688 }
11689
11690 /* If we used a frame pointer and haven't already got rid of it,
11691 then do so now. */
11692 if (m->fs.fp_valid)
11693 {
11694 /* If the stack pointer is valid and pointing at the frame
11695 pointer store address, then we only need a pop. */
11696 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11697 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11698 /* A "leave" results in shorter dependency chains on CPUs that are
11699 able to grok it fast. */
11700 else if (TARGET_USE_LEAVE
11701 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11702 || !cfun->machine->use_fast_prologue_epilogue)
11703 ix86_emit_leave ();
11704 else
11705 {
11706 pro_epilogue_adjust_stack (stack_pointer_rtx,
11707 hard_frame_pointer_rtx,
11708 const0_rtx, style, !using_drap);
11709 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11710 }
11711 }
11712
11713 if (using_drap)
11714 {
11715 int param_ptr_offset = UNITS_PER_WORD;
11716 rtx insn;
11717
11718 gcc_assert (stack_realign_drap);
11719
11720 if (ix86_static_chain_on_stack)
11721 param_ptr_offset += UNITS_PER_WORD;
11722 if (!call_used_regs[REGNO (crtl->drap_reg)])
11723 param_ptr_offset += UNITS_PER_WORD;
11724
11725 insn = emit_insn (gen_rtx_SET
11726 (VOIDmode, stack_pointer_rtx,
11727 gen_rtx_PLUS (Pmode,
11728 crtl->drap_reg,
11729 GEN_INT (-param_ptr_offset))));
11730 m->fs.cfa_reg = stack_pointer_rtx;
11731 m->fs.cfa_offset = param_ptr_offset;
11732 m->fs.sp_offset = param_ptr_offset;
11733 m->fs.realigned = false;
11734
11735 add_reg_note (insn, REG_CFA_DEF_CFA,
11736 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11737 GEN_INT (param_ptr_offset)));
11738 RTX_FRAME_RELATED_P (insn) = 1;
11739
11740 if (!call_used_regs[REGNO (crtl->drap_reg)])
11741 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11742 }
11743
11744 /* At this point the stack pointer must be valid, and we must have
11745 restored all of the registers. We may not have deallocated the
11746 entire stack frame. We've delayed this until now because it may
11747 be possible to merge the local stack deallocation with the
11748 deallocation forced by ix86_static_chain_on_stack. */
11749 gcc_assert (m->fs.sp_valid);
11750 gcc_assert (!m->fs.fp_valid);
11751 gcc_assert (!m->fs.realigned);
11752 if (m->fs.sp_offset != UNITS_PER_WORD)
11753 {
11754 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11755 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11756 style, true);
11757 }
11758 else
11759 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11760
11761 /* Sibcall epilogues don't want a return instruction. */
11762 if (style == 0)
11763 {
11764 m->fs = frame_state_save;
11765 return;
11766 }
11767
11768 if (crtl->args.pops_args && crtl->args.size)
11769 {
11770 rtx popc = GEN_INT (crtl->args.pops_args);
11771
11772 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
11773 address, do an explicit add, and jump indirectly to the caller. */
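      /* A rough, illustrative sketch of the sequence emitted below, with N
	 standing for the number of argument bytes to pop:

	   popl %ecx		# pop the return address
	   addl $N, %esp	# release the arguments
	   jmp  *%ecx		# return to the caller  */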
11774
11775 if (crtl->args.pops_args >= 65536)
11776 {
11777 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11778 rtx insn;
11779
11780 /* There is no "pascal" calling convention in any 64bit ABI. */
11781 gcc_assert (!TARGET_64BIT);
11782
11783 insn = emit_insn (gen_pop (ecx));
11784 m->fs.cfa_offset -= UNITS_PER_WORD;
11785 m->fs.sp_offset -= UNITS_PER_WORD;
11786
11787 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11788 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11789 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11790 add_reg_note (insn, REG_CFA_REGISTER,
11791 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11792 RTX_FRAME_RELATED_P (insn) = 1;
11793
11794 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11795 popc, -1, true);
11796 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11797 }
11798 else
11799 emit_jump_insn (gen_simple_return_pop_internal (popc));
11800 }
11801 else
11802 emit_jump_insn (gen_simple_return_internal ());
11803
11804 /* Restore the state back to the state from the prologue,
11805 so that it's correct for the next epilogue. */
11806 m->fs = frame_state_save;
11807 }
11808
11809 /* Reset from the function's potential modifications. */
11810
11811 static void
11812 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
11813 {
11814 if (pic_offset_table_rtx
11815 && !ix86_use_pseudo_pic_reg ())
11816 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11817 #if TARGET_MACHO
11818 /* Mach-O doesn't support labels at the end of objects, so if
11819 it looks like we might want one, insert a NOP. */
11820 {
11821 rtx_insn *insn = get_last_insn ();
11822 rtx_insn *deleted_debug_label = NULL;
11823 while (insn
11824 && NOTE_P (insn)
11825 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11826 {
11827 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
11828 notes only, instead set their CODE_LABEL_NUMBER to -1,
11829 otherwise there would be code generation differences
11830 between -g and -g0. */
11831 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11832 deleted_debug_label = insn;
11833 insn = PREV_INSN (insn);
11834 }
11835 if (insn
11836 && (LABEL_P (insn)
11837 || (NOTE_P (insn)
11838 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11839 fputs ("\tnop\n", file);
11840 else if (deleted_debug_label)
11841 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11842 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11843 CODE_LABEL_NUMBER (insn) = -1;
11844 }
11845 #endif
11846
11847 }
11848
11849 /* Return a scratch register to use in the split stack prologue. The
11850 split stack prologue is used for -fsplit-stack. Its instructions are
11851 the first in the function, even before the regular prologue.
11852 The scratch register can be any caller-saved register which is not
11853 used for parameters or for the static chain. */
11854
11855 static unsigned int
11856 split_stack_prologue_scratch_regno (void)
11857 {
11858 if (TARGET_64BIT)
11859 return R11_REG;
11860 else
11861 {
11862 bool is_fastcall, is_thiscall;
11863 int regparm;
11864
11865 is_fastcall = (lookup_attribute ("fastcall",
11866 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11867 != NULL);
11868 is_thiscall = (lookup_attribute ("thiscall",
11869 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11870 != NULL);
11871 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11872
11873 if (is_fastcall)
11874 {
11875 if (DECL_STATIC_CHAIN (cfun->decl))
11876 {
11877 sorry ("-fsplit-stack does not support fastcall with "
11878 "nested function");
11879 return INVALID_REGNUM;
11880 }
11881 return AX_REG;
11882 }
11883 else if (is_thiscall)
11884 {
11885 if (!DECL_STATIC_CHAIN (cfun->decl))
11886 return DX_REG;
11887 return AX_REG;
11888 }
11889 else if (regparm < 3)
11890 {
11891 if (!DECL_STATIC_CHAIN (cfun->decl))
11892 return CX_REG;
11893 else
11894 {
11895 if (regparm >= 2)
11896 {
11897 sorry ("-fsplit-stack does not support 2 register "
11898 "parameters for a nested function");
11899 return INVALID_REGNUM;
11900 }
11901 return DX_REG;
11902 }
11903 }
11904 else
11905 {
11906 /* FIXME: We could make this work by pushing a register
11907 around the addition and comparison. */
11908 sorry ("-fsplit-stack does not support 3 register parameters");
11909 return INVALID_REGNUM;
11910 }
11911 }
11912 }
11913
11914 /* A SYMBOL_REF for the function which allocates new stack space for
11915 -fsplit-stack. */
11916
11917 static GTY(()) rtx split_stack_fn;
11918
11919 /* A SYMBOL_REF for the more-stack function to call when using the large
11920 model. */
11921
11922 static GTY(()) rtx split_stack_fn_large;
11923
11924 /* Handle -fsplit-stack. These are the first instructions in the
11925 function, even before the regular prologue. */
11926
11927 void
11928 ix86_expand_split_stack_prologue (void)
11929 {
11930 struct ix86_frame frame;
11931 HOST_WIDE_INT allocate;
11932 unsigned HOST_WIDE_INT args_size;
11933 rtx_code_label *label;
11934 rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11935 rtx scratch_reg = NULL_RTX;
11936 rtx_code_label *varargs_label = NULL;
11937 rtx fn;
11938
11939 gcc_assert (flag_split_stack && reload_completed);
11940
11941 ix86_finalize_stack_realign_flags ();
11942 ix86_compute_frame_layout (&frame);
11943 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11944
11945 /* This is the label we will branch to if we have enough stack
11946 space. We expect the basic block reordering pass to reverse this
11947 branch if optimizing, so that we branch in the unlikely case. */
11948 label = gen_label_rtx ();
11949
11950 /* We need to compare the stack pointer minus the frame size with
11951 the stack boundary in the TCB. The stack boundary always gives
11952 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11953 can compare directly. Otherwise we need to do an addition. */
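  /* As a minimal sketch (assuming x86-64 and a frame small enough to compare
     directly; the TCB offset of the stack boundary is left symbolic here),
     the comparison and branch emitted below amount to:

       cmpq %fs:<boundary-offset>, %rsp
       jae  .Lenough		# i.e. branch to LABEL when stack suffices  */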
11954
11955 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11956 UNSPEC_STACK_CHECK);
11957 limit = gen_rtx_CONST (Pmode, limit);
11958 limit = gen_rtx_MEM (Pmode, limit);
11959 if (allocate < SPLIT_STACK_AVAILABLE)
11960 current = stack_pointer_rtx;
11961 else
11962 {
11963 unsigned int scratch_regno;
11964 rtx offset;
11965
11966 /* We need a scratch register to hold the stack pointer minus
11967 the required frame size. Since this is the very start of the
11968 function, the scratch register can be any caller-saved
11969 register which is not used for parameters. */
11970 offset = GEN_INT (- allocate);
11971 scratch_regno = split_stack_prologue_scratch_regno ();
11972 if (scratch_regno == INVALID_REGNUM)
11973 return;
11974 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11975 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11976 {
11977 /* We don't use ix86_gen_add3 in this case because it will
11978 want to split to lea, but when not optimizing the insn
11979 will not be split after this point. */
11980 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11981 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11982 offset)));
11983 }
11984 else
11985 {
11986 emit_move_insn (scratch_reg, offset);
11987 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11988 stack_pointer_rtx));
11989 }
11990 current = scratch_reg;
11991 }
11992
11993 ix86_expand_branch (GEU, current, limit, label);
11994 jump_insn = get_last_insn ();
11995 JUMP_LABEL (jump_insn) = label;
11996
11997 /* Mark the jump as very likely to be taken. */
11998 add_int_reg_note (jump_insn, REG_BR_PROB,
11999 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12000
12001 if (split_stack_fn == NULL_RTX)
12002 {
12003 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12004 SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12005 }
12006 fn = split_stack_fn;
12007
12008 /* Get more stack space. We pass in the desired stack space and the
12009 size of the arguments to copy to the new stack. In 32-bit mode
12010 we push the parameters; __morestack will return on a new stack
12011 anyhow. In 64-bit mode we pass the parameters in r10 and
12012 r11. */
12013 allocate_rtx = GEN_INT (allocate);
12014 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12015 call_fusage = NULL_RTX;
12016 if (TARGET_64BIT)
12017 {
12018 rtx reg10, reg11;
12019
12020 reg10 = gen_rtx_REG (Pmode, R10_REG);
12021 reg11 = gen_rtx_REG (Pmode, R11_REG);
12022
12023 /* If this function uses a static chain, it will be in %r10.
12024 Preserve it across the call to __morestack. */
12025 if (DECL_STATIC_CHAIN (cfun->decl))
12026 {
12027 rtx rax;
12028
12029 rax = gen_rtx_REG (word_mode, AX_REG);
12030 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12031 use_reg (&call_fusage, rax);
12032 }
12033
12034 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12035 && !TARGET_PECOFF)
12036 {
12037 HOST_WIDE_INT argval;
12038
12039 gcc_assert (Pmode == DImode);
12040 /* When using the large model we need to load the address
12041 into a register, and we've run out of registers. So we
12042 switch to a different calling convention, and we call a
12043 different function: __morestack_large. We pass the
12044 argument size in the upper 32 bits of r10 and pass the
12045 frame size in the lower 32 bits. */
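	  /* Illustrative example (hypothetical values): with args_size == 0x20
	     and allocate == 0x1000, the combined value loaded into %r10 below
	     is 0x0000002000001000.  */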
12046 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12047 gcc_assert ((args_size & 0xffffffff) == args_size);
12048
12049 if (split_stack_fn_large == NULL_RTX)
12050 {
12051 split_stack_fn_large =
12052 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12053 SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12054 }
12055 if (ix86_cmodel == CM_LARGE_PIC)
12056 {
12057 rtx_code_label *label;
12058 rtx x;
12059
12060 label = gen_label_rtx ();
12061 emit_label (label);
12062 LABEL_PRESERVE_P (label) = 1;
12063 emit_insn (gen_set_rip_rex64 (reg10, label));
12064 emit_insn (gen_set_got_offset_rex64 (reg11, label));
12065 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12066 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12067 UNSPEC_GOT);
12068 x = gen_rtx_CONST (Pmode, x);
12069 emit_move_insn (reg11, x);
12070 x = gen_rtx_PLUS (Pmode, reg10, reg11);
12071 x = gen_const_mem (Pmode, x);
12072 emit_move_insn (reg11, x);
12073 }
12074 else
12075 emit_move_insn (reg11, split_stack_fn_large);
12076
12077 fn = reg11;
12078
12079 argval = ((args_size << 16) << 16) + allocate;
12080 emit_move_insn (reg10, GEN_INT (argval));
12081 }
12082 else
12083 {
12084 emit_move_insn (reg10, allocate_rtx);
12085 emit_move_insn (reg11, GEN_INT (args_size));
12086 use_reg (&call_fusage, reg11);
12087 }
12088
12089 use_reg (&call_fusage, reg10);
12090 }
12091 else
12092 {
12093 emit_insn (gen_push (GEN_INT (args_size)));
12094 emit_insn (gen_push (allocate_rtx));
12095 }
12096 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12097 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12098 NULL_RTX, false);
12099 add_function_usage_to (call_insn, call_fusage);
12100
12101 /* In order to make call/return prediction work right, we now need
12102 to execute a return instruction. See
12103 libgcc/config/i386/morestack.S for the details on how this works.
12104
12105 For flow purposes gcc must not see this as a return
12106 instruction--we need control flow to continue at the subsequent
12107 label. Therefore, we use an unspec. */
12108 gcc_assert (crtl->args.pops_args < 65536);
12109 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12110
12111 /* If we are in 64-bit mode and this function uses a static chain,
12112 we saved %r10 in %rax before calling __morestack. */
12113 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12114 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12115 gen_rtx_REG (word_mode, AX_REG));
12116
12117 /* If this function calls va_start, we need to store a pointer to
12118 the arguments on the old stack, because they may not have been
12119 all copied to the new stack. At this point the old stack can be
12120 found at the frame pointer value used by __morestack, because
12121 __morestack has set that up before calling back to us. Here we
12122 store that pointer in a scratch register, and in
12123 ix86_expand_prologue we store the scratch register in a stack
12124 slot. */
12125 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12126 {
12127 unsigned int scratch_regno;
12128 rtx frame_reg;
12129 int words;
12130
12131 scratch_regno = split_stack_prologue_scratch_regno ();
12132 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12133 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12134
12135 /* 64-bit:
12136 fp -> old fp value
12137 return address within this function
12138 return address of caller of this function
12139 stack arguments
12140 So we add three words to get to the stack arguments.
12141
12142 32-bit:
12143 fp -> old fp value
12144 return address within this function
12145 first argument to __morestack
12146 second argument to __morestack
12147 return address of caller of this function
12148 stack arguments
12149 So we add five words to get to the stack arguments.
12150 */
12151 words = TARGET_64BIT ? 3 : 5;
12152 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12153 gen_rtx_PLUS (Pmode, frame_reg,
12154 GEN_INT (words * UNITS_PER_WORD))));
12155
12156 varargs_label = gen_label_rtx ();
12157 emit_jump_insn (gen_jump (varargs_label));
12158 JUMP_LABEL (get_last_insn ()) = varargs_label;
12159
12160 emit_barrier ();
12161 }
12162
12163 emit_label (label);
12164 LABEL_NUSES (label) = 1;
12165
12166 /* If this function calls va_start, we now have to set the scratch
12167 register for the case where we do not call __morestack. In this
12168 case we need to set it based on the stack pointer. */
12169 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12170 {
12171 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12172 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12173 GEN_INT (UNITS_PER_WORD))));
12174
12175 emit_label (varargs_label);
12176 LABEL_NUSES (varargs_label) = 1;
12177 }
12178 }
12179
12180 /* We may have to tell the dataflow pass that the split stack prologue
12181 is initializing a scratch register. */
12182
12183 static void
12184 ix86_live_on_entry (bitmap regs)
12185 {
12186 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12187 {
12188 gcc_assert (flag_split_stack);
12189 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12190 }
12191 }
12192 \f
12193 /* Extract the parts of an RTL expression that is a valid memory address
12194 for an instruction. Return 0 if the structure of the address is
12195 grossly off. Return -1 if the address contains ASHIFT, so it is not
12196 strictly valid but is still used for computing the length of an lea instruction. */
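/* As an illustration, an address such as 8(%ebx,%ecx,4) decomposes into
   base = %ebx, index = %ecx, scale = 4 and disp = 8.  */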
12197
12198 int
12199 ix86_decompose_address (rtx addr, struct ix86_address *out)
12200 {
12201 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12202 rtx base_reg, index_reg;
12203 HOST_WIDE_INT scale = 1;
12204 rtx scale_rtx = NULL_RTX;
12205 rtx tmp;
12206 int retval = 1;
12207 enum ix86_address_seg seg = SEG_DEFAULT;
12208
12209 /* Allow zero-extended SImode addresses,
12210 they will be emitted with addr32 prefix. */
12211 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12212 {
12213 if (GET_CODE (addr) == ZERO_EXTEND
12214 && GET_MODE (XEXP (addr, 0)) == SImode)
12215 {
12216 addr = XEXP (addr, 0);
12217 if (CONST_INT_P (addr))
12218 return 0;
12219 }
12220 else if (GET_CODE (addr) == AND
12221 && const_32bit_mask (XEXP (addr, 1), DImode))
12222 {
12223 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12224 if (addr == NULL_RTX)
12225 return 0;
12226
12227 if (CONST_INT_P (addr))
12228 return 0;
12229 }
12230 }
12231
12232 /* Allow SImode subregs of DImode addresses,
12233 they will be emitted with addr32 prefix. */
12234 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12235 {
12236 if (GET_CODE (addr) == SUBREG
12237 && GET_MODE (SUBREG_REG (addr)) == DImode)
12238 {
12239 addr = SUBREG_REG (addr);
12240 if (CONST_INT_P (addr))
12241 return 0;
12242 }
12243 }
12244
12245 if (REG_P (addr))
12246 base = addr;
12247 else if (GET_CODE (addr) == SUBREG)
12248 {
12249 if (REG_P (SUBREG_REG (addr)))
12250 base = addr;
12251 else
12252 return 0;
12253 }
12254 else if (GET_CODE (addr) == PLUS)
12255 {
12256 rtx addends[4], op;
12257 int n = 0, i;
12258
12259 op = addr;
12260 do
12261 {
12262 if (n >= 4)
12263 return 0;
12264 addends[n++] = XEXP (op, 1);
12265 op = XEXP (op, 0);
12266 }
12267 while (GET_CODE (op) == PLUS);
12268 if (n >= 4)
12269 return 0;
12270 addends[n] = op;
12271
12272 for (i = n; i >= 0; --i)
12273 {
12274 op = addends[i];
12275 switch (GET_CODE (op))
12276 {
12277 case MULT:
12278 if (index)
12279 return 0;
12280 index = XEXP (op, 0);
12281 scale_rtx = XEXP (op, 1);
12282 break;
12283
12284 case ASHIFT:
12285 if (index)
12286 return 0;
12287 index = XEXP (op, 0);
12288 tmp = XEXP (op, 1);
12289 if (!CONST_INT_P (tmp))
12290 return 0;
12291 scale = INTVAL (tmp);
12292 if ((unsigned HOST_WIDE_INT) scale > 3)
12293 return 0;
12294 scale = 1 << scale;
12295 break;
12296
12297 case ZERO_EXTEND:
12298 op = XEXP (op, 0);
12299 if (GET_CODE (op) != UNSPEC)
12300 return 0;
12301 /* FALLTHRU */
12302
12303 case UNSPEC:
12304 if (XINT (op, 1) == UNSPEC_TP
12305 && TARGET_TLS_DIRECT_SEG_REFS
12306 && seg == SEG_DEFAULT)
12307 seg = DEFAULT_TLS_SEG_REG;
12308 else
12309 return 0;
12310 break;
12311
12312 case SUBREG:
12313 if (!REG_P (SUBREG_REG (op)))
12314 return 0;
12315 /* FALLTHRU */
12316
12317 case REG:
12318 if (!base)
12319 base = op;
12320 else if (!index)
12321 index = op;
12322 else
12323 return 0;
12324 break;
12325
12326 case CONST:
12327 case CONST_INT:
12328 case SYMBOL_REF:
12329 case LABEL_REF:
12330 if (disp)
12331 return 0;
12332 disp = op;
12333 break;
12334
12335 default:
12336 return 0;
12337 }
12338 }
12339 }
12340 else if (GET_CODE (addr) == MULT)
12341 {
12342 index = XEXP (addr, 0); /* index*scale */
12343 scale_rtx = XEXP (addr, 1);
12344 }
12345 else if (GET_CODE (addr) == ASHIFT)
12346 {
12347 /* We're called for lea too, which implements ashift on occasion. */
12348 index = XEXP (addr, 0);
12349 tmp = XEXP (addr, 1);
12350 if (!CONST_INT_P (tmp))
12351 return 0;
12352 scale = INTVAL (tmp);
12353 if ((unsigned HOST_WIDE_INT) scale > 3)
12354 return 0;
12355 scale = 1 << scale;
12356 retval = -1;
12357 }
12358 else
12359 disp = addr; /* displacement */
12360
12361 if (index)
12362 {
12363 if (REG_P (index))
12364 ;
12365 else if (GET_CODE (index) == SUBREG
12366 && REG_P (SUBREG_REG (index)))
12367 ;
12368 else
12369 return 0;
12370 }
12371
12372 /* Extract the integral value of scale. */
12373 if (scale_rtx)
12374 {
12375 if (!CONST_INT_P (scale_rtx))
12376 return 0;
12377 scale = INTVAL (scale_rtx);
12378 }
12379
12380 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12381 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12382
12383 /* Avoid useless 0 displacement. */
12384 if (disp == const0_rtx && (base || index))
12385 disp = NULL_RTX;
12386
12387 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
12388 if (base_reg && index_reg && scale == 1
12389 && (index_reg == arg_pointer_rtx
12390 || index_reg == frame_pointer_rtx
12391 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12392 {
12393 rtx tmp;
12394 tmp = base, base = index, index = tmp;
12395 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12396 }
12397
12398 /* Special case: %ebp cannot be encoded as a base without a displacement.
12399 Similarly %r13. */
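  /* E.g. "(%ebp)" must be emitted as "0(%ebp)"; with mod=00 in the ModR/M
     byte, that base-register encoding instead means disp32 (or RIP-relative
     addressing in 64-bit mode).  */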
12400 if (!disp
12401 && base_reg
12402 && (base_reg == hard_frame_pointer_rtx
12403 || base_reg == frame_pointer_rtx
12404 || base_reg == arg_pointer_rtx
12405 || (REG_P (base_reg)
12406 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12407 || REGNO (base_reg) == R13_REG))))
12408 disp = const0_rtx;
12409
12410 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
12411 Avoid this by transforming to [%esi+0].
12412 Reload calls address legitimization without cfun defined, so we need
12413 to test cfun for being non-NULL. */
12414 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12415 && base_reg && !index_reg && !disp
12416 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12417 disp = const0_rtx;
12418
12419 /* Special case: encode reg+reg instead of reg*2. */
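  /* E.g. prefer "(%eax,%eax)" over "(,%eax,2)"; an index with no base would
     otherwise force a 32-bit zero displacement into the encoding.  */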
12420 if (!base && index && scale == 2)
12421 base = index, base_reg = index_reg, scale = 1;
12422
12423 /* Special case: scaling cannot be encoded without base or displacement. */
12424 if (!base && !disp && index && scale != 1)
12425 disp = const0_rtx;
12426
12427 out->base = base;
12428 out->index = index;
12429 out->disp = disp;
12430 out->scale = scale;
12431 out->seg = seg;
12432
12433 return retval;
12434 }
12435 \f
12436 /* Return cost of the memory address x.
12437 For i386, it is better to use a complex address than let gcc copy
12438 the address into a reg and make a new pseudo. But not if the address
12439 requires two regs - that would mean more pseudos with longer
12440 lifetimes. */
12441 static int
12442 ix86_address_cost (rtx x, enum machine_mode, addr_space_t, bool)
12443 {
12444 struct ix86_address parts;
12445 int cost = 1;
12446 int ok = ix86_decompose_address (x, &parts);
12447
12448 gcc_assert (ok);
12449
12450 if (parts.base && GET_CODE (parts.base) == SUBREG)
12451 parts.base = SUBREG_REG (parts.base);
12452 if (parts.index && GET_CODE (parts.index) == SUBREG)
12453 parts.index = SUBREG_REG (parts.index);
12454
12455 /* Attempt to minimize number of registers in the address. */
12456 if ((parts.base
12457 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12458 || (parts.index
12459 && (!REG_P (parts.index)
12460 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12461 cost++;
12462
12463 /* When the address base or index is "pic_offset_table_rtx", we don't
12464 increase the address cost. When a memory operand involving
12465 "pic_offset_table_rtx" is not invariant itself, it most likely means
12466 that the base or index is not invariant. Therefore only
12467 "pic_offset_table_rtx" could be hoisted out, which is not profitable for x86. */
12468 if (parts.base
12469 && (!pic_offset_table_rtx
12470 || REGNO (pic_offset_table_rtx) != REGNO(parts.base))
12471 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12472 && parts.index
12473 && (!pic_offset_table_rtx
12474 || REGNO (pic_offset_table_rtx) != REGNO(parts.index))
12475 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12476 && parts.base != parts.index)
12477 cost++;
12478
12479 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
12480 since its predecode logic can't detect the length of such instructions
12481 and it falls back to vector decoding. Increase the cost of such
12482 addresses here. The penalty is at least 2 cycles. It may be worthwhile
12483 to split such addresses or even refuse such addresses at all.
12484
12485 Following addressing modes are affected:
12486 [base+scale*index]
12487 [scale*index+disp]
12488 [base+index]
12489
12490 The first and last case may be avoidable by explicitly coding the zero into
12491 the memory address, but I don't have an AMD-K6 machine handy to check this
12492 theory. */
12493
12494 if (TARGET_K6
12495 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12496 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12497 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12498 cost += 10;
12499
12500 return cost;
12501 }
12502 \f
12503 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12504 this is used to form addresses to local data when -fPIC is in
12505 use. */
12506
12507 static bool
12508 darwin_local_data_pic (rtx disp)
12509 {
12510 return (GET_CODE (disp) == UNSPEC
12511 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12512 }
12513
12514 /* Determine if a given RTX is a valid constant. We already know this
12515 satisfies CONSTANT_P. */
12516
12517 static bool
12518 ix86_legitimate_constant_p (enum machine_mode, rtx x)
12519 {
12520 switch (GET_CODE (x))
12521 {
12522 case CONST:
12523 x = XEXP (x, 0);
12524
12525 if (GET_CODE (x) == PLUS)
12526 {
12527 if (!CONST_INT_P (XEXP (x, 1)))
12528 return false;
12529 x = XEXP (x, 0);
12530 }
12531
12532 if (TARGET_MACHO && darwin_local_data_pic (x))
12533 return true;
12534
12535 /* Only some unspecs are valid as "constants". */
12536 if (GET_CODE (x) == UNSPEC)
12537 switch (XINT (x, 1))
12538 {
12539 case UNSPEC_GOT:
12540 case UNSPEC_GOTOFF:
12541 case UNSPEC_PLTOFF:
12542 return TARGET_64BIT;
12543 case UNSPEC_TPOFF:
12544 case UNSPEC_NTPOFF:
12545 x = XVECEXP (x, 0, 0);
12546 return (GET_CODE (x) == SYMBOL_REF
12547 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12548 case UNSPEC_DTPOFF:
12549 x = XVECEXP (x, 0, 0);
12550 return (GET_CODE (x) == SYMBOL_REF
12551 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12552 default:
12553 return false;
12554 }
12555
12556 /* We must have drilled down to a symbol. */
12557 if (GET_CODE (x) == LABEL_REF)
12558 return true;
12559 if (GET_CODE (x) != SYMBOL_REF)
12560 return false;
12561 /* FALLTHRU */
12562
12563 case SYMBOL_REF:
12564 /* TLS symbols are never valid. */
12565 if (SYMBOL_REF_TLS_MODEL (x))
12566 return false;
12567
12568 /* DLLIMPORT symbols are never valid. */
12569 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12570 && SYMBOL_REF_DLLIMPORT_P (x))
12571 return false;
12572
12573 #if TARGET_MACHO
12574 /* mdynamic-no-pic */
12575 if (MACHO_DYNAMIC_NO_PIC_P)
12576 return machopic_symbol_defined_p (x);
12577 #endif
12578 break;
12579
12580 case CONST_DOUBLE:
12581 if (GET_MODE (x) == TImode
12582 && x != CONST0_RTX (TImode)
12583 && !TARGET_64BIT)
12584 return false;
12585 break;
12586
12587 case CONST_VECTOR:
12588 if (!standard_sse_constant_p (x))
12589 return false;
12590
12591 default:
12592 break;
12593 }
12594
12595 /* Otherwise we handle everything else in the move patterns. */
12596 return true;
12597 }
12598
12599 /* Determine if it's legal to put X into the constant pool. This
12600 is not possible for the address of thread-local symbols, which
12601 is checked above. */
12602
12603 static bool
12604 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12605 {
12606 /* We can always put integral constants and vectors in memory. */
12607 switch (GET_CODE (x))
12608 {
12609 case CONST_INT:
12610 case CONST_DOUBLE:
12611 case CONST_VECTOR:
12612 return false;
12613
12614 default:
12615 break;
12616 }
12617 return !ix86_legitimate_constant_p (mode, x);
12618 }
12619
12620 /* Return true if the symbol is marked as dllimport or as a stub
12621 variable, otherwise return false. */
12622
12623 static bool
12624 is_imported_p (rtx x)
12625 {
12626 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12627 || GET_CODE (x) != SYMBOL_REF)
12628 return false;
12629
12630 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12631 }
12632
12633
12634 /* Nonzero if the constant value X is a legitimate general operand
12635 when generating PIC code. It is given that flag_pic is on and
12636 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12637
12638 bool
12639 legitimate_pic_operand_p (rtx x)
12640 {
12641 rtx inner;
12642
12643 switch (GET_CODE (x))
12644 {
12645 case CONST:
12646 inner = XEXP (x, 0);
12647 if (GET_CODE (inner) == PLUS
12648 && CONST_INT_P (XEXP (inner, 1)))
12649 inner = XEXP (inner, 0);
12650
12651 /* Only some unspecs are valid as "constants". */
12652 if (GET_CODE (inner) == UNSPEC)
12653 switch (XINT (inner, 1))
12654 {
12655 case UNSPEC_GOT:
12656 case UNSPEC_GOTOFF:
12657 case UNSPEC_PLTOFF:
12658 return TARGET_64BIT;
12659 case UNSPEC_TPOFF:
12660 x = XVECEXP (inner, 0, 0);
12661 return (GET_CODE (x) == SYMBOL_REF
12662 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12663 case UNSPEC_MACHOPIC_OFFSET:
12664 return legitimate_pic_address_disp_p (x);
12665 default:
12666 return false;
12667 }
12668 /* FALLTHRU */
12669
12670 case SYMBOL_REF:
12671 case LABEL_REF:
12672 return legitimate_pic_address_disp_p (x);
12673
12674 default:
12675 return true;
12676 }
12677 }
12678
12679 /* Determine if a given CONST RTX is a valid memory displacement
12680 in PIC mode. */
12681
12682 bool
12683 legitimate_pic_address_disp_p (rtx disp)
12684 {
12685 bool saw_plus;
12686
12687 /* In 64bit mode we can allow direct addresses of symbols and labels
12688 when they are not dynamic symbols. */
12689 if (TARGET_64BIT)
12690 {
12691 rtx op0 = disp, op1;
12692
12693 switch (GET_CODE (disp))
12694 {
12695 case LABEL_REF:
12696 return true;
12697
12698 case CONST:
12699 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12700 break;
12701 op0 = XEXP (XEXP (disp, 0), 0);
12702 op1 = XEXP (XEXP (disp, 0), 1);
12703 if (!CONST_INT_P (op1)
12704 || INTVAL (op1) >= 16*1024*1024
12705 || INTVAL (op1) < -16*1024*1024)
12706 break;
12707 if (GET_CODE (op0) == LABEL_REF)
12708 return true;
12709 if (GET_CODE (op0) == CONST
12710 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12711 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12712 return true;
12713 if (GET_CODE (op0) == UNSPEC
12714 && XINT (op0, 1) == UNSPEC_PCREL)
12715 return true;
12716 if (GET_CODE (op0) != SYMBOL_REF)
12717 break;
12718 /* FALLTHRU */
12719
12720 case SYMBOL_REF:
12721 /* TLS references should always be enclosed in UNSPEC.
12722 A dllimported symbol always needs to be resolved. */
12723 if (SYMBOL_REF_TLS_MODEL (op0)
12724 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12725 return false;
12726
12727 if (TARGET_PECOFF)
12728 {
12729 if (is_imported_p (op0))
12730 return true;
12731
12732 if (SYMBOL_REF_FAR_ADDR_P (op0)
12733 || !SYMBOL_REF_LOCAL_P (op0))
12734 break;
12735
12736 /* Function symbols need to be resolved only for
12737 the large model.
12738 For the small model we don't need to resolve anything
12739 here. */
12740 if ((ix86_cmodel != CM_LARGE_PIC
12741 && SYMBOL_REF_FUNCTION_P (op0))
12742 || ix86_cmodel == CM_SMALL_PIC)
12743 return true;
12744 /* Non-external symbols don't need to be resolved for
12745 the large and medium models. */
12746 if ((ix86_cmodel == CM_LARGE_PIC
12747 || ix86_cmodel == CM_MEDIUM_PIC)
12748 && !SYMBOL_REF_EXTERNAL_P (op0))
12749 return true;
12750 }
12751 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12752 && SYMBOL_REF_LOCAL_P (op0)
12753 && ix86_cmodel != CM_LARGE_PIC)
12754 return true;
12755 break;
12756
12757 default:
12758 break;
12759 }
12760 }
12761 if (GET_CODE (disp) != CONST)
12762 return false;
12763 disp = XEXP (disp, 0);
12764
12765 if (TARGET_64BIT)
12766 {
12767 /* It is unsafe to allow PLUS expressions here. This limits the allowed
12768 distance of GOT-relative references. We should not need these anyway. */
12769 if (GET_CODE (disp) != UNSPEC
12770 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12771 && XINT (disp, 1) != UNSPEC_GOTOFF
12772 && XINT (disp, 1) != UNSPEC_PCREL
12773 && XINT (disp, 1) != UNSPEC_PLTOFF))
12774 return false;
12775
12776 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12777 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12778 return false;
12779 return true;
12780 }
12781
12782 saw_plus = false;
12783 if (GET_CODE (disp) == PLUS)
12784 {
12785 if (!CONST_INT_P (XEXP (disp, 1)))
12786 return false;
12787 disp = XEXP (disp, 0);
12788 saw_plus = true;
12789 }
12790
12791 if (TARGET_MACHO && darwin_local_data_pic (disp))
12792 return true;
12793
12794 if (GET_CODE (disp) != UNSPEC)
12795 return false;
12796
12797 switch (XINT (disp, 1))
12798 {
12799 case UNSPEC_GOT:
12800 if (saw_plus)
12801 return false;
12802 /* We need to check for both symbols and labels because VxWorks loads
12803 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12804 details. */
12805 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12806 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12807 case UNSPEC_GOTOFF:
12808 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12809 While the ABI also specifies a 32bit relocation, we don't produce it in
12810 the small PIC model at all. */
12811 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12812 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12813 && !TARGET_64BIT)
12814 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12815 return false;
12816 case UNSPEC_GOTTPOFF:
12817 case UNSPEC_GOTNTPOFF:
12818 case UNSPEC_INDNTPOFF:
12819 if (saw_plus)
12820 return false;
12821 disp = XVECEXP (disp, 0, 0);
12822 return (GET_CODE (disp) == SYMBOL_REF
12823 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12824 case UNSPEC_NTPOFF:
12825 disp = XVECEXP (disp, 0, 0);
12826 return (GET_CODE (disp) == SYMBOL_REF
12827 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12828 case UNSPEC_DTPOFF:
12829 disp = XVECEXP (disp, 0, 0);
12830 return (GET_CODE (disp) == SYMBOL_REF
12831 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12832 }
12833
12834 return false;
12835 }
12836
12837 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Reload any invalid
12838 parts of the address X and return true if the calling macro should
12839 goto WIN, or return false if no replacement is called for. */
12841
12842 bool
12843 ix86_legitimize_reload_address (rtx x, enum machine_mode, int opnum, int type,
12844 int)
12845 {
12846 /* Reload can generate:
12847
12848 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12849 (reg:DI 97))
12850 (reg:DI 2 cx))
12851
12852 This RTX is rejected by ix86_legitimate_address_p due to
12853 non-strictness of base register 97. Following this rejection,
12854 reload pushes all three components into separate registers,
12855 creating an invalid memory address RTX.
12856
12857 The following code reloads only the invalid part of the
12858 memory address RTX. */
12859
12860 if (GET_CODE (x) == PLUS
12861 && REG_P (XEXP (x, 1))
12862 && GET_CODE (XEXP (x, 0)) == PLUS
12863 && REG_P (XEXP (XEXP (x, 0), 1)))
12864 {
12865 rtx base, index;
12866 bool something_reloaded = false;
12867
12868 base = XEXP (XEXP (x, 0), 1);
12869 if (!REG_OK_FOR_BASE_STRICT_P (base))
12870 {
12871 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12872 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12873 opnum, (enum reload_type) type);
12874 something_reloaded = true;
12875 }
12876
12877 index = XEXP (x, 1);
12878 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12879 {
12880 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12881 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12882 opnum, (enum reload_type) type);
12883 something_reloaded = true;
12884 }
12885
12886 gcc_assert (something_reloaded);
12887 return true;
12888 }
12889
12890 return false;
12891 }
12892
12893 /* Determine if OP is a suitable RTX for an address register.
12894 Return the naked register if a register or a register subreg is
12895 found, otherwise return NULL_RTX. */
12896
12897 static rtx
12898 ix86_validate_address_register (rtx op)
12899 {
12900 enum machine_mode mode = GET_MODE (op);
12901
12902 /* Only SImode or DImode registers can form the address. */
12903 if (mode != SImode && mode != DImode)
12904 return NULL_RTX;
12905
12906 if (REG_P (op))
12907 return op;
12908 else if (GET_CODE (op) == SUBREG)
12909 {
12910 rtx reg = SUBREG_REG (op);
12911
12912 if (!REG_P (reg))
12913 return NULL_RTX;
12914
12915 mode = GET_MODE (reg);
12916
12917 /* Don't allow SUBREGs that span more than a word. It can
12918 lead to spill failures when the register is one word out
12919 of a two word structure. */
12920 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12921 return NULL_RTX;
12922
12923 /* Allow only SUBREGs of non-eliminable hard registers. */
12924 if (register_no_elim_operand (reg, mode))
12925 return reg;
12926 }
12927
12928 /* Op is not a register. */
12929 return NULL_RTX;
12930 }
12931
12932 /* Recognizes RTL expressions that are valid memory addresses for an
12933 instruction. The MODE argument is the machine mode for the MEM
12934 expression that wants to use this address.
12935
12936 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
12937 convert common non-canonical forms to canonical form so that they will
12938 be recognized. */
12939
12940 static bool
12941 ix86_legitimate_address_p (enum machine_mode, rtx addr, bool strict)
12942 {
12943 struct ix86_address parts;
12944 rtx base, index, disp;
12945 HOST_WIDE_INT scale;
12946 enum ix86_address_seg seg;
12947
12948 if (ix86_decompose_address (addr, &parts) <= 0)
12949 /* Decomposition failed. */
12950 return false;
12951
12952 base = parts.base;
12953 index = parts.index;
12954 disp = parts.disp;
12955 scale = parts.scale;
12956 seg = parts.seg;
12957
12958 /* Validate base register. */
12959 if (base)
12960 {
12961 rtx reg = ix86_validate_address_register (base);
12962
12963 if (reg == NULL_RTX)
12964 return false;
12965
12966 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12967 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12968 /* Base is not valid. */
12969 return false;
12970 }
12971
12972 /* Validate index register. */
12973 if (index)
12974 {
12975 rtx reg = ix86_validate_address_register (index);
12976
12977 if (reg == NULL_RTX)
12978 return false;
12979
12980 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12981 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12982 /* Index is not valid. */
12983 return false;
12984 }
12985
12986 /* Index and base should have the same mode. */
12987 if (base && index
12988 && GET_MODE (base) != GET_MODE (index))
12989 return false;
12990
12991 /* Address override works only on the (%reg) part of %fs:(%reg). */
12992 if (seg != SEG_DEFAULT
12993 && ((base && GET_MODE (base) != word_mode)
12994 || (index && GET_MODE (index) != word_mode)))
12995 return false;
12996
12997 /* Validate scale factor. */
12998 if (scale != 1)
12999 {
13000 if (!index)
13001 /* Scale without index. */
13002 return false;
13003
13004 if (scale != 2 && scale != 4 && scale != 8)
13005 /* Scale is not a valid multiplier. */
13006 return false;
13007 }
13008
13009 /* Validate displacement. */
13010 if (disp)
13011 {
13012 if (GET_CODE (disp) == CONST
13013 && GET_CODE (XEXP (disp, 0)) == UNSPEC
13014 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13015 switch (XINT (XEXP (disp, 0), 1))
13016 {
13017 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
13018 used. While the ABI also specifies 32bit relocations, we don't produce
13019 them at all and use IP-relative addressing instead. */
13020 case UNSPEC_GOT:
13021 case UNSPEC_GOTOFF:
13022 gcc_assert (flag_pic);
13023 if (!TARGET_64BIT)
13024 goto is_legitimate_pic;
13025
13026 /* 64bit address unspec. */
13027 return false;
13028
13029 case UNSPEC_GOTPCREL:
13030 case UNSPEC_PCREL:
13031 gcc_assert (flag_pic);
13032 goto is_legitimate_pic;
13033
13034 case UNSPEC_GOTTPOFF:
13035 case UNSPEC_GOTNTPOFF:
13036 case UNSPEC_INDNTPOFF:
13037 case UNSPEC_NTPOFF:
13038 case UNSPEC_DTPOFF:
13039 break;
13040
13041 case UNSPEC_STACK_CHECK:
13042 gcc_assert (flag_split_stack);
13043 break;
13044
13045 default:
13046 /* Invalid address unspec. */
13047 return false;
13048 }
13049
13050 else if (SYMBOLIC_CONST (disp)
13051 && (flag_pic
13052 || (TARGET_MACHO
13053 #if TARGET_MACHO
13054 && MACHOPIC_INDIRECT
13055 && !machopic_operand_p (disp)
13056 #endif
13057 )))
13058 {
13059
13060 is_legitimate_pic:
13061 if (TARGET_64BIT && (index || base))
13062 {
13063 /* foo@dtpoff(%rX) is ok. */
13064 if (GET_CODE (disp) != CONST
13065 || GET_CODE (XEXP (disp, 0)) != PLUS
13066 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13067 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13068 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13069 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13070 /* Non-constant pic memory reference. */
13071 return false;
13072 }
13073 else if ((!TARGET_MACHO || flag_pic)
13074 && ! legitimate_pic_address_disp_p (disp))
13075 /* Displacement is an invalid pic construct. */
13076 return false;
13077 #if TARGET_MACHO
13078 else if (MACHO_DYNAMIC_NO_PIC_P
13079 && !ix86_legitimate_constant_p (Pmode, disp))
13080 /* Displacement must be referenced via non_lazy_pointer. */
13081 return false;
13082 #endif
13083
13084 /* This code used to verify that a symbolic pic displacement
13085 includes the pic_offset_table_rtx register.
13086
13087 While this is a good idea, unfortunately these constructs may
13088 be created by the "adds using lea" optimization for incorrect
13089 code like:
13090
13091 int a;
13092 int foo(int i)
13093 {
13094 return *(&a+i);
13095 }
13096
13097 This code is nonsensical, but it results in addressing the
13098 GOT table with a pic_offset_table_rtx base. We can't
13099 just refuse it easily, since it gets matched by the
13100 "addsi3" pattern, which later gets split to an lea when the
13101 output register differs from the input. While this
13102 could be handled by a separate addsi pattern for this case
13103 that never results in an lea, disabling this test seems to be
13104 the easier and correct fix for the crash. */
13105 }
13106 else if (GET_CODE (disp) != LABEL_REF
13107 && !CONST_INT_P (disp)
13108 && (GET_CODE (disp) != CONST
13109 || !ix86_legitimate_constant_p (Pmode, disp))
13110 && (GET_CODE (disp) != SYMBOL_REF
13111 || !ix86_legitimate_constant_p (Pmode, disp)))
13112 /* Displacement is not constant. */
13113 return false;
13114 else if (TARGET_64BIT
13115 && !x86_64_immediate_operand (disp, VOIDmode))
13116 /* Displacement is out of range. */
13117 return false;
13118 /* In x32 mode, constant addresses are sign extended to 64bit, so
13119 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13120 else if (TARGET_X32 && !(index || base)
13121 && CONST_INT_P (disp)
13122 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13123 return false;
13124 }
13125
13126 /* Everything looks valid. */
13127 return true;
13128 }
13129
13130 /* Determine if a given RTX is a valid constant address. */
13131
13132 bool
13133 constant_address_p (rtx x)
13134 {
13135 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13136 }
13137 \f
13138 /* Return a unique alias set for the GOT. */
13139
13140 static alias_set_type
13141 ix86_GOT_alias_set (void)
13142 {
13143 static alias_set_type set = -1;
13144 if (set == -1)
13145 set = new_alias_set ();
13146 return set;
13147 }
13148
13149 /* Set regs_ever_live for PIC base address register
13150 to true if required. */
13151 static void
13152 set_pic_reg_ever_live ()
13153 {
13154 if (reload_in_progress)
13155 df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13156 }
13157
13158 /* Return a legitimate reference for ORIG (an address) using the
13159 register REG. If REG is 0, a new pseudo is generated.
13160
13161 There are two types of references that must be handled:
13162
13163 1. Global data references must load the address from the GOT, via
13164 the PIC reg. An insn is emitted to do this load, and the reg is
13165 returned.
13166
13167 2. Static data references, constant pool addresses, and code labels
13168 compute the address as an offset from the GOT, whose base is in
13169 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13170 differentiate them from global data objects. The returned
13171 address is the PIC reg + an unspec constant.
13172
13173 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13174 reg also appears in the address. */
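/* For illustration, in 32-bit PIC code the two cases above typically end up
   looking something like

     movl foo@GOT(%ebx), %reg	    # case 1: global data, loaded from the GOT
     leal bar@GOTOFF(%ebx), %reg    # case 2: static data, offset from the GOT

   with %ebx holding the PIC register.  */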
13175
13176 static rtx
13177 legitimize_pic_address (rtx orig, rtx reg)
13178 {
13179 rtx addr = orig;
13180 rtx new_rtx = orig;
13181
13182 #if TARGET_MACHO
13183 if (TARGET_MACHO && !TARGET_64BIT)
13184 {
13185 if (reg == 0)
13186 reg = gen_reg_rtx (Pmode);
13187 /* Use the generic Mach-O PIC machinery. */
13188 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13189 }
13190 #endif
13191
13192 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13193 {
13194 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13195 if (tmp)
13196 return tmp;
13197 }
13198
13199 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13200 new_rtx = addr;
13201 else if (TARGET_64BIT && !TARGET_PECOFF
13202 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13203 {
13204 rtx tmpreg;
13205 /* This symbol may be referenced via a displacement from the PIC
13206 base address (@GOTOFF). */
13207
13208 set_pic_reg_ever_live ();
13209 if (GET_CODE (addr) == CONST)
13210 addr = XEXP (addr, 0);
13211 if (GET_CODE (addr) == PLUS)
13212 {
13213 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13214 UNSPEC_GOTOFF);
13215 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13216 }
13217 else
13218 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13219 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13220 if (!reg)
13221 tmpreg = gen_reg_rtx (Pmode);
13222 else
13223 tmpreg = reg;
13224 emit_move_insn (tmpreg, new_rtx);
13225
13226 if (reg != 0)
13227 {
13228 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13229 tmpreg, 1, OPTAB_DIRECT);
13230 new_rtx = reg;
13231 }
13232 else
13233 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13234 }
13235 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13236 {
13237 /* This symbol may be referenced via a displacement from the PIC
13238 base address (@GOTOFF). */
13239
13240 set_pic_reg_ever_live ();
13241 if (GET_CODE (addr) == CONST)
13242 addr = XEXP (addr, 0);
13243 if (GET_CODE (addr) == PLUS)
13244 {
13245 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13246 UNSPEC_GOTOFF);
13247 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13248 }
13249 else
13250 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13251 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13252 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13253
13254 if (reg != 0)
13255 {
13256 emit_move_insn (reg, new_rtx);
13257 new_rtx = reg;
13258 }
13259 }
13260 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13261 /* We can't use @GOTOFF for text labels on VxWorks;
13262 see gotoff_operand. */
13263 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13264 {
13265 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13266 if (tmp)
13267 return tmp;
13268
13269 /* For x64 PE-COFF there is no GOT table, so we use the address
13270 directly. */
13271 if (TARGET_64BIT && TARGET_PECOFF)
13272 {
13273 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13274 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13275
13276 if (reg == 0)
13277 reg = gen_reg_rtx (Pmode);
13278 emit_move_insn (reg, new_rtx);
13279 new_rtx = reg;
13280 }
13281 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13282 {
13283 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13284 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13285 new_rtx = gen_const_mem (Pmode, new_rtx);
13286 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13287
13288 if (reg == 0)
13289 reg = gen_reg_rtx (Pmode);
13290 /* Use gen_movsi directly, otherwise the address is loaded
13291 into a register for CSE. We don't want to CSE these addresses;
13292 instead we CSE addresses from the GOT table, so skip this. */
13293 emit_insn (gen_movsi (reg, new_rtx));
13294 new_rtx = reg;
13295 }
13296 else
13297 {
13298 /* This symbol must be referenced via a load from the
13299 Global Offset Table (@GOT). */
13300
13301 set_pic_reg_ever_live ();
13302 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13303 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13304 if (TARGET_64BIT)
13305 new_rtx = force_reg (Pmode, new_rtx);
13306 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13307 new_rtx = gen_const_mem (Pmode, new_rtx);
13308 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13309
13310 if (reg == 0)
13311 reg = gen_reg_rtx (Pmode);
13312 emit_move_insn (reg, new_rtx);
13313 new_rtx = reg;
13314 }
13315 }
13316 else
13317 {
13318 if (CONST_INT_P (addr)
13319 && !x86_64_immediate_operand (addr, VOIDmode))
13320 {
13321 if (reg)
13322 {
13323 emit_move_insn (reg, addr);
13324 new_rtx = reg;
13325 }
13326 else
13327 new_rtx = force_reg (Pmode, addr);
13328 }
13329 else if (GET_CODE (addr) == CONST)
13330 {
13331 addr = XEXP (addr, 0);
13332
13333 /* We must match stuff we generate before. Assume the only
13334 unspecs that can get here are ours. Not that we could do
13335 anything with them anyway.... */
13336 if (GET_CODE (addr) == UNSPEC
13337 || (GET_CODE (addr) == PLUS
13338 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13339 return orig;
13340 gcc_assert (GET_CODE (addr) == PLUS);
13341 }
13342 if (GET_CODE (addr) == PLUS)
13343 {
13344 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13345
13346 /* Check first to see if this is a constant offset from a @GOTOFF
13347 symbol reference. */
13348 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13349 && CONST_INT_P (op1))
13350 {
13351 if (!TARGET_64BIT)
13352 {
13353 set_pic_reg_ever_live ();
13354 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13355 UNSPEC_GOTOFF);
13356 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13357 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13358 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13359
13360 if (reg != 0)
13361 {
13362 emit_move_insn (reg, new_rtx);
13363 new_rtx = reg;
13364 }
13365 }
13366 else
13367 {
13368 if (INTVAL (op1) < -16*1024*1024
13369 || INTVAL (op1) >= 16*1024*1024)
13370 {
13371 if (!x86_64_immediate_operand (op1, Pmode))
13372 op1 = force_reg (Pmode, op1);
13373 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13374 }
13375 }
13376 }
13377 else
13378 {
13379 rtx base = legitimize_pic_address (op0, reg);
13380 enum machine_mode mode = GET_MODE (base);
13381 new_rtx
13382 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13383
13384 if (CONST_INT_P (new_rtx))
13385 {
13386 if (INTVAL (new_rtx) < -16*1024*1024
13387 || INTVAL (new_rtx) >= 16*1024*1024)
13388 {
13389 if (!x86_64_immediate_operand (new_rtx, mode))
13390 new_rtx = force_reg (mode, new_rtx);
13391 new_rtx
13392 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13393 }
13394 else
13395 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13396 }
13397 else
13398 {
13399 if (GET_CODE (new_rtx) == PLUS
13400 && CONSTANT_P (XEXP (new_rtx, 1)))
13401 {
13402 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13403 new_rtx = XEXP (new_rtx, 1);
13404 }
13405 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13406 }
13407 }
13408 }
13409 }
13410 return new_rtx;
13411 }
13412 \f
13413 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13414
13415 static rtx
13416 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13417 {
13418 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13419
13420 if (GET_MODE (tp) != tp_mode)
13421 {
13422 gcc_assert (GET_MODE (tp) == SImode);
13423 gcc_assert (tp_mode == DImode);
13424
13425 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13426 }
13427
13428 if (to_reg)
13429 tp = copy_to_mode_reg (tp_mode, tp);
13430
13431 return tp;
13432 }
13433
13434 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13435
13436 static GTY(()) rtx ix86_tls_symbol;
13437
13438 static rtx
13439 ix86_tls_get_addr (void)
13440 {
13441 if (!ix86_tls_symbol)
13442 {
13443 const char *sym
13444 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13445 ? "___tls_get_addr" : "__tls_get_addr");
13446
13447 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13448 }
13449
13450 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13451 {
13452 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13453 UNSPEC_PLTOFF);
13454 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13455 gen_rtx_CONST (Pmode, unspec));
13456 }
13457
13458 return ix86_tls_symbol;
13459 }
13460
13461 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13462
13463 static GTY(()) rtx ix86_tls_module_base_symbol;
13464
13465 rtx
13466 ix86_tls_module_base (void)
13467 {
13468 if (!ix86_tls_module_base_symbol)
13469 {
13470 ix86_tls_module_base_symbol
13471 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13472
13473 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13474 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13475 }
13476
13477 return ix86_tls_module_base_symbol;
13478 }
13479
13480 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13481 false if we expect this to be used for a memory address and true if
13482 we expect to load the address into a register. */
13483
13484 static rtx
13485 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13486 {
13487 rtx dest, base, off;
13488 rtx pic = NULL_RTX, tp = NULL_RTX;
13489 enum machine_mode tp_mode = Pmode;
13490 int type;
13491
13492 /* Fall back to the global dynamic model if the toolchain cannot support
13493 local dynamic. */
13494 if (TARGET_SUN_TLS && !TARGET_64BIT
13495 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13496 && model == TLS_MODEL_LOCAL_DYNAMIC)
13497 model = TLS_MODEL_GLOBAL_DYNAMIC;
13498
13499 switch (model)
13500 {
13501 case TLS_MODEL_GLOBAL_DYNAMIC:
13502 dest = gen_reg_rtx (Pmode);
13503
13504 if (!TARGET_64BIT)
13505 {
13506 if (flag_pic && !TARGET_PECOFF)
13507 pic = pic_offset_table_rtx;
13508 else
13509 {
13510 pic = gen_reg_rtx (Pmode);
13511 emit_insn (gen_set_got (pic));
13512 }
13513 }
13514
13515 if (TARGET_GNU2_TLS)
13516 {
13517 if (TARGET_64BIT)
13518 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13519 else
13520 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13521
13522 tp = get_thread_pointer (Pmode, true);
13523 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13524
13525 if (GET_MODE (x) != Pmode)
13526 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13527
13528 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13529 }
13530 else
13531 {
13532 rtx caddr = ix86_tls_get_addr ();
13533
13534 if (TARGET_64BIT)
13535 {
13536 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13537 rtx_insn *insns;
13538
13539 start_sequence ();
13540 emit_call_insn
13541 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13542 insns = get_insns ();
13543 end_sequence ();
13544
13545 if (GET_MODE (x) != Pmode)
13546 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13547
13548 RTL_CONST_CALL_P (insns) = 1;
13549 emit_libcall_block (insns, dest, rax, x);
13550 }
13551 else
13552 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13553 }
13554 break;
13555
13556 case TLS_MODEL_LOCAL_DYNAMIC:
13557 base = gen_reg_rtx (Pmode);
13558
13559 if (!TARGET_64BIT)
13560 {
13561 if (flag_pic)
13562 pic = pic_offset_table_rtx;
13563 else
13564 {
13565 pic = gen_reg_rtx (Pmode);
13566 emit_insn (gen_set_got (pic));
13567 }
13568 }
13569
13570 if (TARGET_GNU2_TLS)
13571 {
13572 rtx tmp = ix86_tls_module_base ();
13573
13574 if (TARGET_64BIT)
13575 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13576 else
13577 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13578
13579 tp = get_thread_pointer (Pmode, true);
13580 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13581 gen_rtx_MINUS (Pmode, tmp, tp));
13582 }
13583 else
13584 {
13585 rtx caddr = ix86_tls_get_addr ();
13586
13587 if (TARGET_64BIT)
13588 {
13589 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13590 rtx_insn *insns;
13591 rtx eqv;
13592
13593 start_sequence ();
13594 emit_call_insn
13595 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13596 insns = get_insns ();
13597 end_sequence ();
13598
13599 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13600 share the LD_BASE result with other LD model accesses. */
13601 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13602 UNSPEC_TLS_LD_BASE);
13603
13604 RTL_CONST_CALL_P (insns) = 1;
13605 emit_libcall_block (insns, base, rax, eqv);
13606 }
13607 else
13608 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13609 }
13610
13611 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13612 off = gen_rtx_CONST (Pmode, off);
13613
13614 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13615
13616 if (TARGET_GNU2_TLS)
13617 {
13618 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13619
13620 if (GET_MODE (x) != Pmode)
13621 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13622
13623 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13624 }
13625 break;
13626
13627 case TLS_MODEL_INITIAL_EXEC:
13628 if (TARGET_64BIT)
13629 {
13630 if (TARGET_SUN_TLS && !TARGET_X32)
13631 {
13632 /* The Sun linker took the AMD64 TLS spec literally
13633 and can only handle %rax as the destination of the
13634 initial-exec code sequence. */
13635
13636 dest = gen_reg_rtx (DImode);
13637 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13638 return dest;
13639 }
13640
13641 /* Generate DImode references to avoid %fs:(%reg32)
13642 problems and the linker IE->LE relaxation bug. */
13643 tp_mode = DImode;
13644 pic = NULL;
13645 type = UNSPEC_GOTNTPOFF;
13646 }
13647 else if (flag_pic)
13648 {
13649 set_pic_reg_ever_live ();
13650 pic = pic_offset_table_rtx;
13651 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13652 }
13653 else if (!TARGET_ANY_GNU_TLS)
13654 {
13655 pic = gen_reg_rtx (Pmode);
13656 emit_insn (gen_set_got (pic));
13657 type = UNSPEC_GOTTPOFF;
13658 }
13659 else
13660 {
13661 pic = NULL;
13662 type = UNSPEC_INDNTPOFF;
13663 }
13664
13665 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13666 off = gen_rtx_CONST (tp_mode, off);
13667 if (pic)
13668 off = gen_rtx_PLUS (tp_mode, pic, off);
13669 off = gen_const_mem (tp_mode, off);
13670 set_mem_alias_set (off, ix86_GOT_alias_set ());
13671
13672 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13673 {
13674 base = get_thread_pointer (tp_mode,
13675 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13676 off = force_reg (tp_mode, off);
13677 return gen_rtx_PLUS (tp_mode, base, off);
13678 }
13679 else
13680 {
13681 base = get_thread_pointer (Pmode, true);
13682 dest = gen_reg_rtx (Pmode);
13683 emit_insn (ix86_gen_sub3 (dest, base, off));
13684 }
13685 break;
13686
13687 case TLS_MODEL_LOCAL_EXEC:
13688 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13689 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13690 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13691 off = gen_rtx_CONST (Pmode, off);
13692
13693 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13694 {
13695 base = get_thread_pointer (Pmode,
13696 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13697 return gen_rtx_PLUS (Pmode, base, off);
13698 }
13699 else
13700 {
13701 base = get_thread_pointer (Pmode, true);
13702 dest = gen_reg_rtx (Pmode);
13703 emit_insn (ix86_gen_sub3 (dest, base, off));
13704 }
13705 break;
13706
13707 default:
13708 gcc_unreachable ();
13709 }
13710
13711 return dest;
13712 }
13713
13714 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13715 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13716 unique refptr-DECL symbol corresponding to symbol DECL. */
13717
13718 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13719 htab_t dllimport_map;
13720
13721 static tree
13722 get_dllimport_decl (tree decl, bool beimport)
13723 {
13724 struct tree_map *h, in;
13725 void **loc;
13726 const char *name;
13727 const char *prefix;
13728 size_t namelen, prefixlen;
13729 char *imp_name;
13730 tree to;
13731 rtx rtl;
13732
13733 if (!dllimport_map)
13734 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13735
13736 in.hash = htab_hash_pointer (decl);
13737 in.base.from = decl;
13738 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13739 h = (struct tree_map *) *loc;
13740 if (h)
13741 return h->to;
13742
13743 *loc = h = ggc_alloc<tree_map> ();
13744 h->hash = in.hash;
13745 h->base.from = decl;
13746 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13747 VAR_DECL, NULL, ptr_type_node);
13748 DECL_ARTIFICIAL (to) = 1;
13749 DECL_IGNORED_P (to) = 1;
13750 DECL_EXTERNAL (to) = 1;
13751 TREE_READONLY (to) = 1;
13752
13753 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13754 name = targetm.strip_name_encoding (name);
13755 if (beimport)
13756 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13757 ? "*__imp_" : "*__imp__";
13758 else
13759 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13760 namelen = strlen (name);
13761 prefixlen = strlen (prefix);
13762 imp_name = (char *) alloca (namelen + prefixlen + 1);
13763 memcpy (imp_name, prefix, prefixlen);
13764 memcpy (imp_name + prefixlen, name, namelen + 1);
13765
13766 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13767 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13768 SET_SYMBOL_REF_DECL (rtl, to);
13769 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13770 if (!beimport)
13771 {
13772 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13773 #ifdef SUB_TARGET_RECORD_STUB
13774 SUB_TARGET_RECORD_STUB (name);
13775 #endif
13776 }
13777
13778 rtl = gen_const_mem (Pmode, rtl);
13779 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13780
13781 SET_DECL_RTL (to, rtl);
13782 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13783
13784 return to;
13785 }
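/* For illustration (assuming the usual GCC meaning of a leading '*' in an
   assembler name, i.e. "emit verbatim, without prepending
   user_label_prefix"): a dllimport'ed DECL named "foo" gets a stub symbol
   printed as "__imp_foo" on targets with an empty user_label_prefix and
   "__imp__foo" otherwise, while the !BEIMPORT (refptr) case yields
   ".refptr.foo" or "refptr.foo" respectively.  The stub itself is a
   read-only pointer-sized variable whose DECL_RTL is a GOT-alias MEM, so
   references to DECL become loads through that pointer.  */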
13786
13787 /* Expand SYMBOL into its corresponding far-addressed symbol.
13788 WANT_REG is true if we require the result be a register. */
13789
13790 static rtx
13791 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13792 {
13793 tree imp_decl;
13794 rtx x;
13795
13796 gcc_assert (SYMBOL_REF_DECL (symbol));
13797 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13798
13799 x = DECL_RTL (imp_decl);
13800 if (want_reg)
13801 x = force_reg (Pmode, x);
13802 return x;
13803 }
13804
13805 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13806 true if we require the result be a register. */
13807
13808 static rtx
13809 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13810 {
13811 tree imp_decl;
13812 rtx x;
13813
13814 gcc_assert (SYMBOL_REF_DECL (symbol));
13815 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13816
13817 x = DECL_RTL (imp_decl);
13818 if (want_reg)
13819 x = force_reg (Pmode, x);
13820 return x;
13821 }
13822
13823 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13824 is true if we require the result be a register. */
13825
13826 static rtx
13827 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13828 {
13829 if (!TARGET_PECOFF)
13830 return NULL_RTX;
13831
13832 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13833 {
13834 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13835 return legitimize_dllimport_symbol (addr, inreg);
13836 if (GET_CODE (addr) == CONST
13837 && GET_CODE (XEXP (addr, 0)) == PLUS
13838 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13839 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13840 {
13841 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13842 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13843 }
13844 }
13845
13846 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13847 return NULL_RTX;
13848 if (GET_CODE (addr) == SYMBOL_REF
13849 && !is_imported_p (addr)
13850 && SYMBOL_REF_EXTERNAL_P (addr)
13851 && SYMBOL_REF_DECL (addr))
13852 return legitimize_pe_coff_extern_decl (addr, inreg);
13853
13854 if (GET_CODE (addr) == CONST
13855 && GET_CODE (XEXP (addr, 0)) == PLUS
13856 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13857 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13858 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13859 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13860 {
13861 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13862 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13863 }
13864 return NULL_RTX;
13865 }
13866
13867 /* Try machine-dependent ways of modifying an illegitimate address
13868 to be legitimate. If we find one, return the new, valid address.
13869 This macro is used in only one place: `memory_address' in explow.c.
13870
13871 OLDX is the address as it was before break_out_memory_refs was called.
13872 In some cases it is useful to look at this to decide what needs to be done.
13873
13874 It is always safe for this macro to do nothing. It exists to recognize
13875 opportunities to optimize the output.
13876
13877 For the 80386, we handle X+REG by loading X into a register R and
13878 using R+REG. R will go in a general reg and indexing will be used.
13879 However, if REG is a broken-out memory address or multiplication,
13880 nothing needs to be done because REG can certainly go in a general reg.
13881
13882 When -fpic is used, special handling is needed for symbolic references.
13883 See comments by legitimize_pic_address in i386.c for details. */
13884
13885 static rtx
13886 ix86_legitimize_address (rtx x, rtx, enum machine_mode mode)
13887 {
13888 int changed = 0;
13889 unsigned log;
13890
13891 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13892 if (log)
13893 return legitimize_tls_address (x, (enum tls_model) log, false);
13894 if (GET_CODE (x) == CONST
13895 && GET_CODE (XEXP (x, 0)) == PLUS
13896 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13897 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13898 {
13899 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13900 (enum tls_model) log, false);
13901 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13902 }
13903
13904 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13905 {
13906 rtx tmp = legitimize_pe_coff_symbol (x, true);
13907 if (tmp)
13908 return tmp;
13909 }
13910
13911 if (flag_pic && SYMBOLIC_CONST (x))
13912 return legitimize_pic_address (x, 0);
13913
13914 #if TARGET_MACHO
13915 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13916 return machopic_indirect_data_reference (x, 0);
13917 #endif
13918
13919 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
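/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   which matches the scaled-index form accepted by
   ix86_legitimate_address_p.  */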
13920 if (GET_CODE (x) == ASHIFT
13921 && CONST_INT_P (XEXP (x, 1))
13922 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13923 {
13924 changed = 1;
13925 log = INTVAL (XEXP (x, 1));
13926 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13927 GEN_INT (1 << log));
13928 }
13929
13930 if (GET_CODE (x) == PLUS)
13931 {
13932 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13933
13934 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13935 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13936 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13937 {
13938 changed = 1;
13939 log = INTVAL (XEXP (XEXP (x, 0), 1));
13940 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13941 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13942 GEN_INT (1 << log));
13943 }
13944
13945 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13946 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13947 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13948 {
13949 changed = 1;
13950 log = INTVAL (XEXP (XEXP (x, 1), 1));
13951 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13952 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13953 GEN_INT (1 << log));
13954 }
13955
13956 /* Put multiply first if it isn't already. */
13957 if (GET_CODE (XEXP (x, 1)) == MULT)
13958 {
13959 rtx tmp = XEXP (x, 0);
13960 XEXP (x, 0) = XEXP (x, 1);
13961 XEXP (x, 1) = tmp;
13962 changed = 1;
13963 }
13964
13965 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13966 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13967 created by virtual register instantiation, register elimination, and
13968 similar optimizations. */
13969 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13970 {
13971 changed = 1;
13972 x = gen_rtx_PLUS (Pmode,
13973 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13974 XEXP (XEXP (x, 1), 0)),
13975 XEXP (XEXP (x, 1), 1));
13976 }
13977
13978 /* Canonicalize
13979 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13980 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13981 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13982 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13983 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13984 && CONSTANT_P (XEXP (x, 1)))
13985 {
13986 rtx constant;
13987 rtx other = NULL_RTX;
13988
13989 if (CONST_INT_P (XEXP (x, 1)))
13990 {
13991 constant = XEXP (x, 1);
13992 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13993 }
13994 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13995 {
13996 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13997 other = XEXP (x, 1);
13998 }
13999 else
14000 constant = 0;
14001
14002 if (constant)
14003 {
14004 changed = 1;
14005 x = gen_rtx_PLUS (Pmode,
14006 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14007 XEXP (XEXP (XEXP (x, 0), 1), 0)),
14008 plus_constant (Pmode, other,
14009 INTVAL (constant)));
14010 }
14011 }
14012
14013 if (changed && ix86_legitimate_address_p (mode, x, false))
14014 return x;
14015
14016 if (GET_CODE (XEXP (x, 0)) == MULT)
14017 {
14018 changed = 1;
14019 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14020 }
14021
14022 if (GET_CODE (XEXP (x, 1)) == MULT)
14023 {
14024 changed = 1;
14025 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14026 }
14027
14028 if (changed
14029 && REG_P (XEXP (x, 1))
14030 && REG_P (XEXP (x, 0)))
14031 return x;
14032
14033 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14034 {
14035 changed = 1;
14036 x = legitimize_pic_address (x, 0);
14037 }
14038
14039 if (changed && ix86_legitimate_address_p (mode, x, false))
14040 return x;
14041
14042 if (REG_P (XEXP (x, 0)))
14043 {
14044 rtx temp = gen_reg_rtx (Pmode);
14045 rtx val = force_operand (XEXP (x, 1), temp);
14046 if (val != temp)
14047 {
14048 val = convert_to_mode (Pmode, val, 1);
14049 emit_move_insn (temp, val);
14050 }
14051
14052 XEXP (x, 1) = temp;
14053 return x;
14054 }
14055
14056 else if (REG_P (XEXP (x, 1)))
14057 {
14058 rtx temp = gen_reg_rtx (Pmode);
14059 rtx val = force_operand (XEXP (x, 0), temp);
14060 if (val != temp)
14061 {
14062 val = convert_to_mode (Pmode, val, 1);
14063 emit_move_insn (temp, val);
14064 }
14065
14066 XEXP (x, 0) = temp;
14067 return x;
14068 }
14069 }
14070
14071 return x;
14072 }
14073 \f
14074 /* Print an integer constant expression in assembler syntax. Addition
14075 and subtraction are the only arithmetic that may appear in these
14076 expressions. FILE is the stdio stream to write to, X is the rtx, and
14077 CODE is the operand print code from the output string. */
14078
14079 static void
14080 output_pic_addr_const (FILE *file, rtx x, int code)
14081 {
14082 char buf[256];
14083
14084 switch (GET_CODE (x))
14085 {
14086 case PC:
14087 gcc_assert (flag_pic);
14088 putc ('.', file);
14089 break;
14090
14091 case SYMBOL_REF:
14092 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14093 output_addr_const (file, x);
14094 else
14095 {
14096 const char *name = XSTR (x, 0);
14097
14098 /* Mark the decl as referenced so that cgraph will
14099 output the function. */
14100 if (SYMBOL_REF_DECL (x))
14101 mark_decl_referenced (SYMBOL_REF_DECL (x));
14102
14103 #if TARGET_MACHO
14104 if (MACHOPIC_INDIRECT
14105 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14106 name = machopic_indirection_name (x, /*stub_p=*/true);
14107 #endif
14108 assemble_name (file, name);
14109 }
14110 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14111 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14112 fputs ("@PLT", file);
14113 break;
14114
14115 case LABEL_REF:
14116 x = XEXP (x, 0);
14117 /* FALLTHRU */
14118 case CODE_LABEL:
14119 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14120 assemble_name (asm_out_file, buf);
14121 break;
14122
14123 case CONST_INT:
14124 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14125 break;
14126
14127 case CONST:
14128 /* This used to output parentheses around the expression,
14129 but that does not work on the 386 (either ATT or BSD assembler). */
14130 output_pic_addr_const (file, XEXP (x, 0), code);
14131 break;
14132
14133 case CONST_DOUBLE:
14134 if (GET_MODE (x) == VOIDmode)
14135 {
14136 /* We can use %d if the number is <32 bits and positive. */
14137 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14138 fprintf (file, "0x%lx%08lx",
14139 (unsigned long) CONST_DOUBLE_HIGH (x),
14140 (unsigned long) CONST_DOUBLE_LOW (x));
14141 else
14142 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14143 }
14144 else
14145 /* We can't handle floating point constants;
14146 TARGET_PRINT_OPERAND must handle them. */
14147 output_operand_lossage ("floating constant misused");
14148 break;
14149
14150 case PLUS:
14151 /* Some assemblers need integer constants to appear first. */
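/* E.g. (plus (symbol_ref "foo") (const_int 4)) is printed as "4+foo"
   rather than "foo+4".  */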
14152 if (CONST_INT_P (XEXP (x, 0)))
14153 {
14154 output_pic_addr_const (file, XEXP (x, 0), code);
14155 putc ('+', file);
14156 output_pic_addr_const (file, XEXP (x, 1), code);
14157 }
14158 else
14159 {
14160 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14161 output_pic_addr_const (file, XEXP (x, 1), code);
14162 putc ('+', file);
14163 output_pic_addr_const (file, XEXP (x, 0), code);
14164 }
14165 break;
14166
14167 case MINUS:
14168 if (!TARGET_MACHO)
14169 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14170 output_pic_addr_const (file, XEXP (x, 0), code);
14171 putc ('-', file);
14172 output_pic_addr_const (file, XEXP (x, 1), code);
14173 if (!TARGET_MACHO)
14174 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14175 break;
14176
14177 case UNSPEC:
14178 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14179 {
14180 bool f = i386_asm_output_addr_const_extra (file, x);
14181 gcc_assert (f);
14182 break;
14183 }
14184
14185 gcc_assert (XVECLEN (x, 0) == 1);
14186 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14187 switch (XINT (x, 1))
14188 {
14189 case UNSPEC_GOT:
14190 fputs ("@GOT", file);
14191 break;
14192 case UNSPEC_GOTOFF:
14193 fputs ("@GOTOFF", file);
14194 break;
14195 case UNSPEC_PLTOFF:
14196 fputs ("@PLTOFF", file);
14197 break;
14198 case UNSPEC_PCREL:
14199 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14200 "(%rip)" : "[rip]", file);
14201 break;
14202 case UNSPEC_GOTPCREL:
14203 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14204 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14205 break;
14206 case UNSPEC_GOTTPOFF:
14207 /* FIXME: This might be @TPOFF in Sun ld too. */
14208 fputs ("@gottpoff", file);
14209 break;
14210 case UNSPEC_TPOFF:
14211 fputs ("@tpoff", file);
14212 break;
14213 case UNSPEC_NTPOFF:
14214 if (TARGET_64BIT)
14215 fputs ("@tpoff", file);
14216 else
14217 fputs ("@ntpoff", file);
14218 break;
14219 case UNSPEC_DTPOFF:
14220 fputs ("@dtpoff", file);
14221 break;
14222 case UNSPEC_GOTNTPOFF:
14223 if (TARGET_64BIT)
14224 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14225 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14226 else
14227 fputs ("@gotntpoff", file);
14228 break;
14229 case UNSPEC_INDNTPOFF:
14230 fputs ("@indntpoff", file);
14231 break;
14232 #if TARGET_MACHO
14233 case UNSPEC_MACHOPIC_OFFSET:
14234 putc ('-', file);
14235 machopic_output_function_base_name (file);
14236 break;
14237 #endif
14238 default:
14239 output_operand_lossage ("invalid UNSPEC as operand");
14240 break;
14241 }
14242 break;
14243
14244 default:
14245 output_operand_lossage ("invalid expression as operand");
14246 }
14247 }
14248
14249 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14250 We need to emit DTP-relative relocations. */
14251
14252 static void ATTRIBUTE_UNUSED
14253 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14254 {
14255 fputs (ASM_LONG, file);
14256 output_addr_const (file, x);
14257 fputs ("@dtpoff", file);
14258 switch (size)
14259 {
14260 case 4:
14261 break;
14262 case 8:
14263 fputs (", 0", file);
14264 break;
14265 default:
14266 gcc_unreachable ();
14267 }
14268 }
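/* With the usual "\t.long\t" definition of ASM_LONG, the output produced
   above is a directive of the form ".long foo@dtpoff" for SIZE == 4, and
   ".long foo@dtpoff, 0" for SIZE == 8, the trailing zero padding the
   32-bit dtpoff relocation up to eight bytes.  */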
14269
14270 /* Return true if X is a representation of the PIC register. This copes
14271 with calls from ix86_find_base_term, where the register might have
14272 been replaced by a cselib value. */
14273
14274 static bool
14275 ix86_pic_register_p (rtx x)
14276 {
14277 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14278 return (pic_offset_table_rtx
14279 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14280 else if (pic_offset_table_rtx)
14281 return REG_P (x) && REGNO (x) == REGNO (pic_offset_table_rtx);
14282 else
14283 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14284 }
14285
14286 /* Helper function for ix86_delegitimize_address.
14287 Attempt to delegitimize TLS local-exec accesses. */
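/* That is, addresses of the form built by the TLS_MODEL_LOCAL_EXEC case of
   legitimize_tls_address above: a %fs/%gs-relative access whose
   displacement wraps the SYMBOL_REF in an UNSPEC_NTPOFF.  The unspec is
   stripped so that ix86_delegitimize_address (and through it
   ix86_find_base_term and the debug-info code) sees the underlying symbol
   again.  */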
14288
14289 static rtx
14290 ix86_delegitimize_tls_address (rtx orig_x)
14291 {
14292 rtx x = orig_x, unspec;
14293 struct ix86_address addr;
14294
14295 if (!TARGET_TLS_DIRECT_SEG_REFS)
14296 return orig_x;
14297 if (MEM_P (x))
14298 x = XEXP (x, 0);
14299 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14300 return orig_x;
14301 if (ix86_decompose_address (x, &addr) == 0
14302 || addr.seg != DEFAULT_TLS_SEG_REG
14303 || addr.disp == NULL_RTX
14304 || GET_CODE (addr.disp) != CONST)
14305 return orig_x;
14306 unspec = XEXP (addr.disp, 0);
14307 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14308 unspec = XEXP (unspec, 0);
14309 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14310 return orig_x;
14311 x = XVECEXP (unspec, 0, 0);
14312 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14313 if (unspec != XEXP (addr.disp, 0))
14314 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14315 if (addr.index)
14316 {
14317 rtx idx = addr.index;
14318 if (addr.scale != 1)
14319 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14320 x = gen_rtx_PLUS (Pmode, idx, x);
14321 }
14322 if (addr.base)
14323 x = gen_rtx_PLUS (Pmode, addr.base, x);
14324 if (MEM_P (orig_x))
14325 x = replace_equiv_address_nv (orig_x, x);
14326 return x;
14327 }
14328
14329 /* In the name of slightly smaller debug output, and to cater to
14330 general assembler lossage, recognize PIC+GOTOFF and turn it back
14331 into a direct symbol reference.
14332
14333 On Darwin, this is necessary to avoid a crash, because Darwin
14334 has a different PIC label for each routine but the DWARF debugging
14335 information is not associated with any particular routine, so it's
14336 necessary to remove references to the PIC label from RTL stored by
14337 the DWARF output code. */
14338
14339 static rtx
14340 ix86_delegitimize_address (rtx x)
14341 {
14342 rtx orig_x = delegitimize_mem_from_attrs (x);
14343 /* addend is NULL or some rtx if x is something+GOTOFF where
14344 something doesn't include the PIC register. */
14345 rtx addend = NULL_RTX;
14346 /* reg_addend is NULL or a multiple of some register. */
14347 rtx reg_addend = NULL_RTX;
14348 /* const_addend is NULL or a const_int. */
14349 rtx const_addend = NULL_RTX;
14350 /* This is the result, or NULL. */
14351 rtx result = NULL_RTX;
14352
14353 x = orig_x;
14354
14355 if (MEM_P (x))
14356 x = XEXP (x, 0);
14357
14358 if (TARGET_64BIT)
14359 {
14360 if (GET_CODE (x) == CONST
14361 && GET_CODE (XEXP (x, 0)) == PLUS
14362 && GET_MODE (XEXP (x, 0)) == Pmode
14363 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14364 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14365 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14366 {
14367 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14368 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14369 if (MEM_P (orig_x))
14370 x = replace_equiv_address_nv (orig_x, x);
14371 return x;
14372 }
14373
14374 if (GET_CODE (x) == CONST
14375 && GET_CODE (XEXP (x, 0)) == UNSPEC
14376 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14377 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14378 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14379 {
14380 x = XVECEXP (XEXP (x, 0), 0, 0);
14381 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14382 {
14383 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14384 GET_MODE (x), 0);
14385 if (x == NULL_RTX)
14386 return orig_x;
14387 }
14388 return x;
14389 }
14390
14391 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14392 return ix86_delegitimize_tls_address (orig_x);
14393
14394 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14395 and -mcmodel=medium -fpic. */
14396 }
14397
14398 if (GET_CODE (x) != PLUS
14399 || GET_CODE (XEXP (x, 1)) != CONST)
14400 return ix86_delegitimize_tls_address (orig_x);
14401
14402 if (ix86_pic_register_p (XEXP (x, 0)))
14403 /* %ebx + GOT/GOTOFF */
14404 ;
14405 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14406 {
14407 /* %ebx + %reg * scale + GOT/GOTOFF */
14408 reg_addend = XEXP (x, 0);
14409 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14410 reg_addend = XEXP (reg_addend, 1);
14411 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14412 reg_addend = XEXP (reg_addend, 0);
14413 else
14414 {
14415 reg_addend = NULL_RTX;
14416 addend = XEXP (x, 0);
14417 }
14418 }
14419 else
14420 addend = XEXP (x, 0);
14421
14422 x = XEXP (XEXP (x, 1), 0);
14423 if (GET_CODE (x) == PLUS
14424 && CONST_INT_P (XEXP (x, 1)))
14425 {
14426 const_addend = XEXP (x, 1);
14427 x = XEXP (x, 0);
14428 }
14429
14430 if (GET_CODE (x) == UNSPEC
14431 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14432 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14433 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14434 && !MEM_P (orig_x) && !addend)))
14435 result = XVECEXP (x, 0, 0);
14436
14437 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14438 && !MEM_P (orig_x))
14439 result = XVECEXP (x, 0, 0);
14440
14441 if (! result)
14442 return ix86_delegitimize_tls_address (orig_x);
14443
14444 if (const_addend)
14445 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14446 if (reg_addend)
14447 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14448 if (addend)
14449 {
14450 /* If the rest of original X doesn't involve the PIC register, add
14451 addend and subtract pic_offset_table_rtx. This can happen e.g.
14452 for code like:
14453 leal (%ebx, %ecx, 4), %ecx
14454 ...
14455 movl foo@GOTOFF(%ecx), %edx
14456 in which case we return (%ecx - %ebx) + foo.
14457
14458 Note that when pseudo_pic_reg is used we can generate it only
14459 before reload_completed. */
14460 if (pic_offset_table_rtx
14461 && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
14462 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14463 pic_offset_table_rtx),
14464 result);
14465 else
14466 return orig_x;
14467 }
14468 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14469 {
14470 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14471 if (result == NULL_RTX)
14472 return orig_x;
14473 }
14474 return result;
14475 }
14476
14477 /* If X is a machine specific address (i.e. a symbol or label being
14478 referenced as a displacement from the GOT implemented using an
14479 UNSPEC), then return the base term. Otherwise return X. */
14480
14481 rtx
14482 ix86_find_base_term (rtx x)
14483 {
14484 rtx term;
14485
14486 if (TARGET_64BIT)
14487 {
14488 if (GET_CODE (x) != CONST)
14489 return x;
14490 term = XEXP (x, 0);
14491 if (GET_CODE (term) == PLUS
14492 && (CONST_INT_P (XEXP (term, 1))
14493 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14494 term = XEXP (term, 0);
14495 if (GET_CODE (term) != UNSPEC
14496 || (XINT (term, 1) != UNSPEC_GOTPCREL
14497 && XINT (term, 1) != UNSPEC_PCREL))
14498 return x;
14499
14500 return XVECEXP (term, 0, 0);
14501 }
14502
14503 return ix86_delegitimize_address (x);
14504 }
14505 \f
14506 static void
14507 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14508 bool fp, FILE *file)
14509 {
14510 const char *suffix;
14511
14512 if (mode == CCFPmode || mode == CCFPUmode)
14513 {
14514 code = ix86_fp_compare_code_to_integer (code);
14515 mode = CCmode;
14516 }
14517 if (reverse)
14518 code = reverse_condition (code);
14519
14520 switch (code)
14521 {
14522 case EQ:
14523 switch (mode)
14524 {
14525 case CCAmode:
14526 suffix = "a";
14527 break;
14528
14529 case CCCmode:
14530 suffix = "c";
14531 break;
14532
14533 case CCOmode:
14534 suffix = "o";
14535 break;
14536
14537 case CCSmode:
14538 suffix = "s";
14539 break;
14540
14541 default:
14542 suffix = "e";
14543 }
14544 break;
14545 case NE:
14546 switch (mode)
14547 {
14548 case CCAmode:
14549 suffix = "na";
14550 break;
14551
14552 case CCCmode:
14553 suffix = "nc";
14554 break;
14555
14556 case CCOmode:
14557 suffix = "no";
14558 break;
14559
14560 case CCSmode:
14561 suffix = "ns";
14562 break;
14563
14564 default:
14565 suffix = "ne";
14566 }
14567 break;
14568 case GT:
14569 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14570 suffix = "g";
14571 break;
14572 case GTU:
14573 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14574 Those same assemblers have the same but opposite lossage on cmov. */
14575 if (mode == CCmode)
14576 suffix = fp ? "nbe" : "a";
14577 else
14578 gcc_unreachable ();
14579 break;
14580 case LT:
14581 switch (mode)
14582 {
14583 case CCNOmode:
14584 case CCGOCmode:
14585 suffix = "s";
14586 break;
14587
14588 case CCmode:
14589 case CCGCmode:
14590 suffix = "l";
14591 break;
14592
14593 default:
14594 gcc_unreachable ();
14595 }
14596 break;
14597 case LTU:
14598 if (mode == CCmode)
14599 suffix = "b";
14600 else if (mode == CCCmode)
14601 suffix = "c";
14602 else
14603 gcc_unreachable ();
14604 break;
14605 case GE:
14606 switch (mode)
14607 {
14608 case CCNOmode:
14609 case CCGOCmode:
14610 suffix = "ns";
14611 break;
14612
14613 case CCmode:
14614 case CCGCmode:
14615 suffix = "ge";
14616 break;
14617
14618 default:
14619 gcc_unreachable ();
14620 }
14621 break;
14622 case GEU:
14623 if (mode == CCmode)
14624 suffix = fp ? "nb" : "ae";
14625 else if (mode == CCCmode)
14626 suffix = "nc";
14627 else
14628 gcc_unreachable ();
14629 break;
14630 case LE:
14631 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14632 suffix = "le";
14633 break;
14634 case LEU:
14635 if (mode == CCmode)
14636 suffix = "be";
14637 else
14638 gcc_unreachable ();
14639 break;
14640 case UNORDERED:
14641 suffix = fp ? "u" : "p";
14642 break;
14643 case ORDERED:
14644 suffix = fp ? "nu" : "np";
14645 break;
14646 default:
14647 gcc_unreachable ();
14648 }
14649 fputs (suffix, file);
14650 }
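/* Example: for a comparison such as (gt (reg:CCGC flags) (const_int 0))
   with REVERSE false this emits "g"; a hypothetical output template like
   "j%C0\t%l1" (using the 'C' code of ix86_print_operand below) would
   therefore print a "jg" to the label operand.  */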
14651
14652 /* Print the name of register X to FILE based on its machine mode and number.
14653 If CODE is 'w', pretend the mode is HImode.
14654 If CODE is 'b', pretend the mode is QImode.
14655 If CODE is 'k', pretend the mode is SImode.
14656 If CODE is 'q', pretend the mode is DImode.
14657 If CODE is 'x', pretend the mode is V4SFmode.
14658 If CODE is 't', pretend the mode is V8SFmode.
14659 If CODE is 'g', pretend the mode is V16SFmode.
14660 If CODE is 'h', pretend the reg is the 'high' byte register.
14661 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
14662 If CODE is 'd', duplicate the operand for AVX instruction.
14663 */
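/* For example, if X is hard register AX_REG, CODE 'k' prints "eax" ("%eax"
   in AT&T syntax), 'w' prints "ax", 'b' prints "al", 'h' prints "ah" and,
   on 64-bit targets, 'q' prints "rax".  */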
14664
14665 void
14666 print_reg (rtx x, int code, FILE *file)
14667 {
14668 const char *reg;
14669 unsigned int regno;
14670 bool duplicated = code == 'd' && TARGET_AVX;
14671
14672 if (ASSEMBLER_DIALECT == ASM_ATT)
14673 putc ('%', file);
14674
14675 if (x == pc_rtx)
14676 {
14677 gcc_assert (TARGET_64BIT);
14678 fputs ("rip", file);
14679 return;
14680 }
14681
14682 regno = true_regnum (x);
14683 gcc_assert (regno != ARG_POINTER_REGNUM
14684 && regno != FRAME_POINTER_REGNUM
14685 && regno != FLAGS_REG
14686 && regno != FPSR_REG
14687 && regno != FPCR_REG);
14688
14689 if (code == 'w' || MMX_REG_P (x))
14690 code = 2;
14691 else if (code == 'b')
14692 code = 1;
14693 else if (code == 'k')
14694 code = 4;
14695 else if (code == 'q')
14696 code = 8;
14697 else if (code == 'y')
14698 code = 3;
14699 else if (code == 'h')
14700 code = 0;
14701 else if (code == 'x')
14702 code = 16;
14703 else if (code == 't')
14704 code = 32;
14705 else if (code == 'g')
14706 code = 64;
14707 else
14708 code = GET_MODE_SIZE (GET_MODE (x));
14709
14710 /* Irritatingly, the AMD extended registers use a different naming
14711 convention from the normal registers: "r%d[bwd]". */
14712 if (REX_INT_REGNO_P (regno))
14713 {
14714 gcc_assert (TARGET_64BIT);
14715 putc ('r', file);
14716 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14717 switch (code)
14718 {
14719 case 0:
14720 error ("extended registers have no high halves");
14721 break;
14722 case 1:
14723 putc ('b', file);
14724 break;
14725 case 2:
14726 putc ('w', file);
14727 break;
14728 case 4:
14729 putc ('d', file);
14730 break;
14731 case 8:
14732 /* no suffix */
14733 break;
14734 default:
14735 error ("unsupported operand size for extended register");
14736 break;
14737 }
14738 return;
14739 }
14740
14741 reg = NULL;
14742 switch (code)
14743 {
14744 case 3:
14745 if (STACK_TOP_P (x))
14746 {
14747 reg = "st(0)";
14748 break;
14749 }
14750 /* FALLTHRU */
14751 case 8:
14752 case 4:
14753 case 12:
14754 if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
14755 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14756 /* FALLTHRU */
14757 case 16:
14758 case 2:
14759 normal:
14760 reg = hi_reg_name[regno];
14761 break;
14762 case 1:
14763 if (regno >= ARRAY_SIZE (qi_reg_name))
14764 goto normal;
14765 reg = qi_reg_name[regno];
14766 break;
14767 case 0:
14768 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14769 goto normal;
14770 reg = qi_high_reg_name[regno];
14771 break;
14772 case 32:
14773 if (SSE_REG_P (x))
14774 {
14775 gcc_assert (!duplicated);
14776 putc ('y', file);
14777 fputs (hi_reg_name[regno] + 1, file);
14778 return;
14779 }
14780 case 64:
14781 if (SSE_REG_P (x))
14782 {
14783 gcc_assert (!duplicated);
14784 putc ('z', file);
14785 fputs (hi_reg_name[REGNO (x)] + 1, file);
14786 return;
14787 }
14788 break;
14789 default:
14790 gcc_unreachable ();
14791 }
14792
14793 fputs (reg, file);
14794 if (duplicated)
14795 {
14796 if (ASSEMBLER_DIALECT == ASM_ATT)
14797 fprintf (file, ", %%%s", reg);
14798 else
14799 fprintf (file, ", %s", reg);
14800 }
14801 }
14802
14803 /* Meaning of CODE:
14804 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14805 C -- print opcode suffix for set/cmov insn.
14806 c -- like C, but print reversed condition
14807 F,f -- likewise, but for floating-point.
14808 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14809 otherwise nothing
14810 R -- print embedded rounding and sae.
14811 r -- print only sae.
14812 z -- print the opcode suffix for the size of the current operand.
14813 Z -- likewise, with special suffixes for x87 instructions.
14814 * -- print a star (in certain assembler syntax)
14815 A -- print an absolute memory reference.
14816 E -- print address with DImode register names if TARGET_64BIT.
14817 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14818 s -- print a shift double count, followed by the assembler's argument
14819 delimiter.
14820 b -- print the QImode name of the register for the indicated operand.
14821 %b0 would print %al if operands[0] is reg 0.
14822 w -- likewise, print the HImode name of the register.
14823 k -- likewise, print the SImode name of the register.
14824 q -- likewise, print the DImode name of the register.
14825 x -- likewise, print the V4SFmode name of the register.
14826 t -- likewise, print the V8SFmode name of the register.
14827 g -- likewise, print the V16SFmode name of the register.
14828 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14829 y -- print "st(0)" instead of "st" as a register.
14830 d -- print duplicated register operand for AVX instruction.
14831 D -- print condition for SSE cmp instruction.
14832 P -- if PIC, print an @PLT suffix.
14833 p -- print raw symbol name.
14834 X -- don't print any sort of PIC '@' suffix for a symbol.
14835 & -- print some in-use local-dynamic symbol name.
14836 H -- print a memory address offset by 8; used for sse high-parts
14837 Y -- print condition for XOP pcom* instruction.
14838 + -- print a branch hint as 'cs' or 'ds' prefix
14839 ; -- print a semicolon (after prefixes, due to a bug in older gas).
14840 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14841 @ -- print a segment register of thread base pointer load
14842 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14843 */
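/* A short illustration of the more common codes: with an SImode register
   operand, "%z0" emits the AT&T size suffix 'l' (and nothing in Intel
   syntax), "%k0" prints the 32-bit register name, and plain "%0" prints
   the operand according to its own mode.  The punctuation codes ('@', '*',
   '+', '&', ';', '~', '^') take no operand; see
   ix86_print_operand_punct_valid_p below.  */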
14844
14845 void
14846 ix86_print_operand (FILE *file, rtx x, int code)
14847 {
14848 if (code)
14849 {
14850 switch (code)
14851 {
14852 case 'A':
14853 switch (ASSEMBLER_DIALECT)
14854 {
14855 case ASM_ATT:
14856 putc ('*', file);
14857 break;
14858
14859 case ASM_INTEL:
14860 /* Intel syntax. For absolute addresses, registers should not
14861 be surrounded by braces. */
14862 if (!REG_P (x))
14863 {
14864 putc ('[', file);
14865 ix86_print_operand (file, x, 0);
14866 putc (']', file);
14867 return;
14868 }
14869 break;
14870
14871 default:
14872 gcc_unreachable ();
14873 }
14874
14875 ix86_print_operand (file, x, 0);
14876 return;
14877
14878 case 'E':
14879 /* Wrap address in an UNSPEC to declare special handling. */
14880 if (TARGET_64BIT)
14881 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14882
14883 output_address (x);
14884 return;
14885
14886 case 'L':
14887 if (ASSEMBLER_DIALECT == ASM_ATT)
14888 putc ('l', file);
14889 return;
14890
14891 case 'W':
14892 if (ASSEMBLER_DIALECT == ASM_ATT)
14893 putc ('w', file);
14894 return;
14895
14896 case 'B':
14897 if (ASSEMBLER_DIALECT == ASM_ATT)
14898 putc ('b', file);
14899 return;
14900
14901 case 'Q':
14902 if (ASSEMBLER_DIALECT == ASM_ATT)
14903 putc ('l', file);
14904 return;
14905
14906 case 'S':
14907 if (ASSEMBLER_DIALECT == ASM_ATT)
14908 putc ('s', file);
14909 return;
14910
14911 case 'T':
14912 if (ASSEMBLER_DIALECT == ASM_ATT)
14913 putc ('t', file);
14914 return;
14915
14916 case 'O':
14917 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14918 if (ASSEMBLER_DIALECT != ASM_ATT)
14919 return;
14920
14921 switch (GET_MODE_SIZE (GET_MODE (x)))
14922 {
14923 case 2:
14924 putc ('w', file);
14925 break;
14926
14927 case 4:
14928 putc ('l', file);
14929 break;
14930
14931 case 8:
14932 putc ('q', file);
14933 break;
14934
14935 default:
14936 output_operand_lossage
14937 ("invalid operand size for operand code 'O'");
14938 return;
14939 }
14940
14941 putc ('.', file);
14942 #endif
14943 return;
14944
14945 case 'z':
14946 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14947 {
14948 /* Opcodes don't get size suffixes if using Intel opcodes. */
14949 if (ASSEMBLER_DIALECT == ASM_INTEL)
14950 return;
14951
14952 switch (GET_MODE_SIZE (GET_MODE (x)))
14953 {
14954 case 1:
14955 putc ('b', file);
14956 return;
14957
14958 case 2:
14959 putc ('w', file);
14960 return;
14961
14962 case 4:
14963 putc ('l', file);
14964 return;
14965
14966 case 8:
14967 putc ('q', file);
14968 return;
14969
14970 default:
14971 output_operand_lossage
14972 ("invalid operand size for operand code 'z'");
14973 return;
14974 }
14975 }
14976
14977 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14978 warning
14979 (0, "non-integer operand used with operand code 'z'");
14980 /* FALLTHRU */
14981
14982 case 'Z':
14983 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
14984 if (ASSEMBLER_DIALECT == ASM_INTEL)
14985 return;
14986
14987 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14988 {
14989 switch (GET_MODE_SIZE (GET_MODE (x)))
14990 {
14991 case 2:
14992 #ifdef HAVE_AS_IX86_FILDS
14993 putc ('s', file);
14994 #endif
14995 return;
14996
14997 case 4:
14998 putc ('l', file);
14999 return;
15000
15001 case 8:
15002 #ifdef HAVE_AS_IX86_FILDQ
15003 putc ('q', file);
15004 #else
15005 fputs ("ll", file);
15006 #endif
15007 return;
15008
15009 default:
15010 break;
15011 }
15012 }
15013 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15014 {
15015 /* 387 opcodes don't get size suffixes
15016 if the operands are registers. */
15017 if (STACK_REG_P (x))
15018 return;
15019
15020 switch (GET_MODE_SIZE (GET_MODE (x)))
15021 {
15022 case 4:
15023 putc ('s', file);
15024 return;
15025
15026 case 8:
15027 putc ('l', file);
15028 return;
15029
15030 case 12:
15031 case 16:
15032 putc ('t', file);
15033 return;
15034
15035 default:
15036 break;
15037 }
15038 }
15039 else
15040 {
15041 output_operand_lossage
15042 ("invalid operand type used with operand code 'Z'");
15043 return;
15044 }
15045
15046 output_operand_lossage
15047 ("invalid operand size for operand code 'Z'");
15048 return;
15049
15050 case 'd':
15051 case 'b':
15052 case 'w':
15053 case 'k':
15054 case 'q':
15055 case 'h':
15056 case 't':
15057 case 'g':
15058 case 'y':
15059 case 'x':
15060 case 'X':
15061 case 'P':
15062 case 'p':
15063 break;
15064
15065 case 's':
15066 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15067 {
15068 ix86_print_operand (file, x, 0);
15069 fputs (", ", file);
15070 }
15071 return;
15072
15073 case 'Y':
15074 switch (GET_CODE (x))
15075 {
15076 case NE:
15077 fputs ("neq", file);
15078 break;
15079 case EQ:
15080 fputs ("eq", file);
15081 break;
15082 case GE:
15083 case GEU:
15084 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15085 break;
15086 case GT:
15087 case GTU:
15088 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15089 break;
15090 case LE:
15091 case LEU:
15092 fputs ("le", file);
15093 break;
15094 case LT:
15095 case LTU:
15096 fputs ("lt", file);
15097 break;
15098 case UNORDERED:
15099 fputs ("unord", file);
15100 break;
15101 case ORDERED:
15102 fputs ("ord", file);
15103 break;
15104 case UNEQ:
15105 fputs ("ueq", file);
15106 break;
15107 case UNGE:
15108 fputs ("nlt", file);
15109 break;
15110 case UNGT:
15111 fputs ("nle", file);
15112 break;
15113 case UNLE:
15114 fputs ("ule", file);
15115 break;
15116 case UNLT:
15117 fputs ("ult", file);
15118 break;
15119 case LTGT:
15120 fputs ("une", file);
15121 break;
15122 default:
15123 output_operand_lossage ("operand is not a condition code, "
15124 "invalid operand code 'Y'");
15125 return;
15126 }
15127 return;
15128
15129 case 'D':
15130 /* A little bit of braindamage here. The SSE compare instructions
15131 use completely different names for the comparisons than the
15132 fp conditional moves do. */
15133 switch (GET_CODE (x))
15134 {
15135 case UNEQ:
15136 if (TARGET_AVX)
15137 {
15138 fputs ("eq_us", file);
15139 break;
15140 }
15141 case EQ:
15142 fputs ("eq", file);
15143 break;
15144 case UNLT:
15145 if (TARGET_AVX)
15146 {
15147 fputs ("nge", file);
15148 break;
15149 }
15150 case LT:
15151 fputs ("lt", file);
15152 break;
15153 case UNLE:
15154 if (TARGET_AVX)
15155 {
15156 fputs ("ngt", file);
15157 break;
15158 }
15159 case LE:
15160 fputs ("le", file);
15161 break;
15162 case UNORDERED:
15163 fputs ("unord", file);
15164 break;
15165 case LTGT:
15166 if (TARGET_AVX)
15167 {
15168 fputs ("neq_oq", file);
15169 break;
15170 }
15171 case NE:
15172 fputs ("neq", file);
15173 break;
15174 case GE:
15175 if (TARGET_AVX)
15176 {
15177 fputs ("ge", file);
15178 break;
15179 }
15180 case UNGE:
15181 fputs ("nlt", file);
15182 break;
15183 case GT:
15184 if (TARGET_AVX)
15185 {
15186 fputs ("gt", file);
15187 break;
15188 }
15189 case UNGT:
15190 fputs ("nle", file);
15191 break;
15192 case ORDERED:
15193 fputs ("ord", file);
15194 break;
15195 default:
15196 output_operand_lossage ("operand is not a condition code, "
15197 "invalid operand code 'D'");
15198 return;
15199 }
15200 return;
15201
15202 case 'F':
15203 case 'f':
15204 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15205 if (ASSEMBLER_DIALECT == ASM_ATT)
15206 putc ('.', file);
15207 #endif
15208
15209 case 'C':
15210 case 'c':
15211 if (!COMPARISON_P (x))
15212 {
15213 output_operand_lossage ("operand is not a condition code, "
15214 "invalid operand code '%c'", code);
15215 return;
15216 }
15217 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15218 code == 'c' || code == 'f',
15219 code == 'F' || code == 'f',
15220 file);
15221 return;
15222
15223 case 'H':
15224 if (!offsettable_memref_p (x))
15225 {
15226 output_operand_lossage ("operand is not an offsettable memory "
15227 "reference, invalid operand code 'H'");
15228 return;
15229 }
15230 /* It doesn't actually matter what mode we use here, as we're
15231 only going to use this for printing. */
15232 x = adjust_address_nv (x, DImode, 8);
15233 /* Output 'qword ptr' for intel assembler dialect. */
15234 if (ASSEMBLER_DIALECT == ASM_INTEL)
15235 code = 'q';
15236 break;
15237
15238 case 'K':
15239 gcc_assert (CONST_INT_P (x));
15240
15241 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15242 #ifdef HAVE_AS_IX86_HLE
15243 fputs ("xacquire ", file);
15244 #else
15245 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15246 #endif
15247 else if (INTVAL (x) & IX86_HLE_RELEASE)
15248 #ifdef HAVE_AS_IX86_HLE
15249 fputs ("xrelease ", file);
15250 #else
15251 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15252 #endif
15253 /* We do not want to print the value of the operand. */
15254 return;
15255
15256 case 'N':
15257 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15258 fputs ("{z}", file);
15259 return;
15260
15261 case 'r':
15262 gcc_assert (CONST_INT_P (x));
15263 gcc_assert (INTVAL (x) == ROUND_SAE);
15264
15265 if (ASSEMBLER_DIALECT == ASM_INTEL)
15266 fputs (", ", file);
15267
15268 fputs ("{sae}", file);
15269
15270 if (ASSEMBLER_DIALECT == ASM_ATT)
15271 fputs (", ", file);
15272
15273 return;
15274
15275 case 'R':
15276 gcc_assert (CONST_INT_P (x));
15277
15278 if (ASSEMBLER_DIALECT == ASM_INTEL)
15279 fputs (", ", file);
15280
15281 switch (INTVAL (x))
15282 {
15283 case ROUND_NEAREST_INT | ROUND_SAE:
15284 fputs ("{rn-sae}", file);
15285 break;
15286 case ROUND_NEG_INF | ROUND_SAE:
15287 fputs ("{rd-sae}", file);
15288 break;
15289 case ROUND_POS_INF | ROUND_SAE:
15290 fputs ("{ru-sae}", file);
15291 break;
15292 case ROUND_ZERO | ROUND_SAE:
15293 fputs ("{rz-sae}", file);
15294 break;
15295 default:
15296 gcc_unreachable ();
15297 }
15298
15299 if (ASSEMBLER_DIALECT == ASM_ATT)
15300 fputs (", ", file);
15301
15302 return;
15303
15304 case '*':
15305 if (ASSEMBLER_DIALECT == ASM_ATT)
15306 putc ('*', file);
15307 return;
15308
15309 case '&':
15310 {
15311 const char *name = get_some_local_dynamic_name ();
15312 if (name == NULL)
15313 output_operand_lossage ("'%%&' used without any "
15314 "local dynamic TLS references");
15315 else
15316 assemble_name (file, name);
15317 return;
15318 }
15319
15320 case '+':
15321 {
15322 rtx x;
15323
15324 if (!optimize
15325 || optimize_function_for_size_p (cfun)
15326 || !TARGET_BRANCH_PREDICTION_HINTS)
15327 return;
15328
15329 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15330 if (x)
15331 {
15332 int pred_val = XINT (x, 0);
15333
15334 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15335 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15336 {
15337 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15338 bool cputaken
15339 = final_forward_branch_p (current_output_insn) == 0;
15340
15341 /* Emit hints only in the case default branch prediction
15342 heuristics would fail. */
15343 if (taken != cputaken)
15344 {
15345 /* We use 3e (DS) prefix for taken branches and
15346 2e (CS) prefix for not taken branches. */
15347 if (taken)
15348 fputs ("ds ; ", file);
15349 else
15350 fputs ("cs ; ", file);
15351 }
15352 }
15353 }
15354 return;
15355 }
15356
15357 case ';':
15358 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15359 putc (';', file);
15360 #endif
15361 return;
15362
15363 case '@':
15364 if (ASSEMBLER_DIALECT == ASM_ATT)
15365 putc ('%', file);
15366
15367 /* The kernel uses a different segment register for performance
15368 reasons; a system call would not have to trash the userspace
15369 segment register, which would be expensive. */
15370 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15371 fputs ("fs", file);
15372 else
15373 fputs ("gs", file);
15374 return;
15375
15376 case '~':
15377 putc (TARGET_AVX2 ? 'i' : 'f', file);
15378 return;
15379
15380 case '^':
15381 if (TARGET_64BIT && Pmode != word_mode)
15382 fputs ("addr32 ", file);
15383 return;
15384
15385 default:
15386 output_operand_lossage ("invalid operand code '%c'", code);
15387 }
15388 }
15389
15390 if (REG_P (x))
15391 print_reg (x, code, file);
15392
15393 else if (MEM_P (x))
15394 {
15395 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15396 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15397 && GET_MODE (x) != BLKmode)
15398 {
15399 const char * size;
15400 switch (GET_MODE_SIZE (GET_MODE (x)))
15401 {
15402 case 1: size = "BYTE"; break;
15403 case 2: size = "WORD"; break;
15404 case 4: size = "DWORD"; break;
15405 case 8: size = "QWORD"; break;
15406 case 12: size = "TBYTE"; break;
15407 case 16:
15408 if (GET_MODE (x) == XFmode)
15409 size = "TBYTE";
15410 else
15411 size = "XMMWORD";
15412 break;
15413 case 32: size = "YMMWORD"; break;
15414 case 64: size = "ZMMWORD"; break;
15415 default:
15416 gcc_unreachable ();
15417 }
15418
15419 /* Check for explicit size override (codes 'b', 'w', 'k',
15420 'q' and 'x') */
15421 if (code == 'b')
15422 size = "BYTE";
15423 else if (code == 'w')
15424 size = "WORD";
15425 else if (code == 'k')
15426 size = "DWORD";
15427 else if (code == 'q')
15428 size = "QWORD";
15429 else if (code == 'x')
15430 size = "XMMWORD";
15431
15432 fputs (size, file);
15433 fputs (" PTR ", file);
15434 }
15435
15436 x = XEXP (x, 0);
15437 /* Avoid (%rip) for call operands. */
15438 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15439 && !CONST_INT_P (x))
15440 output_addr_const (file, x);
15441 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15442 output_operand_lossage ("invalid constraints for operand");
15443 else
15444 output_address (x);
15445 }
15446
15447 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15448 {
15449 REAL_VALUE_TYPE r;
15450 long l;
15451
15452 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15453 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15454
15455 if (ASSEMBLER_DIALECT == ASM_ATT)
15456 putc ('$', file);
15457 /* Sign extend 32bit SFmode immediate to 8 bytes. */
15458 if (code == 'q')
15459 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15460 (unsigned long long) (int) l);
15461 else
15462 fprintf (file, "0x%08x", (unsigned int) l);
15463 }
15464
15465 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15466 {
15467 REAL_VALUE_TYPE r;
15468 long l[2];
15469
15470 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15471 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15472
15473 if (ASSEMBLER_DIALECT == ASM_ATT)
15474 putc ('$', file);
15475 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15476 }
15477
15478 /* These float cases don't actually occur as immediate operands. */
15479 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15480 {
15481 char dstr[30];
15482
15483 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15484 fputs (dstr, file);
15485 }
15486
15487 else
15488 {
15489 /* We have patterns that allow zero sets of memory, for instance.
15490 In 64-bit mode, we should probably support all 8-byte vectors,
15491 since we can in fact encode that into an immediate. */
15492 if (GET_CODE (x) == CONST_VECTOR)
15493 {
15494 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15495 x = const0_rtx;
15496 }
15497
15498 if (code != 'P' && code != 'p')
15499 {
15500 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15501 {
15502 if (ASSEMBLER_DIALECT == ASM_ATT)
15503 putc ('$', file);
15504 }
15505 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15506 || GET_CODE (x) == LABEL_REF)
15507 {
15508 if (ASSEMBLER_DIALECT == ASM_ATT)
15509 putc ('$', file);
15510 else
15511 fputs ("OFFSET FLAT:", file);
15512 }
15513 }
15514 if (CONST_INT_P (x))
15515 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15516 else if (flag_pic || MACHOPIC_INDIRECT)
15517 output_pic_addr_const (file, x, code);
15518 else
15519 output_addr_const (file, x);
15520 }
15521 }
15522
15523 static bool
15524 ix86_print_operand_punct_valid_p (unsigned char code)
15525 {
15526 return (code == '@' || code == '*' || code == '+' || code == '&'
15527 || code == ';' || code == '~' || code == '^');
15528 }
15529 \f
15530 /* Print a memory operand whose address is ADDR. */
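/* In AT&T syntax the parts are printed as "disp(base,index,scale)", in
   Intel syntax as "[base+index*scale+disp]"; the VSIB and addr32/'q'
   special cases below mainly affect which register names are used.  */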
15531
15532 static void
15533 ix86_print_operand_address (FILE *file, rtx addr)
15534 {
15535 struct ix86_address parts;
15536 rtx base, index, disp;
15537 int scale;
15538 int ok;
15539 bool vsib = false;
15540 int code = 0;
15541
15542 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15543 {
15544 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15545 gcc_assert (parts.index == NULL_RTX);
15546 parts.index = XVECEXP (addr, 0, 1);
15547 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15548 addr = XVECEXP (addr, 0, 0);
15549 vsib = true;
15550 }
15551 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15552 {
15553 gcc_assert (TARGET_64BIT);
15554 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15555 code = 'q';
15556 }
15557 else
15558 ok = ix86_decompose_address (addr, &parts);
15559
15560 gcc_assert (ok);
15561
15562 base = parts.base;
15563 index = parts.index;
15564 disp = parts.disp;
15565 scale = parts.scale;
15566
15567 switch (parts.seg)
15568 {
15569 case SEG_DEFAULT:
15570 break;
15571 case SEG_FS:
15572 case SEG_GS:
15573 if (ASSEMBLER_DIALECT == ASM_ATT)
15574 putc ('%', file);
15575 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15576 break;
15577 default:
15578 gcc_unreachable ();
15579 }
15580
15581 /* Use one byte shorter RIP relative addressing for 64bit mode. */
15582 if (TARGET_64BIT && !base && !index)
15583 {
15584 rtx symbol = disp;
15585
15586 if (GET_CODE (disp) == CONST
15587 && GET_CODE (XEXP (disp, 0)) == PLUS
15588 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15589 symbol = XEXP (XEXP (disp, 0), 0);
15590
15591 if (GET_CODE (symbol) == LABEL_REF
15592 || (GET_CODE (symbol) == SYMBOL_REF
15593 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15594 base = pc_rtx;
15595 }
15596 if (!base && !index)
15597 {
15598 /* A displacement-only address requires special attention. */
15599
15600 if (CONST_INT_P (disp))
15601 {
15602 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15603 fputs ("ds:", file);
15604 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15605 }
15606 else if (flag_pic)
15607 output_pic_addr_const (file, disp, 0);
15608 else
15609 output_addr_const (file, disp);
15610 }
15611 else
15612 {
15613 /* Print SImode register names to force addr32 prefix. */
15614 if (SImode_address_operand (addr, VOIDmode))
15615 {
15616 #ifdef ENABLE_CHECKING
15617 gcc_assert (TARGET_64BIT);
15618 switch (GET_CODE (addr))
15619 {
15620 case SUBREG:
15621 gcc_assert (GET_MODE (addr) == SImode);
15622 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15623 break;
15624 case ZERO_EXTEND:
15625 case AND:
15626 gcc_assert (GET_MODE (addr) == DImode);
15627 break;
15628 default:
15629 gcc_unreachable ();
15630 }
15631 #endif
15632 gcc_assert (!code);
15633 code = 'k';
15634 }
15635 else if (code == 0
15636 && TARGET_X32
15637 && disp
15638 && CONST_INT_P (disp)
15639 && INTVAL (disp) < -16*1024*1024)
15640 {
15641 /* X32 runs in 64-bit mode, where displacement, DISP, in
15642 address DISP(%r64), is encoded as 32-bit immediate sign-
15643 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15644 address is %r64 + 0xffffffffbffffd00. When %r64 <
15645 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15646 which is invalid for x32. The correct address is %r64
15647 - 0x40000300 == 0xf7ffdd64. To properly encode
15648 -0x40000300(%r64) for x32, we zero-extend negative
15649 displacement by forcing addr32 prefix which truncates
15650 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15651 zero-extend all negative displacements, including -1(%rsp).
15652 However, for small negative displacements, sign-extension
15653 won't cause overflow. We only zero-extend negative
15654 displacements if they are < -16*1024*1024, which is also the
15655 bound used to check legitimate address displacements for PIC. */
15656 code = 'k';
15657 }
15658
15659 if (ASSEMBLER_DIALECT == ASM_ATT)
15660 {
15661 if (disp)
15662 {
15663 if (flag_pic)
15664 output_pic_addr_const (file, disp, 0);
15665 else if (GET_CODE (disp) == LABEL_REF)
15666 output_asm_label (disp);
15667 else
15668 output_addr_const (file, disp);
15669 }
15670
15671 putc ('(', file);
15672 if (base)
15673 print_reg (base, code, file);
15674 if (index)
15675 {
15676 putc (',', file);
15677 print_reg (index, vsib ? 0 : code, file);
15678 if (scale != 1 || vsib)
15679 fprintf (file, ",%d", scale);
15680 }
15681 putc (')', file);
15682 }
15683 else
15684 {
15685 rtx offset = NULL_RTX;
15686
15687 if (disp)
15688 {
15689 /* Pull out the offset of a symbol; print any symbol itself. */
15690 if (GET_CODE (disp) == CONST
15691 && GET_CODE (XEXP (disp, 0)) == PLUS
15692 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15693 {
15694 offset = XEXP (XEXP (disp, 0), 1);
15695 disp = gen_rtx_CONST (VOIDmode,
15696 XEXP (XEXP (disp, 0), 0));
15697 }
15698
15699 if (flag_pic)
15700 output_pic_addr_const (file, disp, 0);
15701 else if (GET_CODE (disp) == LABEL_REF)
15702 output_asm_label (disp);
15703 else if (CONST_INT_P (disp))
15704 offset = disp;
15705 else
15706 output_addr_const (file, disp);
15707 }
15708
15709 putc ('[', file);
15710 if (base)
15711 {
15712 print_reg (base, code, file);
15713 if (offset)
15714 {
15715 if (INTVAL (offset) >= 0)
15716 putc ('+', file);
15717 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15718 }
15719 }
15720 else if (offset)
15721 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15722 else
15723 putc ('0', file);
15724
15725 if (index)
15726 {
15727 putc ('+', file);
15728 print_reg (index, vsib ? 0 : code, file);
15729 if (scale != 1 || vsib)
15730 fprintf (file, "*%d", scale);
15731 }
15732 putc (']', file);
15733 }
15734 }
15735 }
15736
15737 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
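/* For example, an UNSPEC_GOTNTPOFF operand "foo" comes out as
   "foo@gottpoff(%rip)" in 64-bit AT&T output and as "foo@gotntpoff" in
   32-bit output; the remaining cases print the corresponding TLS
   relocation suffixes in the same way.  */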
15738
15739 static bool
15740 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15741 {
15742 rtx op;
15743
15744 if (GET_CODE (x) != UNSPEC)
15745 return false;
15746
15747 op = XVECEXP (x, 0, 0);
15748 switch (XINT (x, 1))
15749 {
15750 case UNSPEC_GOTTPOFF:
15751 output_addr_const (file, op);
15752 /* FIXME: This might be @TPOFF in Sun ld. */
15753 fputs ("@gottpoff", file);
15754 break;
15755 case UNSPEC_TPOFF:
15756 output_addr_const (file, op);
15757 fputs ("@tpoff", file);
15758 break;
15759 case UNSPEC_NTPOFF:
15760 output_addr_const (file, op);
15761 if (TARGET_64BIT)
15762 fputs ("@tpoff", file);
15763 else
15764 fputs ("@ntpoff", file);
15765 break;
15766 case UNSPEC_DTPOFF:
15767 output_addr_const (file, op);
15768 fputs ("@dtpoff", file);
15769 break;
15770 case UNSPEC_GOTNTPOFF:
15771 output_addr_const (file, op);
15772 if (TARGET_64BIT)
15773 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15774 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15775 else
15776 fputs ("@gotntpoff", file);
15777 break;
15778 case UNSPEC_INDNTPOFF:
15779 output_addr_const (file, op);
15780 fputs ("@indntpoff", file);
15781 break;
15782 #if TARGET_MACHO
15783 case UNSPEC_MACHOPIC_OFFSET:
15784 output_addr_const (file, op);
15785 putc ('-', file);
15786 machopic_output_function_base_name (file);
15787 break;
15788 #endif
15789
15790 case UNSPEC_STACK_CHECK:
15791 {
15792 int offset;
15793
15794 gcc_assert (flag_split_stack);
15795
15796 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15797 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15798 #else
15799 gcc_unreachable ();
15800 #endif
15801
15802 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15803 }
15804 break;
15805
15806 default:
15807 return false;
15808 }
15809
15810 return true;
15811 }
15812 \f
15813 /* Split one or more double-mode RTL references into pairs of half-mode
15814 references. The RTL can be REG, offsettable MEM, integer constant, or
15815 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15816 split and "num" is its length. lo_half and hi_half are output arrays
15817 that parallel "operands". */
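/* For example, splitting a DImode operand on a 32-bit target yields two
   SImode halves: an offsettable MEM is split with adjust_address at byte
   offsets 0 and GET_MODE_SIZE (SImode) == 4, while a REG or constant is
   split with simplify_gen_subreg at the same byte offsets.  */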
15818
15819 void
15820 split_double_mode (enum machine_mode mode, rtx operands[],
15821 int num, rtx lo_half[], rtx hi_half[])
15822 {
15823 enum machine_mode half_mode;
15824 unsigned int byte;
15825
15826 switch (mode)
15827 {
15828 case TImode:
15829 half_mode = DImode;
15830 break;
15831 case DImode:
15832 half_mode = SImode;
15833 break;
15834 default:
15835 gcc_unreachable ();
15836 }
15837
15838 byte = GET_MODE_SIZE (half_mode);
15839
15840 while (num--)
15841 {
15842 rtx op = operands[num];
15843
15844 /* simplify_subreg refuses to split volatile memory addresses,
15845 but we still have to handle them. */
15846 if (MEM_P (op))
15847 {
15848 lo_half[num] = adjust_address (op, half_mode, 0);
15849 hi_half[num] = adjust_address (op, half_mode, byte);
15850 }
15851 else
15852 {
15853 lo_half[num] = simplify_gen_subreg (half_mode, op,
15854 GET_MODE (op) == VOIDmode
15855 ? mode : GET_MODE (op), 0);
15856 hi_half[num] = simplify_gen_subreg (half_mode, op,
15857 GET_MODE (op) == VOIDmode
15858 ? mode : GET_MODE (op), byte);
15859 }
15860 }
15861 }
15862 \f
15863 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15864 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15865 is the expression of the binary operation. The output may either be
15866 emitted here, or returned to the caller, like all output_* functions.
15867
15868 There is no guarantee that the operands are the same mode, as they
15869 might be within FLOAT or FLOAT_EXTEND expressions. */
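/* For example, a PLUS in SFmode produces the template
   "vaddss\t{%2, %1, %0|%0, %1, %2}" with AVX, "addss\t{%2, %0|%0, %2}" for
   plain SSE, and for the 387 path something like "fadd%Z2\t%2" when
   operands[2] is a memory operand.  */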
15870
15871 #ifndef SYSV386_COMPAT
15872 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15873 wants to fix the assemblers because that causes incompatibility
15874 with gcc. No-one wants to fix gcc because that causes
15875 incompatibility with assemblers... You can use the option of
15876 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15877 #define SYSV386_COMPAT 1
15878 #endif
15879
15880 const char *
15881 output_387_binary_op (rtx insn, rtx *operands)
15882 {
15883 static char buf[40];
15884 const char *p;
15885 const char *ssep;
15886 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15887
15888 #ifdef ENABLE_CHECKING
15889 /* Even if we do not want to check the inputs, this documents the input
15890 constraints, which helps in understanding the following code. */
15891 if (STACK_REG_P (operands[0])
15892 && ((REG_P (operands[1])
15893 && REGNO (operands[0]) == REGNO (operands[1])
15894 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15895 || (REG_P (operands[2])
15896 && REGNO (operands[0]) == REGNO (operands[2])
15897 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15898 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15899 ; /* ok */
15900 else
15901 gcc_assert (is_sse);
15902 #endif
15903
15904 switch (GET_CODE (operands[3]))
15905 {
15906 case PLUS:
15907 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15908 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15909 p = "fiadd";
15910 else
15911 p = "fadd";
15912 ssep = "vadd";
15913 break;
15914
15915 case MINUS:
15916 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15917 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15918 p = "fisub";
15919 else
15920 p = "fsub";
15921 ssep = "vsub";
15922 break;
15923
15924 case MULT:
15925 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15926 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15927 p = "fimul";
15928 else
15929 p = "fmul";
15930 ssep = "vmul";
15931 break;
15932
15933 case DIV:
15934 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15935 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15936 p = "fidiv";
15937 else
15938 p = "fdiv";
15939 ssep = "vdiv";
15940 break;
15941
15942 default:
15943 gcc_unreachable ();
15944 }
15945
15946 if (is_sse)
15947 {
15948 if (TARGET_AVX)
15949 {
15950 strcpy (buf, ssep);
15951 if (GET_MODE (operands[0]) == SFmode)
15952 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15953 else
15954 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15955 }
15956 else
15957 {
15958 strcpy (buf, ssep + 1);
15959 if (GET_MODE (operands[0]) == SFmode)
15960 strcat (buf, "ss\t{%2, %0|%0, %2}");
15961 else
15962 strcat (buf, "sd\t{%2, %0|%0, %2}");
15963 }
15964 return buf;
15965 }
15966 strcpy (buf, p);
15967
15968 switch (GET_CODE (operands[3]))
15969 {
15970 case MULT:
15971 case PLUS:
15972 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15973 {
15974 rtx temp = operands[2];
15975 operands[2] = operands[1];
15976 operands[1] = temp;
15977 }
15978
15979 /* We now know that operands[0] == operands[1]. */
15980
15981 if (MEM_P (operands[2]))
15982 {
15983 p = "%Z2\t%2";
15984 break;
15985 }
15986
15987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15988 {
15989 if (STACK_TOP_P (operands[0]))
15990 /* How is it that we are storing to a dead operand[2]?
15991 Well, presumably operands[1] is dead too. We can't
15992 store the result to st(0) as st(0) gets popped on this
15993 instruction. Instead store to operands[2] (which I
15994 think has to be st(1)). st(1) will be popped later.
15995 gcc <= 2.8.1 didn't have this check and generated
15996 assembly code that the Unixware assembler rejected. */
15997 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15998 else
15999 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16000 break;
16001 }
16002
16003 if (STACK_TOP_P (operands[0]))
16004 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16005 else
16006 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16007 break;
16008
16009 case MINUS:
16010 case DIV:
16011 if (MEM_P (operands[1]))
16012 {
16013 p = "r%Z1\t%1";
16014 break;
16015 }
16016
16017 if (MEM_P (operands[2]))
16018 {
16019 p = "%Z2\t%2";
16020 break;
16021 }
16022
16023 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16024 {
16025 #if SYSV386_COMPAT
16026 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16027 derived assemblers, confusingly reverse the direction of
16028 the operation for fsub{r} and fdiv{r} when the
16029 destination register is not st(0). The Intel assembler
16030 doesn't have this brain damage. Read !SYSV386_COMPAT to
16031 figure out what the hardware really does. */
16032 if (STACK_TOP_P (operands[0]))
16033 p = "{p\t%0, %2|rp\t%2, %0}";
16034 else
16035 p = "{rp\t%2, %0|p\t%0, %2}";
16036 #else
16037 if (STACK_TOP_P (operands[0]))
16038 /* As above for fmul/fadd, we can't store to st(0). */
16039 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
16040 else
16041 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
16042 #endif
16043 break;
16044 }
16045
16046 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16047 {
16048 #if SYSV386_COMPAT
16049 if (STACK_TOP_P (operands[0]))
16050 p = "{rp\t%0, %1|p\t%1, %0}";
16051 else
16052 p = "{p\t%1, %0|rp\t%0, %1}";
16053 #else
16054 if (STACK_TOP_P (operands[0]))
16055 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16056 else
16057 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16058 #endif
16059 break;
16060 }
16061
16062 if (STACK_TOP_P (operands[0]))
16063 {
16064 if (STACK_TOP_P (operands[1]))
16065 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16066 else
16067 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16068 break;
16069 }
16070 else if (STACK_TOP_P (operands[1]))
16071 {
16072 #if SYSV386_COMPAT
16073 p = "{\t%1, %0|r\t%0, %1}";
16074 #else
16075 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16076 #endif
16077 }
16078 else
16079 {
16080 #if SYSV386_COMPAT
16081 p = "{r\t%2, %0|\t%0, %2}";
16082 #else
16083 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16084 #endif
16085 }
16086 break;
16087
16088 default:
16089 gcc_unreachable ();
16090 }
16091
16092 strcat (buf, p);
16093 return buf;
16094 }
16095
16096 /* Check if a 256bit AVX register is referenced inside of EXP. */
16097
16098 static int
16099 ix86_check_avx256_register (rtx *pexp, void *)
16100 {
16101 rtx exp = *pexp;
16102
16103 if (GET_CODE (exp) == SUBREG)
16104 exp = SUBREG_REG (exp);
16105
16106 if (REG_P (exp)
16107 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16108 return 1;
16109
16110 return 0;
16111 }
16112
16113 /* Return needed mode for entity in optimize_mode_switching pass. */
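/* For example, a call whose CALL_INSN_FUNCTION_USAGE mentions a 256bit
   argument register (say a V8SFmode __m256 argument) needs AVX_U128_DIRTY,
   while a call using only scalar or 128bit arguments needs AVX_U128_CLEAN,
   so a vzeroupper may be placed before it.  */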
16114
16115 static int
16116 ix86_avx_u128_mode_needed (rtx_insn *insn)
16117 {
16118 if (CALL_P (insn))
16119 {
16120 rtx link;
16121
16122 /* Needed mode is set to AVX_U128_CLEAN if there are
16123 no 256bit modes used in function arguments. */
16124 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16125 link;
16126 link = XEXP (link, 1))
16127 {
16128 if (GET_CODE (XEXP (link, 0)) == USE)
16129 {
16130 rtx arg = XEXP (XEXP (link, 0), 0);
16131
16132 if (ix86_check_avx256_register (&arg, NULL))
16133 return AVX_U128_DIRTY;
16134 }
16135 }
16136
16137 return AVX_U128_CLEAN;
16138 }
16139
16140 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16141 changes state only when a 256bit register is written to, but we need
16142 to prevent the compiler from moving the optimal insertion point above
16143 an eventual read from a 256bit register. */
16144 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16145 return AVX_U128_DIRTY;
16146
16147 return AVX_U128_ANY;
16148 }
16149
16150 /* Return mode that i387 must be switched into
16151 prior to the execution of insn. */
16152
16153 static int
16154 ix86_i387_mode_needed (int entity, rtx_insn *insn)
16155 {
16156 enum attr_i387_cw mode;
16157
16158 /* The mode UNINITIALIZED is used to store the control word after a
16159 function call or ASM pattern. The mode ANY specifies that the function
16160 has no requirements on the control word and makes no changes in the
16161 bits we are interested in. */
16162
16163 if (CALL_P (insn)
16164 || (NONJUMP_INSN_P (insn)
16165 && (asm_noperands (PATTERN (insn)) >= 0
16166 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16167 return I387_CW_UNINITIALIZED;
16168
16169 if (recog_memoized (insn) < 0)
16170 return I387_CW_ANY;
16171
16172 mode = get_attr_i387_cw (insn);
16173
16174 switch (entity)
16175 {
16176 case I387_TRUNC:
16177 if (mode == I387_CW_TRUNC)
16178 return mode;
16179 break;
16180
16181 case I387_FLOOR:
16182 if (mode == I387_CW_FLOOR)
16183 return mode;
16184 break;
16185
16186 case I387_CEIL:
16187 if (mode == I387_CW_CEIL)
16188 return mode;
16189 break;
16190
16191 case I387_MASK_PM:
16192 if (mode == I387_CW_MASK_PM)
16193 return mode;
16194 break;
16195
16196 default:
16197 gcc_unreachable ();
16198 }
16199
16200 return I387_CW_ANY;
16201 }
16202
16203 /* Return mode that entity must be switched into
16204 prior to the execution of insn. */
16205
16206 static int
16207 ix86_mode_needed (int entity, rtx_insn *insn)
16208 {
16209 switch (entity)
16210 {
16211 case AVX_U128:
16212 return ix86_avx_u128_mode_needed (insn);
16213 case I387_TRUNC:
16214 case I387_FLOOR:
16215 case I387_CEIL:
16216 case I387_MASK_PM:
16217 return ix86_i387_mode_needed (entity, insn);
16218 default:
16219 gcc_unreachable ();
16220 }
16221 return 0;
16222 }
16223
16224 /* Check if a 256bit AVX register is referenced in stores. */
16225
16226 static void
16227 ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16228 {
16229 if (ix86_check_avx256_register (&dest, NULL))
16230 {
16231 bool *used = (bool *) data;
16232 *used = true;
16233 }
16234 }
16235
16236 /* Calculate mode of upper 128bit AVX registers after the insn. */
16237
16238 static int
16239 ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16240 {
16241 rtx pat = PATTERN (insn);
16242
16243 if (vzeroupper_operation (pat, VOIDmode)
16244 || vzeroall_operation (pat, VOIDmode))
16245 return AVX_U128_CLEAN;
16246
16247 /* We know that the state is clean after a CALL insn if no 256bit
16248 register is used for the function return value. */
16249 if (CALL_P (insn))
16250 {
16251 bool avx_reg256_found = false;
16252 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16253
16254 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16255 }
16256
16257 /* Otherwise, return current mode. Remember that if insn
16258 references AVX 256bit registers, the mode was already changed
16259 to DIRTY from MODE_NEEDED. */
16260 return mode;
16261 }
16262
16263 /* Return the mode that an insn results in. */
16264
16265 int
16266 ix86_mode_after (int entity, int mode, rtx_insn *insn)
16267 {
16268 switch (entity)
16269 {
16270 case AVX_U128:
16271 return ix86_avx_u128_mode_after (mode, insn);
16272 case I387_TRUNC:
16273 case I387_FLOOR:
16274 case I387_CEIL:
16275 case I387_MASK_PM:
16276 return mode;
16277 default:
16278 gcc_unreachable ();
16279 }
16280 }
16281
16282 static int
16283 ix86_avx_u128_mode_entry (void)
16284 {
16285 tree arg;
16286
16287 /* Entry mode is set to AVX_U128_DIRTY if there are
16288 256bit modes used in function arguments. */
16289 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16290 arg = TREE_CHAIN (arg))
16291 {
16292 rtx incoming = DECL_INCOMING_RTL (arg);
16293
16294 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16295 return AVX_U128_DIRTY;
16296 }
16297
16298 return AVX_U128_CLEAN;
16299 }
16300
16301 /* Return a mode that ENTITY is assumed to be
16302 switched to at function entry. */
16303
16304 static int
16305 ix86_mode_entry (int entity)
16306 {
16307 switch (entity)
16308 {
16309 case AVX_U128:
16310 return ix86_avx_u128_mode_entry ();
16311 case I387_TRUNC:
16312 case I387_FLOOR:
16313 case I387_CEIL:
16314 case I387_MASK_PM:
16315 return I387_CW_ANY;
16316 default:
16317 gcc_unreachable ();
16318 }
16319 }
16320
16321 static int
16322 ix86_avx_u128_mode_exit (void)
16323 {
16324 rtx reg = crtl->return_rtx;
16325
16326 /* Exit mode is set to AVX_U128_DIRTY if there are
16327 256bit modes used in the function return register. */
16328 if (reg && ix86_check_avx256_register (&reg, NULL))
16329 return AVX_U128_DIRTY;
16330
16331 return AVX_U128_CLEAN;
16332 }
16333
16334 /* Return a mode that ENTITY is assumed to be
16335 switched to at function exit. */
16336
16337 static int
16338 ix86_mode_exit (int entity)
16339 {
16340 switch (entity)
16341 {
16342 case AVX_U128:
16343 return ix86_avx_u128_mode_exit ();
16344 case I387_TRUNC:
16345 case I387_FLOOR:
16346 case I387_CEIL:
16347 case I387_MASK_PM:
16348 return I387_CW_ANY;
16349 default:
16350 gcc_unreachable ();
16351 }
16352 }
16353
16354 static int
16355 ix86_mode_priority (int, int n)
16356 {
16357 return n;
16358 }
16359
16360 /* Output code to initialize control word copies used by trunc?f?i and
16361 rounding patterns. The current control word is read and saved, then
16362 modified according to MODE and stored into a new stack slot. */
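/* As a reminder of the control word layout relied on below: bits 10-11 are
   the rounding control (00 = to nearest, 01 = down, 10 = up, 11 = toward
   zero) and bit 5 masks the precision exception, hence the 0x0c00, 0x0400,
   0x0800 and 0x0020 constants.  */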
16363
16364 static void
16365 emit_i387_cw_initialization (int mode)
16366 {
16367 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16368 rtx new_mode;
16369
16370 enum ix86_stack_slot slot;
16371
16372 rtx reg = gen_reg_rtx (HImode);
16373
16374 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16375 emit_move_insn (reg, copy_rtx (stored_mode));
16376
16377 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16378 || optimize_insn_for_size_p ())
16379 {
16380 switch (mode)
16381 {
16382 case I387_CW_TRUNC:
16383 /* round toward zero (truncate) */
16384 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16385 slot = SLOT_CW_TRUNC;
16386 break;
16387
16388 case I387_CW_FLOOR:
16389 /* round down toward -oo */
16390 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16391 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16392 slot = SLOT_CW_FLOOR;
16393 break;
16394
16395 case I387_CW_CEIL:
16396 /* round up toward +oo */
16397 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16398 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16399 slot = SLOT_CW_CEIL;
16400 break;
16401
16402 case I387_CW_MASK_PM:
16403 /* mask precision exception for nearbyint() */
16404 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16405 slot = SLOT_CW_MASK_PM;
16406 break;
16407
16408 default:
16409 gcc_unreachable ();
16410 }
16411 }
16412 else
16413 {
16414 switch (mode)
16415 {
16416 case I387_CW_TRUNC:
16417 /* round toward zero (truncate) */
16418 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16419 slot = SLOT_CW_TRUNC;
16420 break;
16421
16422 case I387_CW_FLOOR:
16423 /* round down toward -oo */
16424 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16425 slot = SLOT_CW_FLOOR;
16426 break;
16427
16428 case I387_CW_CEIL:
16429 /* round up toward +oo */
16430 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16431 slot = SLOT_CW_CEIL;
16432 break;
16433
16434 case I387_CW_MASK_PM:
16435 /* mask precision exception for nearbyint() */
16436 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16437 slot = SLOT_CW_MASK_PM;
16438 break;
16439
16440 default:
16441 gcc_unreachable ();
16442 }
16443 }
16444
16445 gcc_assert (slot < MAX_386_STACK_LOCALS);
16446
16447 new_mode = assign_386_stack_local (HImode, slot);
16448 emit_move_insn (new_mode, reg);
16449 }
16450
16451 /* Emit vzeroupper. */
16452
16453 void
16454 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16455 {
16456 int i;
16457
16458 /* Cancel automatic vzeroupper insertion if there are
16459 live call-saved SSE registers at the insertion point. */
16460
16461 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16462 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16463 return;
16464
16465 if (TARGET_64BIT)
16466 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16467 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16468 return;
16469
16470 emit_insn (gen_avx_vzeroupper ());
16471 }
16472
16475 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16476 is the set of hard registers live at the point where the insn(s)
16477 are to be inserted. */
16478
16479 static void
16480 ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
16481 HARD_REG_SET regs_live)
16482 {
16483 switch (entity)
16484 {
16485 case AVX_U128:
16486 if (mode == AVX_U128_CLEAN)
16487 ix86_avx_emit_vzeroupper (regs_live);
16488 break;
16489 case I387_TRUNC:
16490 case I387_FLOOR:
16491 case I387_CEIL:
16492 case I387_MASK_PM:
16493 if (mode != I387_CW_ANY
16494 && mode != I387_CW_UNINITIALIZED)
16495 emit_i387_cw_initialization (mode);
16496 break;
16497 default:
16498 gcc_unreachable ();
16499 }
16500 }
16501
16502 /* Output code for INSN to convert a float to a signed int. OPERANDS
16503 are the insn operands. The output may be [HSD]Imode and the input
16504 operand may be [SDX]Fmode. */
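/* For example, when fisttp is not available and a non-default rounding
   mode is needed, the emitted sequence is "fldcw %3" (load the control
   word for the required rounding mode), "fistp%Z0 %0", then "fldcw %2"
   to restore the original control word.  */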
16505
16506 const char *
16507 output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
16508 {
16509 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16510 int dimode_p = GET_MODE (operands[0]) == DImode;
16511 int round_mode = get_attr_i387_cw (insn);
16512
16513 /* Jump through a hoop or two for DImode, since the hardware has no
16514 non-popping instruction. We used to do this a different way, but
16515 that was somewhat fragile and broke with post-reload splitters. */
16516 if ((dimode_p || fisttp) && !stack_top_dies)
16517 output_asm_insn ("fld\t%y1", operands);
16518
16519 gcc_assert (STACK_TOP_P (operands[1]));
16520 gcc_assert (MEM_P (operands[0]));
16521 gcc_assert (GET_MODE (operands[1]) != TFmode);
16522
16523 if (fisttp)
16524 output_asm_insn ("fisttp%Z0\t%0", operands);
16525 else
16526 {
16527 if (round_mode != I387_CW_ANY)
16528 output_asm_insn ("fldcw\t%3", operands);
16529 if (stack_top_dies || dimode_p)
16530 output_asm_insn ("fistp%Z0\t%0", operands);
16531 else
16532 output_asm_insn ("fist%Z0\t%0", operands);
16533 if (round_mode != I387_CW_ANY)
16534 output_asm_insn ("fldcw\t%2", operands);
16535 }
16536
16537 return "";
16538 }
16539
16540 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16541 have the values zero or one, indicates the ffreep insn's operand
16542 from the OPERANDS array. */
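/* When the assembler does not understand ffreep, the insn is emitted as raw
   bytes instead: ffreep %st(N) encodes as 0xdf 0xc0+N, which the
   little-endian ASM_SHORT below spells as "0xc<N>df".  */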
16543
16544 static const char *
16545 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16546 {
16547 if (TARGET_USE_FFREEP)
16548 #ifdef HAVE_AS_IX86_FFREEP
16549 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16550 #else
16551 {
16552 static char retval[32];
16553 int regno = REGNO (operands[opno]);
16554
16555 gcc_assert (STACK_REGNO_P (regno));
16556
16557 regno -= FIRST_STACK_REG;
16558
16559 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16560 return retval;
16561 }
16562 #endif
16563
16564 return opno ? "fstp\t%y1" : "fstp\t%y0";
16565 }
16566
16567
16568 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16569 should be used. UNORDERED_P is true when fucom should be used. */
16570
16571 const char *
16572 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16573 {
16574 int stack_top_dies;
16575 rtx cmp_op0, cmp_op1;
16576 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16577
16578 if (eflags_p)
16579 {
16580 cmp_op0 = operands[0];
16581 cmp_op1 = operands[1];
16582 }
16583 else
16584 {
16585 cmp_op0 = operands[1];
16586 cmp_op1 = operands[2];
16587 }
16588
16589 if (is_sse)
16590 {
16591 if (GET_MODE (operands[0]) == SFmode)
16592 if (unordered_p)
16593 return "%vucomiss\t{%1, %0|%0, %1}";
16594 else
16595 return "%vcomiss\t{%1, %0|%0, %1}";
16596 else
16597 if (unordered_p)
16598 return "%vucomisd\t{%1, %0|%0, %1}";
16599 else
16600 return "%vcomisd\t{%1, %0|%0, %1}";
16601 }
16602
16603 gcc_assert (STACK_TOP_P (cmp_op0));
16604
16605 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16606
16607 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16608 {
16609 if (stack_top_dies)
16610 {
16611 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16612 return output_387_ffreep (operands, 1);
16613 }
16614 else
16615 return "ftst\n\tfnstsw\t%0";
16616 }
16617
16618 if (STACK_REG_P (cmp_op1)
16619 && stack_top_dies
16620 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16621 && REGNO (cmp_op1) != FIRST_STACK_REG)
16622 {
16623 /* If the top of the 387 stack dies, and the other operand is also a
16624 stack register that dies, then this must be a `fcompp' float
16625 compare. */
16626
16627 if (eflags_p)
16628 {
16629 /* There is no double popping fcomi variant. Fortunately,
16630 eflags is immune from the fstp's cc clobbering. */
16631 if (unordered_p)
16632 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16633 else
16634 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16635 return output_387_ffreep (operands, 0);
16636 }
16637 else
16638 {
16639 if (unordered_p)
16640 return "fucompp\n\tfnstsw\t%0";
16641 else
16642 return "fcompp\n\tfnstsw\t%0";
16643 }
16644 }
16645 else
16646 {
16647 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
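/* For example, eflags_p = 1, unordered_p = 1 and stack_top_dies = 1 with a
   floating-point cmp_op1 gives mask = 0b1011 = 11, selecting
   "fucomip\t{%y1, %0|%0, %y1}" from the table below.  */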
16648
16649 static const char * const alt[16] =
16650 {
16651 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16652 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16653 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16654 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16655
16656 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16657 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16658 NULL,
16659 NULL,
16660
16661 "fcomi\t{%y1, %0|%0, %y1}",
16662 "fcomip\t{%y1, %0|%0, %y1}",
16663 "fucomi\t{%y1, %0|%0, %y1}",
16664 "fucomip\t{%y1, %0|%0, %y1}",
16665
16666 NULL,
16667 NULL,
16668 NULL,
16669 NULL
16670 };
16671
16672 int mask;
16673 const char *ret;
16674
16675 mask = eflags_p << 3;
16676 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16677 mask |= unordered_p << 1;
16678 mask |= stack_top_dies;
16679
16680 gcc_assert (mask < 16);
16681 ret = alt[mask];
16682 gcc_assert (ret);
16683
16684 return ret;
16685 }
16686 }
16687
16688 void
16689 ix86_output_addr_vec_elt (FILE *file, int value)
16690 {
16691 const char *directive = ASM_LONG;
16692
16693 #ifdef ASM_QUAD
16694 if (TARGET_LP64)
16695 directive = ASM_QUAD;
16696 #else
16697 gcc_assert (!TARGET_64BIT);
16698 #endif
16699
16700 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16701 }
16702
16703 void
16704 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16705 {
16706 const char *directive = ASM_LONG;
16707
16708 #ifdef ASM_QUAD
16709 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16710 directive = ASM_QUAD;
16711 #else
16712 gcc_assert (!TARGET_64BIT);
16713 #endif
16714 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16715 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16716 fprintf (file, "%s%s%d-%s%d\n",
16717 directive, LPREFIX, value, LPREFIX, rel);
16718 else if (HAVE_AS_GOTOFF_IN_DATA)
16719 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16720 #if TARGET_MACHO
16721 else if (TARGET_MACHO)
16722 {
16723 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16724 machopic_output_function_base_name (file);
16725 putc ('\n', file);
16726 }
16727 #endif
16728 else
16729 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16730 GOT_SYMBOL_NAME, LPREFIX, value);
16731 }
16732 \f
16733 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16734 for the target. */
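/* For example, "xor %eax, %eax" is two bytes versus five for
   "mov $0, %eax", but it clobbers the flags, which is why an explicit
   CLOBBER of FLAGS_REG is attached below whenever the xor form is used.  */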
16735
16736 void
16737 ix86_expand_clear (rtx dest)
16738 {
16739 rtx tmp;
16740
16741 /* We play register width games, which are only valid after reload. */
16742 gcc_assert (reload_completed);
16743
16744 /* Avoid HImode and its attendant prefix byte. */
16745 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16746 dest = gen_rtx_REG (SImode, REGNO (dest));
16747 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16748
16749 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16750 {
16751 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16752 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16753 }
16754
16755 emit_insn (tmp);
16756 }
16757
16758 /* X is an unchanging MEM. If it is a constant pool reference, return
16759 the constant pool rtx, else NULL. */
16760
16761 rtx
16762 maybe_get_pool_constant (rtx x)
16763 {
16764 x = ix86_delegitimize_address (XEXP (x, 0));
16765
16766 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16767 return get_pool_constant (x);
16768
16769 return NULL_RTX;
16770 }
16771
16772 void
16773 ix86_expand_move (enum machine_mode mode, rtx operands[])
16774 {
16775 rtx op0, op1;
16776 enum tls_model model;
16777
16778 op0 = operands[0];
16779 op1 = operands[1];
16780
16781 if (GET_CODE (op1) == SYMBOL_REF)
16782 {
16783 rtx tmp;
16784
16785 model = SYMBOL_REF_TLS_MODEL (op1);
16786 if (model)
16787 {
16788 op1 = legitimize_tls_address (op1, model, true);
16789 op1 = force_operand (op1, op0);
16790 if (op1 == op0)
16791 return;
16792 op1 = convert_to_mode (mode, op1, 1);
16793 }
16794 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16795 op1 = tmp;
16796 }
16797 else if (GET_CODE (op1) == CONST
16798 && GET_CODE (XEXP (op1, 0)) == PLUS
16799 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16800 {
16801 rtx addend = XEXP (XEXP (op1, 0), 1);
16802 rtx symbol = XEXP (XEXP (op1, 0), 0);
16803 rtx tmp;
16804
16805 model = SYMBOL_REF_TLS_MODEL (symbol);
16806 if (model)
16807 tmp = legitimize_tls_address (symbol, model, true);
16808 else
16809 tmp = legitimize_pe_coff_symbol (symbol, true);
16810
16811 if (tmp)
16812 {
16813 tmp = force_operand (tmp, NULL);
16814 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16815 op0, 1, OPTAB_DIRECT);
16816 if (tmp == op0)
16817 return;
16818 op1 = convert_to_mode (mode, tmp, 1);
16819 }
16820 }
16821
16822 if ((flag_pic || MACHOPIC_INDIRECT)
16823 && symbolic_operand (op1, mode))
16824 {
16825 if (TARGET_MACHO && !TARGET_64BIT)
16826 {
16827 #if TARGET_MACHO
16828 /* dynamic-no-pic */
16829 if (MACHOPIC_INDIRECT)
16830 {
16831 rtx temp = ((reload_in_progress
16832 || ((op0 && REG_P (op0))
16833 && mode == Pmode))
16834 ? op0 : gen_reg_rtx (Pmode));
16835 op1 = machopic_indirect_data_reference (op1, temp);
16836 if (MACHOPIC_PURE)
16837 op1 = machopic_legitimize_pic_address (op1, mode,
16838 temp == op1 ? 0 : temp);
16839 }
16840 if (op0 != op1 && GET_CODE (op0) != MEM)
16841 {
16842 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16843 emit_insn (insn);
16844 return;
16845 }
16846 if (GET_CODE (op0) == MEM)
16847 op1 = force_reg (Pmode, op1);
16848 else
16849 {
16850 rtx temp = op0;
16851 if (GET_CODE (temp) != REG)
16852 temp = gen_reg_rtx (Pmode);
16853 temp = legitimize_pic_address (op1, temp);
16854 if (temp == op0)
16855 return;
16856 op1 = temp;
16857 }
16858 /* dynamic-no-pic */
16859 #endif
16860 }
16861 else
16862 {
16863 if (MEM_P (op0))
16864 op1 = force_reg (mode, op1);
16865 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16866 {
16867 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16868 op1 = legitimize_pic_address (op1, reg);
16869 if (op0 == op1)
16870 return;
16871 op1 = convert_to_mode (mode, op1, 1);
16872 }
16873 }
16874 }
16875 else
16876 {
16877 if (MEM_P (op0)
16878 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16879 || !push_operand (op0, mode))
16880 && MEM_P (op1))
16881 op1 = force_reg (mode, op1);
16882
16883 if (push_operand (op0, mode)
16884 && ! general_no_elim_operand (op1, mode))
16885 op1 = copy_to_mode_reg (mode, op1);
16886
16887 /* Force large constants in 64bit compilation into a register
16888 to get them CSEed. */
16889 if (can_create_pseudo_p ()
16890 && (mode == DImode) && TARGET_64BIT
16891 && immediate_operand (op1, mode)
16892 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16893 && !register_operand (op0, mode)
16894 && optimize)
16895 op1 = copy_to_mode_reg (mode, op1);
16896
16897 if (can_create_pseudo_p ()
16898 && FLOAT_MODE_P (mode)
16899 && GET_CODE (op1) == CONST_DOUBLE)
16900 {
16901 /* If we are loading a floating point constant to a register,
16902 force the value to memory now, since we'll get better code
16903 out the back end. */
16904
16905 op1 = validize_mem (force_const_mem (mode, op1));
16906 if (!register_operand (op0, mode))
16907 {
16908 rtx temp = gen_reg_rtx (mode);
16909 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16910 emit_move_insn (op0, temp);
16911 return;
16912 }
16913 }
16914 }
16915
16916 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16917 }
16918
16919 void
16920 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16921 {
16922 rtx op0 = operands[0], op1 = operands[1];
16923 unsigned int align = GET_MODE_ALIGNMENT (mode);
16924
16925 if (push_operand (op0, VOIDmode))
16926 op0 = emit_move_resolve_push (mode, op0);
16927
16928 /* Force constants other than zero into memory. We do not know how
16929 the instructions used to build constants modify the upper 64 bits
16930 of the register; once we have that information, we may be able
16931 to handle some of them more efficiently. */
16932 if (can_create_pseudo_p ()
16933 && register_operand (op0, mode)
16934 && (CONSTANT_P (op1)
16935 || (GET_CODE (op1) == SUBREG
16936 && CONSTANT_P (SUBREG_REG (op1))))
16937 && !standard_sse_constant_p (op1))
16938 op1 = validize_mem (force_const_mem (mode, op1));
16939
16940 /* We need to check memory alignment for SSE modes, since an attribute
16941 can make operands unaligned. */
16942 if (can_create_pseudo_p ()
16943 && SSE_REG_MODE_P (mode)
16944 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16945 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16946 {
16947 rtx tmp[2];
16948
16949 /* ix86_expand_vector_move_misalign() does not like constants ... */
16950 if (CONSTANT_P (op1)
16951 || (GET_CODE (op1) == SUBREG
16952 && CONSTANT_P (SUBREG_REG (op1))))
16953 op1 = validize_mem (force_const_mem (mode, op1));
16954
16955 /* ... nor both arguments in memory. */
16956 if (!register_operand (op0, mode)
16957 && !register_operand (op1, mode))
16958 op1 = force_reg (mode, op1);
16959
16960 tmp[0] = op0; tmp[1] = op1;
16961 ix86_expand_vector_move_misalign (mode, tmp);
16962 return;
16963 }
16964
16965 /* If neither operand is a register, force operand1 into a register. */
16966 if (can_create_pseudo_p ()
16967 && !register_operand (op0, mode)
16968 && !register_operand (op1, mode))
16969 {
16970 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16971 return;
16972 }
16973
16974 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16975 }
16976
16977 /* Split 32-byte AVX unaligned load and store if needed. */
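/* For example, with -mavx256-split-unaligned-load a misaligned V8SFmode
   load is emitted as a 16-byte load of the low half followed by a
   VEC_CONCAT with the high half (typically a vmovups/vinsertf128 pair),
   and a split store becomes two vextractf128 stores.  */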
16978
16979 static void
16980 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16981 {
16982 rtx m;
16983 rtx (*extract) (rtx, rtx, rtx);
16984 rtx (*load_unaligned) (rtx, rtx);
16985 rtx (*store_unaligned) (rtx, rtx);
16986 enum machine_mode mode;
16987
16988 switch (GET_MODE (op0))
16989 {
16990 default:
16991 gcc_unreachable ();
16992 case V32QImode:
16993 extract = gen_avx_vextractf128v32qi;
16994 load_unaligned = gen_avx_loaddquv32qi;
16995 store_unaligned = gen_avx_storedquv32qi;
16996 mode = V16QImode;
16997 break;
16998 case V8SFmode:
16999 extract = gen_avx_vextractf128v8sf;
17000 load_unaligned = gen_avx_loadups256;
17001 store_unaligned = gen_avx_storeups256;
17002 mode = V4SFmode;
17003 break;
17004 case V4DFmode:
17005 extract = gen_avx_vextractf128v4df;
17006 load_unaligned = gen_avx_loadupd256;
17007 store_unaligned = gen_avx_storeupd256;
17008 mode = V2DFmode;
17009 break;
17010 }
17011
17012 if (MEM_P (op1))
17013 {
17014 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
17015 {
17016 rtx r = gen_reg_rtx (mode);
17017 m = adjust_address (op1, mode, 0);
17018 emit_move_insn (r, m);
17019 m = adjust_address (op1, mode, 16);
17020 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17021 emit_move_insn (op0, r);
17022 }
17023 /* Normal *mov<mode>_internal pattern will handle
17024 unaligned loads just fine if misaligned_operand
17025 is true, and without the UNSPEC it can be combined
17026 with arithmetic instructions. */
17027 else if (misaligned_operand (op1, GET_MODE (op1)))
17028 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17029 else
17030 emit_insn (load_unaligned (op0, op1));
17031 }
17032 else if (MEM_P (op0))
17033 {
17034 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
17035 {
17036 m = adjust_address (op0, mode, 0);
17037 emit_insn (extract (m, op1, const0_rtx));
17038 m = adjust_address (op0, mode, 16);
17039 emit_insn (extract (m, op1, const1_rtx));
17040 }
17041 else
17042 emit_insn (store_unaligned (op0, op1));
17043 }
17044 else
17045 gcc_unreachable ();
17046 }
17047
17048 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
17049 straight to ix86_expand_vector_move. */
17050 /* Code generation for scalar reg-reg moves of single and double precision data:
17051 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17052 movaps reg, reg
17053 else
17054 movss reg, reg
17055 if (x86_sse_partial_reg_dependency == true)
17056 movapd reg, reg
17057 else
17058 movsd reg, reg
17059
17060 Code generation for scalar loads of double precision data:
17061 if (x86_sse_split_regs == true)
17062 movlpd mem, reg (gas syntax)
17063 else
17064 movsd mem, reg
17065
17066 Code generation for unaligned packed loads of single precision data
17067 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17068 if (x86_sse_unaligned_move_optimal)
17069 movups mem, reg
17070
17071 if (x86_sse_partial_reg_dependency == true)
17072 {
17073 xorps reg, reg
17074 movlps mem, reg
17075 movhps mem+8, reg
17076 }
17077 else
17078 {
17079 movlps mem, reg
17080 movhps mem+8, reg
17081 }
17082
17083 Code generation for unaligned packed loads of double precision data
17084 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17085 if (x86_sse_unaligned_move_optimal)
17086 movupd mem, reg
17087
17088 if (x86_sse_split_regs == true)
17089 {
17090 movlpd mem, reg
17091 movhpd mem+8, reg
17092 }
17093 else
17094 {
17095 movsd mem, reg
17096 movhpd mem+8, reg
17097 }
17098 */
17099
17100 void
17101 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17102 {
17103 rtx op0, op1, orig_op0 = NULL_RTX, m;
17104 rtx (*load_unaligned) (rtx, rtx);
17105 rtx (*store_unaligned) (rtx, rtx);
17106
17107 op0 = operands[0];
17108 op1 = operands[1];
17109
17110 if (GET_MODE_SIZE (mode) == 64)
17111 {
17112 switch (GET_MODE_CLASS (mode))
17113 {
17114 case MODE_VECTOR_INT:
17115 case MODE_INT:
17116 if (GET_MODE (op0) != V16SImode)
17117 {
17118 if (!MEM_P (op0))
17119 {
17120 orig_op0 = op0;
17121 op0 = gen_reg_rtx (V16SImode);
17122 }
17123 else
17124 op0 = gen_lowpart (V16SImode, op0);
17125 }
17126 op1 = gen_lowpart (V16SImode, op1);
17127 /* FALLTHRU */
17128
17129 case MODE_VECTOR_FLOAT:
17130 switch (GET_MODE (op0))
17131 {
17132 default:
17133 gcc_unreachable ();
17134 case V16SImode:
17135 load_unaligned = gen_avx512f_loaddquv16si;
17136 store_unaligned = gen_avx512f_storedquv16si;
17137 break;
17138 case V16SFmode:
17139 load_unaligned = gen_avx512f_loadups512;
17140 store_unaligned = gen_avx512f_storeups512;
17141 break;
17142 case V8DFmode:
17143 load_unaligned = gen_avx512f_loadupd512;
17144 store_unaligned = gen_avx512f_storeupd512;
17145 break;
17146 }
17147
17148 if (MEM_P (op1))
17149 emit_insn (load_unaligned (op0, op1));
17150 else if (MEM_P (op0))
17151 emit_insn (store_unaligned (op0, op1));
17152 else
17153 gcc_unreachable ();
17154 if (orig_op0)
17155 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17156 break;
17157
17158 default:
17159 gcc_unreachable ();
17160 }
17161
17162 return;
17163 }
17164
17165 if (TARGET_AVX
17166 && GET_MODE_SIZE (mode) == 32)
17167 {
17168 switch (GET_MODE_CLASS (mode))
17169 {
17170 case MODE_VECTOR_INT:
17171 case MODE_INT:
17172 if (GET_MODE (op0) != V32QImode)
17173 {
17174 if (!MEM_P (op0))
17175 {
17176 orig_op0 = op0;
17177 op0 = gen_reg_rtx (V32QImode);
17178 }
17179 else
17180 op0 = gen_lowpart (V32QImode, op0);
17181 }
17182 op1 = gen_lowpart (V32QImode, op1);
17183 /* FALLTHRU */
17184
17185 case MODE_VECTOR_FLOAT:
17186 ix86_avx256_split_vector_move_misalign (op0, op1);
17187 if (orig_op0)
17188 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17189 break;
17190
17191 default:
17192 gcc_unreachable ();
17193 }
17194
17195 return;
17196 }
17197
17198 if (MEM_P (op1))
17199 {
17200 /* Normal *mov<mode>_internal pattern will handle
17201 unaligned loads just fine if misaligned_operand
17202 is true, and without the UNSPEC it can be combined
17203 with arithmetic instructions. */
17204 if (TARGET_AVX
17205 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17206 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17207 && misaligned_operand (op1, GET_MODE (op1)))
17208 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17209 /* ??? If we have typed data, then it would appear that using
17210 movdqu is the only way to get unaligned data loaded with
17211 integer type. */
17212 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17213 {
17214 if (GET_MODE (op0) != V16QImode)
17215 {
17216 orig_op0 = op0;
17217 op0 = gen_reg_rtx (V16QImode);
17218 }
17219 op1 = gen_lowpart (V16QImode, op1);
17220 /* We will eventually emit movups based on insn attributes. */
17221 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17222 if (orig_op0)
17223 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17224 }
17225 else if (TARGET_SSE2 && mode == V2DFmode)
17226 {
17227 rtx zero;
17228
17229 if (TARGET_AVX
17230 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17231 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17232 || optimize_insn_for_size_p ())
17233 {
17234 /* We will eventually emit movups based on insn attributes. */
17235 emit_insn (gen_sse2_loadupd (op0, op1));
17236 return;
17237 }
17238
17239 /* When SSE registers are split into halves, we can avoid
17240 writing to the top half twice. */
17241 if (TARGET_SSE_SPLIT_REGS)
17242 {
17243 emit_clobber (op0);
17244 zero = op0;
17245 }
17246 else
17247 {
17248 /* ??? Not sure about the best option for the Intel chips.
17249 The following would seem to satisfy; the register is
17250 entirely cleared, breaking the dependency chain. We
17251 then store to the upper half, with a dependency depth
17252 of one. A rumor has it that Intel recommends two movsd
17253 followed by an unpacklpd, but this is unconfirmed. And
17254 given that the dependency depth of the unpacklpd would
17255 still be one, I'm not sure why this would be better. */
17256 zero = CONST0_RTX (V2DFmode);
17257 }
17258
17259 m = adjust_address (op1, DFmode, 0);
17260 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17261 m = adjust_address (op1, DFmode, 8);
17262 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17263 }
17264 else
17265 {
17266 rtx t;
17267
17268 if (TARGET_AVX
17269 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17270 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17271 || optimize_insn_for_size_p ())
17272 {
17273 if (GET_MODE (op0) != V4SFmode)
17274 {
17275 orig_op0 = op0;
17276 op0 = gen_reg_rtx (V4SFmode);
17277 }
17278 op1 = gen_lowpart (V4SFmode, op1);
17279 emit_insn (gen_sse_loadups (op0, op1));
17280 if (orig_op0)
17281 emit_move_insn (orig_op0,
17282 gen_lowpart (GET_MODE (orig_op0), op0));
17283 return;
17284 }
17285
17286 if (mode != V4SFmode)
17287 t = gen_reg_rtx (V4SFmode);
17288 else
17289 t = op0;
17290
17291 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17292 emit_move_insn (t, CONST0_RTX (V4SFmode));
17293 else
17294 emit_clobber (t);
17295
17296 m = adjust_address (op1, V2SFmode, 0);
17297 emit_insn (gen_sse_loadlps (t, t, m));
17298 m = adjust_address (op1, V2SFmode, 8);
17299 emit_insn (gen_sse_loadhps (t, t, m));
17300 if (mode != V4SFmode)
17301 emit_move_insn (op0, gen_lowpart (mode, t));
17302 }
17303 }
17304 else if (MEM_P (op0))
17305 {
17306 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17307 {
17308 op0 = gen_lowpart (V16QImode, op0);
17309 op1 = gen_lowpart (V16QImode, op1);
17310 /* We will eventually emit movups based on insn attributes. */
17311 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17312 }
17313 else if (TARGET_SSE2 && mode == V2DFmode)
17314 {
17315 if (TARGET_AVX
17316 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17317 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17318 || optimize_insn_for_size_p ())
17319 /* We will eventually emit movups based on insn attributes. */
17320 emit_insn (gen_sse2_storeupd (op0, op1));
17321 else
17322 {
17323 m = adjust_address (op0, DFmode, 0);
17324 emit_insn (gen_sse2_storelpd (m, op1));
17325 m = adjust_address (op0, DFmode, 8);
17326 emit_insn (gen_sse2_storehpd (m, op1));
17327 }
17328 }
17329 else
17330 {
17331 if (mode != V4SFmode)
17332 op1 = gen_lowpart (V4SFmode, op1);
17333
17334 if (TARGET_AVX
17335 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17336 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17337 || optimize_insn_for_size_p ())
17338 {
17339 op0 = gen_lowpart (V4SFmode, op0);
17340 emit_insn (gen_sse_storeups (op0, op1));
17341 }
17342 else
17343 {
17344 m = adjust_address (op0, V2SFmode, 0);
17345 emit_insn (gen_sse_storelps (m, op1));
17346 m = adjust_address (op0, V2SFmode, 8);
17347 emit_insn (gen_sse_storehps (m, op1));
17348 }
17349 }
17350 }
17351 else
17352 gcc_unreachable ();
17353 }
17354
17355 /* Helper function of ix86_fixup_binary_operands to canonicalize
17356 operand order. Returns true if the operands should be swapped. */
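/* For example, for a commutative "dst = src1 + src2" where dst happens to
   equal src2, the operands are swapped so that src1 matches dst, fitting
   the two-address "add src2, dst" form; likewise immediates and memory
   operands are pushed into the second position.  */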
17357
17358 static bool
17359 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17360 rtx operands[])
17361 {
17362 rtx dst = operands[0];
17363 rtx src1 = operands[1];
17364 rtx src2 = operands[2];
17365
17366 /* If the operation is not commutative, we can't do anything. */
17367 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17368 return false;
17369
17370 /* Highest priority is that src1 should match dst. */
17371 if (rtx_equal_p (dst, src1))
17372 return false;
17373 if (rtx_equal_p (dst, src2))
17374 return true;
17375
17376 /* Next highest priority is that immediate constants come second. */
17377 if (immediate_operand (src2, mode))
17378 return false;
17379 if (immediate_operand (src1, mode))
17380 return true;
17381
17382 /* Lowest priority is that memory references should come second. */
17383 if (MEM_P (src2))
17384 return false;
17385 if (MEM_P (src1))
17386 return true;
17387
17388 return false;
17389 }
17390
17391
17392 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17393 destination to use for the operation. If different from the true
17394 destination in operands[0], a copy operation will be required. */
17395
17396 rtx
17397 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17398 rtx operands[])
17399 {
17400 rtx dst = operands[0];
17401 rtx src1 = operands[1];
17402 rtx src2 = operands[2];
17403
17404 /* Canonicalize operand order. */
17405 if (ix86_swap_binary_operands_p (code, mode, operands))
17406 {
17407 rtx temp;
17408
17409 /* It is invalid to swap operands of different modes. */
17410 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17411
17412 temp = src1;
17413 src1 = src2;
17414 src2 = temp;
17415 }
17416
17417 /* Both source operands cannot be in memory. */
17418 if (MEM_P (src1) && MEM_P (src2))
17419 {
17420 /* Optimization: Only read from memory once. */
17421 if (rtx_equal_p (src1, src2))
17422 {
17423 src2 = force_reg (mode, src2);
17424 src1 = src2;
17425 }
17426 else if (rtx_equal_p (dst, src1))
17427 src2 = force_reg (mode, src2);
17428 else
17429 src1 = force_reg (mode, src1);
17430 }
17431
17432 /* If the destination is memory, and we do not have matching source
17433 operands, do things in registers. */
17434 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17435 dst = gen_reg_rtx (mode);
17436
17437 /* Source 1 cannot be a constant. */
17438 if (CONSTANT_P (src1))
17439 src1 = force_reg (mode, src1);
17440
17441 /* Source 1 cannot be a non-matching memory. */
17442 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17443 src1 = force_reg (mode, src1);
17444
17445 /* Improve address combine. */
17446 if (code == PLUS
17447 && GET_MODE_CLASS (mode) == MODE_INT
17448 && MEM_P (src2))
17449 src2 = force_reg (mode, src2);
17450
17451 operands[1] = src1;
17452 operands[2] = src2;
17453 return dst;
17454 }
17455
17456 /* Similarly, but assume that the destination has already been
17457 set up properly. */
17458
17459 void
17460 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17461 enum machine_mode mode, rtx operands[])
17462 {
17463 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17464 gcc_assert (dst == operands[0]);
17465 }
17466
17467 /* Attempt to expand a binary operator. Make the expansion closer to the
17468 actual machine than just general_operand, which would allow 3 separate
17469 memory references (one output, two input) in a single insn. */
17470
17471 void
17472 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17473 rtx operands[])
17474 {
17475 rtx src1, src2, dst, op, clob;
17476
17477 dst = ix86_fixup_binary_operands (code, mode, operands);
17478 src1 = operands[1];
17479 src2 = operands[2];
17480
17481 /* Emit the instruction. */
17482
17483 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17484 if (reload_in_progress)
17485 {
17486 /* Reload doesn't know about the flags register, and doesn't know that
17487 it doesn't want to clobber it. We can only do this with PLUS. */
17488 gcc_assert (code == PLUS);
17489 emit_insn (op);
17490 }
17491 else if (reload_completed
17492 && code == PLUS
17493 && !rtx_equal_p (dst, src1))
17494 {
17495 /* This is going to be an LEA; avoid splitting it later. */
17496 emit_insn (op);
17497 }
17498 else
17499 {
17500 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17501 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17502 }
17503
17504 /* Fix up the destination if needed. */
17505 if (dst != operands[0])
17506 emit_move_insn (operands[0], dst);
17507 }
17508
17509 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17510 the given OPERANDS. */
17511
17512 void
17513 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17514 rtx operands[])
17515 {
17516 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17517 if (GET_CODE (operands[1]) == SUBREG)
17518 {
17519 op1 = operands[1];
17520 op2 = operands[2];
17521 }
17522 else if (GET_CODE (operands[2]) == SUBREG)
17523 {
17524 op1 = operands[2];
17525 op2 = operands[1];
17526 }
17527 /* Optimize (__m128i) d | (__m128i) e and similar code
17528 when d and e are float vectors into a float vector logical
17529 insn. In C/C++, without using intrinsics there is no other way
17530 to express a vector logical operation on float vectors than
17531 to cast them temporarily to integer vectors. */
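  /* For example, "(__m128i) a | (__m128i) b" with a and b of type __m128 is
     turned into an IOR done directly in V4SFmode (typically an orps),
     avoiding a detour through the integer vector domain.  */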
17532 if (op1
17533 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17534 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17535 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17536 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17537 && SUBREG_BYTE (op1) == 0
17538 && (GET_CODE (op2) == CONST_VECTOR
17539 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17540 && SUBREG_BYTE (op2) == 0))
17541 && can_create_pseudo_p ())
17542 {
17543 rtx dst;
17544 switch (GET_MODE (SUBREG_REG (op1)))
17545 {
17546 case V4SFmode:
17547 case V8SFmode:
17548 case V16SFmode:
17549 case V2DFmode:
17550 case V4DFmode:
17551 case V8DFmode:
17552 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17553 if (GET_CODE (op2) == CONST_VECTOR)
17554 {
17555 op2 = gen_lowpart (GET_MODE (dst), op2);
17556 op2 = force_reg (GET_MODE (dst), op2);
17557 }
17558 else
17559 {
17560 op1 = operands[1];
17561 op2 = SUBREG_REG (operands[2]);
17562 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17563 op2 = force_reg (GET_MODE (dst), op2);
17564 }
17565 op1 = SUBREG_REG (op1);
17566 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17567 op1 = force_reg (GET_MODE (dst), op1);
17568 emit_insn (gen_rtx_SET (VOIDmode, dst,
17569 gen_rtx_fmt_ee (code, GET_MODE (dst),
17570 op1, op2)));
17571 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17572 return;
17573 default:
17574 break;
17575 }
17576 }
17577 if (!nonimmediate_operand (operands[1], mode))
17578 operands[1] = force_reg (mode, operands[1]);
17579 if (!nonimmediate_operand (operands[2], mode))
17580 operands[2] = force_reg (mode, operands[2]);
17581 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17582 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17583 gen_rtx_fmt_ee (code, mode, operands[1],
17584 operands[2])));
17585 }
17586
17587 /* Return TRUE or FALSE depending on whether the binary operator meets the
17588 appropriate constraints. */
17589
17590 bool
17591 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17592 rtx operands[3])
17593 {
17594 rtx dst = operands[0];
17595 rtx src1 = operands[1];
17596 rtx src2 = operands[2];
17597
17598 /* Both source operands cannot be in memory. */
17599 if (MEM_P (src1) && MEM_P (src2))
17600 return false;
17601
17602 /* Canonicalize operand order for commutative operators. */
17603 if (ix86_swap_binary_operands_p (code, mode, operands))
17604 {
17605 rtx temp = src1;
17606 src1 = src2;
17607 src2 = temp;
17608 }
17609
17610 /* If the destination is memory, we must have a matching source operand. */
17611 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17612 return false;
17613
17614 /* Source 1 cannot be a constant. */
17615 if (CONSTANT_P (src1))
17616 return false;
17617
17618 /* Source 1 cannot be a non-matching memory. */
17619 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17620 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17621 return (code == AND
17622 && (mode == HImode
17623 || mode == SImode
17624 || (TARGET_64BIT && mode == DImode))
17625 && satisfies_constraint_L (src2));
17626
17627 return true;
17628 }
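
/* For instance (illustrative): operands for "addl %eax, (%ebx)" -- where the
   memory destination also appears as the first source -- are accepted, while
   an add with two distinct memory sources is rejected by the checks above.  */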
17629
17630 /* Attempt to expand a unary operator. Make the expansion closer to the
17631 actual machine, than just general_operand, which will allow 2 separate
17632 memory references (one output, one input) in a single insn. */
17633
17634 void
17635 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17636 rtx operands[])
17637 {
17638 int matching_memory;
17639 rtx src, dst, op, clob;
17640
17641 dst = operands[0];
17642 src = operands[1];
17643
17644 /* If the destination is memory, and we do not have matching source
17645 operands, do things in registers. */
17646 matching_memory = 0;
17647 if (MEM_P (dst))
17648 {
17649 if (rtx_equal_p (dst, src))
17650 matching_memory = 1;
17651 else
17652 dst = gen_reg_rtx (mode);
17653 }
17654
17655 /* When source operand is memory, destination must match. */
17656 if (MEM_P (src) && !matching_memory)
17657 src = force_reg (mode, src);
17658
17659 /* Emit the instruction. */
17660
17661 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17662 if (reload_in_progress || code == NOT)
17663 {
17664 /* Reload doesn't know about the flags register, and doesn't know that
17665 it doesn't want to clobber it. */
17666 gcc_assert (code == NOT);
17667 emit_insn (op);
17668 }
17669 else
17670 {
17671 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17672 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17673 }
17674
17675 /* Fix up the destination if needed. */
17676 if (dst != operands[0])
17677 emit_move_insn (operands[0], dst);
17678 }
17679
17680 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17681 divisor are within the range [0-255]. */
17682
17683 void
17684 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17685 bool signed_p)
17686 {
17687 rtx_code_label *end_label, *qimode_label;
17688 rtx insn, div, mod;
17689 rtx scratch, tmp0, tmp1, tmp2;
17690 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17691 rtx (*gen_zero_extend) (rtx, rtx);
17692 rtx (*gen_test_ccno_1) (rtx, rtx);
17693
17694 switch (mode)
17695 {
17696 case SImode:
17697 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17698 gen_test_ccno_1 = gen_testsi_ccno_1;
17699 gen_zero_extend = gen_zero_extendqisi2;
17700 break;
17701 case DImode:
17702 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17703 gen_test_ccno_1 = gen_testdi_ccno_1;
17704 gen_zero_extend = gen_zero_extendqidi2;
17705 break;
17706 default:
17707 gcc_unreachable ();
17708 }
17709
17710 end_label = gen_label_rtx ();
17711 qimode_label = gen_label_rtx ();
17712
17713 scratch = gen_reg_rtx (mode);
17714
17715 /* Use 8bit unsigned divmod if dividend and divisor are within
17716 the range [0-255]. */
17717 emit_move_insn (scratch, operands[2]);
17718 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17719 scratch, 1, OPTAB_DIRECT);
17720 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17721 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17722 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17723 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17724 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17725 pc_rtx);
17726 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17727 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17728 JUMP_LABEL (insn) = qimode_label;
17729
17730 /* Generate original signed/unsigned divmod. */
17731 div = gen_divmod4_1 (operands[0], operands[1],
17732 operands[2], operands[3]);
17733 emit_insn (div);
17734
17735 /* Branch to the end. */
17736 emit_jump_insn (gen_jump (end_label));
17737 emit_barrier ();
17738
17739 /* Generate 8bit unsigned divide. */
17740 emit_label (qimode_label);
17741 /* Don't use operands[0] for result of 8bit divide since not all
17742 registers support QImode ZERO_EXTRACT. */
17743 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17744 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17745 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17746 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17747
17748 if (signed_p)
17749 {
17750 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17751 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17752 }
17753 else
17754 {
17755 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17756 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17757 }
17758
17759 /* Extract remainder from AH. */
17760 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17761 if (REG_P (operands[1]))
17762 insn = emit_move_insn (operands[1], tmp1);
17763 else
17764 {
17765 /* Need a new scratch register since the old one has result
17766 of 8bit divide. */
17767 scratch = gen_reg_rtx (mode);
17768 emit_move_insn (scratch, tmp1);
17769 insn = emit_move_insn (operands[1], scratch);
17770 }
17771 set_unique_reg_note (insn, REG_EQUAL, mod);
17772
17773 /* Zero extend quotient from AL. */
17774 tmp1 = gen_lowpart (QImode, tmp0);
17775 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17776 set_unique_reg_note (insn, REG_EQUAL, div);
17777
17778 emit_label (end_label);
17779 }
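
/* The emitted sequence looks roughly like this (illustrative sketch):

       mov    dividend, scratch
       or     divisor, scratch
       test   $-0x100, scratch
       je     .Lqimode              # both values fit in 8 bits
       div    ...                   # full-width signed/unsigned divmod
       jmp    .Ldone
     .Lqimode:
       divb   ...                   # 8-bit unsigned divide: AL = quotient, AH = remainder
     .Ldone:                                                                  */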
17780
17781 /* Whether it is OK to emit CFI directives when emitting asm code. */
17782
17783 bool
17784 ix86_emit_cfi ()
17785 {
17786 return dwarf2out_do_cfi_asm ();
17787 }
17788
17789 #define LEA_MAX_STALL (3)
17790 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17791
17792 /* Increase given DISTANCE in half-cycles according to
17793 dependencies between PREV and NEXT instructions.
17794 Add 1 half-cycle if there is no dependency and
17795 go to the next cycle if there is some dependency. */
17796
17797 static unsigned int
17798 increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
17799 {
17800 df_ref def, use;
17801
17802 if (!prev || !next)
17803 return distance + (distance & 1) + 2;
17804
17805 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17806 return distance + 1;
17807
17808 FOR_EACH_INSN_USE (use, next)
17809 FOR_EACH_INSN_DEF (def, prev)
17810 if (!DF_REF_IS_ARTIFICIAL (def)
17811 && DF_REF_REGNO (use) == DF_REF_REGNO (def))
17812 return distance + (distance & 1) + 2;
17813
17814 return distance + 1;
17815 }
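
/* E.g. (illustrative arithmetic): with DISTANCE == 3 half-cycles, a
   dependency between PREV and NEXT gives 3 + (3 & 1) + 2 == 6, i.e. the start
   of the next full cycle, while independent insns give just 3 + 1 == 4.  */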
17816
17817 /* Function checks if instruction INSN defines register number
17818 REGNO1 or REGNO2. */
17819
17820 static bool
17821 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17822 rtx insn)
17823 {
17824 df_ref def;
17825
17826 FOR_EACH_INSN_DEF (def, insn)
17827 if (DF_REF_REG_DEF_P (def)
17828 && !DF_REF_IS_ARTIFICIAL (def)
17829 && (regno1 == DF_REF_REGNO (def)
17830 || regno2 == DF_REF_REGNO (def)))
17831 return true;
17832
17833 return false;
17834 }
17835
17836 /* Function checks if instruction INSN uses register number
17837 REGNO as a part of address expression. */
17838
17839 static bool
17840 insn_uses_reg_mem (unsigned int regno, rtx insn)
17841 {
17842 df_ref use;
17843
17844 FOR_EACH_INSN_USE (use, insn)
17845 if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
17846 return true;
17847
17848 return false;
17849 }
17850
17851 /* Search backward for non-agu definition of register number REGNO1
17852 or register number REGNO2 in basic block starting from instruction
17853 START up to head of basic block or instruction INSN.
17854
17855 Function sets *FOUND to true if a definition was found
17856 and to false otherwise.
17857
17858 Distance in half-cycles between START and found instruction or head
17859 of BB is added to DISTANCE and returned. */
17860
17861 static int
17862 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17863 rtx_insn *insn, int distance,
17864 rtx_insn *start, bool *found)
17865 {
17866 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17867 rtx_insn *prev = start;
17868 rtx_insn *next = NULL;
17869
17870 *found = false;
17871
17872 while (prev
17873 && prev != insn
17874 && distance < LEA_SEARCH_THRESHOLD)
17875 {
17876 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17877 {
17878 distance = increase_distance (prev, next, distance);
17879 if (insn_defines_reg (regno1, regno2, prev))
17880 {
17881 if (recog_memoized (prev) < 0
17882 || get_attr_type (prev) != TYPE_LEA)
17883 {
17884 *found = true;
17885 return distance;
17886 }
17887 }
17888
17889 next = prev;
17890 }
17891 if (prev == BB_HEAD (bb))
17892 break;
17893
17894 prev = PREV_INSN (prev);
17895 }
17896
17897 return distance;
17898 }
17899
17900 /* Search backward for non-agu definition of register number REGNO1
17901 or register number REGNO2 in INSN's basic block until
17902 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17903 2. Reach neighbour BBs boundary, or
17904 3. Reach agu definition.
17905 Returns the distance between the non-agu definition point and INSN.
17906 If no definition point, returns -1. */
17907
17908 static int
17909 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17910 rtx_insn *insn)
17911 {
17912 basic_block bb = BLOCK_FOR_INSN (insn);
17913 int distance = 0;
17914 bool found = false;
17915
17916 if (insn != BB_HEAD (bb))
17917 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17918 distance, PREV_INSN (insn),
17919 &found);
17920
17921 if (!found && distance < LEA_SEARCH_THRESHOLD)
17922 {
17923 edge e;
17924 edge_iterator ei;
17925 bool simple_loop = false;
17926
17927 FOR_EACH_EDGE (e, ei, bb->preds)
17928 if (e->src == bb)
17929 {
17930 simple_loop = true;
17931 break;
17932 }
17933
17934 if (simple_loop)
17935 distance = distance_non_agu_define_in_bb (regno1, regno2,
17936 insn, distance,
17937 BB_END (bb), &found);
17938 else
17939 {
17940 int shortest_dist = -1;
17941 bool found_in_bb = false;
17942
17943 FOR_EACH_EDGE (e, ei, bb->preds)
17944 {
17945 int bb_dist
17946 = distance_non_agu_define_in_bb (regno1, regno2,
17947 insn, distance,
17948 BB_END (e->src),
17949 &found_in_bb);
17950 if (found_in_bb)
17951 {
17952 if (shortest_dist < 0)
17953 shortest_dist = bb_dist;
17954 else if (bb_dist > 0)
17955 shortest_dist = MIN (bb_dist, shortest_dist);
17956
17957 found = true;
17958 }
17959 }
17960
17961 distance = shortest_dist;
17962 }
17963 }
17964
17965 /* get_attr_type may modify recog data. We want to make sure
17966 that recog data is valid for instruction INSN, on which
17967 distance_non_agu_define is called. INSN is unchanged here. */
17968 extract_insn_cached (insn);
17969
17970 if (!found)
17971 return -1;
17972
17973 return distance >> 1;
17974 }
17975
17976 /* Return the distance in half-cycles between INSN and the next
17977 insn that uses register number REGNO in a memory address, added
17978 to DISTANCE. Return -1 if REGNO is set.
17979
17980 Put true value into *FOUND if register usage was found and
17981 false otherwise.
17982 Put true value into *REDEFINED if register redefinition was
17983 found and false otherwise. */
17984
17985 static int
17986 distance_agu_use_in_bb (unsigned int regno,
17987 rtx_insn *insn, int distance, rtx_insn *start,
17988 bool *found, bool *redefined)
17989 {
17990 basic_block bb = NULL;
17991 rtx_insn *next = start;
17992 rtx_insn *prev = NULL;
17993
17994 *found = false;
17995 *redefined = false;
17996
17997 if (start != NULL_RTX)
17998 {
17999 bb = BLOCK_FOR_INSN (start);
18000 if (start != BB_HEAD (bb))
18001 /* If insn and start belong to the same bb, set prev to insn,
18002 so the call to increase_distance will increase the distance
18003 between insns by 1. */
18004 prev = insn;
18005 }
18006
18007 while (next
18008 && next != insn
18009 && distance < LEA_SEARCH_THRESHOLD)
18010 {
18011 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18012 {
18013 distance = increase_distance (prev, next, distance);
18014 if (insn_uses_reg_mem (regno, next))
18015 {
18016 /* Return DISTANCE if OP0 is used in memory
18017 address in NEXT. */
18018 *found = true;
18019 return distance;
18020 }
18021
18022 if (insn_defines_reg (regno, INVALID_REGNUM, next))
18023 {
18024 /* Return -1 if OP0 is set in NEXT. */
18025 *redefined = true;
18026 return -1;
18027 }
18028
18029 prev = next;
18030 }
18031
18032 if (next == BB_END (bb))
18033 break;
18034
18035 next = NEXT_INSN (next);
18036 }
18037
18038 return distance;
18039 }
18040
18041 /* Return the distance between INSN and the next insn that uses
18042 register number REGNO0 in a memory address. Return -1 if no such
18043 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
18044
18045 static int
18046 distance_agu_use (unsigned int regno0, rtx_insn *insn)
18047 {
18048 basic_block bb = BLOCK_FOR_INSN (insn);
18049 int distance = 0;
18050 bool found = false;
18051 bool redefined = false;
18052
18053 if (insn != BB_END (bb))
18054 distance = distance_agu_use_in_bb (regno0, insn, distance,
18055 NEXT_INSN (insn),
18056 &found, &redefined);
18057
18058 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18059 {
18060 edge e;
18061 edge_iterator ei;
18062 bool simple_loop = false;
18063
18064 FOR_EACH_EDGE (e, ei, bb->succs)
18065 if (e->dest == bb)
18066 {
18067 simple_loop = true;
18068 break;
18069 }
18070
18071 if (simple_loop)
18072 distance = distance_agu_use_in_bb (regno0, insn,
18073 distance, BB_HEAD (bb),
18074 &found, &redefined);
18075 else
18076 {
18077 int shortest_dist = -1;
18078 bool found_in_bb = false;
18079 bool redefined_in_bb = false;
18080
18081 FOR_EACH_EDGE (e, ei, bb->succs)
18082 {
18083 int bb_dist
18084 = distance_agu_use_in_bb (regno0, insn,
18085 distance, BB_HEAD (e->dest),
18086 &found_in_bb, &redefined_in_bb);
18087 if (found_in_bb)
18088 {
18089 if (shortest_dist < 0)
18090 shortest_dist = bb_dist;
18091 else if (bb_dist > 0)
18092 shortest_dist = MIN (bb_dist, shortest_dist);
18093
18094 found = true;
18095 }
18096 }
18097
18098 distance = shortest_dist;
18099 }
18100 }
18101
18102 if (!found || redefined)
18103 return -1;
18104
18105 return distance >> 1;
18106 }
18107
18108 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18109 there is a dilemma of choosing LEA or ADD.
18110 Negative value: ADD is preferred over LEA
18111 Zero: Neutral
18112 Positive value: LEA is preferred over ADD. */
18113 #define IX86_LEA_PRIORITY 0
18114
18115 /* Return true if using lea INSN has a performance advantage
18116 over a sequence of instructions. The instruction sequence has
18117 SPLIT_COST cycles higher latency than the lea latency. */
18118
18119 static bool
18120 ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18121 unsigned int regno2, int split_cost, bool has_scale)
18122 {
18123 int dist_define, dist_use;
18124
18125 /* For Silvermont, if a 2-source or 3-source LEA is used for
18126 non-destructive destination purposes, or for the ability
18127 to use SCALE, the use of LEA is justified. */
18128 if (TARGET_SILVERMONT || TARGET_INTEL)
18129 {
18130 if (has_scale)
18131 return true;
18132 if (split_cost < 1)
18133 return false;
18134 if (regno0 == regno1 || regno0 == regno2)
18135 return false;
18136 return true;
18137 }
18138
18139 dist_define = distance_non_agu_define (regno1, regno2, insn);
18140 dist_use = distance_agu_use (regno0, insn);
18141
18142 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18143 {
18144 /* If there is no non-AGU operand definition, no AGU
18145 operand usage and the split cost is 0, then both the lea
18146 and non-lea variants have the same priority. Currently
18147 we prefer lea for 64-bit code and non-lea for 32-bit
18148 code. */
18149 if (dist_use < 0 && split_cost == 0)
18150 return TARGET_64BIT || IX86_LEA_PRIORITY;
18151 else
18152 return true;
18153 }
18154
18155 /* With a longer definition distance, lea is preferable.
18156 Here we adjust the distance to take the splitting cost and
18157 lea priority into account. */
18158 dist_define += split_cost + IX86_LEA_PRIORITY;
18159
18160 /* If there is no use in a memory address then we just check
18161 that the split cost exceeds the AGU stall. */
18162 if (dist_use < 0)
18163 return dist_define > LEA_MAX_STALL;
18164
18165 /* If this insn has both a backward non-agu dependence and a forward
18166 agu dependence, the one with the shorter distance takes effect. */
18167 return dist_define >= dist_use;
18168 }
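
/* Worked example (illustrative, for targets using the distance heuristic):
   with dist_define == 2, dist_use == 4 and split_cost == 1, dist_define
   becomes 2 + 1 + IX86_LEA_PRIORITY == 3, which is less than dist_use, so
   the split sequence wins and the function returns false.  */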
18169
18170 /* Return true if it is legal to clobber flags by INSN and
18171 false otherwise. */
18172
18173 static bool
18174 ix86_ok_to_clobber_flags (rtx_insn *insn)
18175 {
18176 basic_block bb = BLOCK_FOR_INSN (insn);
18177 df_ref use;
18178 bitmap live;
18179
18180 while (insn)
18181 {
18182 if (NONDEBUG_INSN_P (insn))
18183 {
18184 FOR_EACH_INSN_USE (use, insn)
18185 if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18186 return false;
18187
18188 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18189 return true;
18190 }
18191
18192 if (insn == BB_END (bb))
18193 break;
18194
18195 insn = NEXT_INSN (insn);
18196 }
18197
18198 live = df_get_live_out (bb);
18199 return !REGNO_REG_SET_P (live, FLAGS_REG);
18200 }
18201
18202 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18203 move and add to avoid AGU stalls. */
18204
18205 bool
18206 ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18207 {
18208 unsigned int regno0, regno1, regno2;
18209
18210 /* Check if we need to optimize. */
18211 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18212 return false;
18213
18214 /* Check it is correct to split here. */
18215 if (!ix86_ok_to_clobber_flags (insn))
18216 return false;
18217
18218 regno0 = true_regnum (operands[0]);
18219 regno1 = true_regnum (operands[1]);
18220 regno2 = true_regnum (operands[2]);
18221
18222 /* We need to split only adds with a non-destructive
18223 destination operand. */
18224 if (regno0 == regno1 || regno0 == regno2)
18225 return false;
18226 else
18227 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18228 }
18229
18230 /* Return true if we should emit lea instruction instead of mov
18231 instruction. */
18232
18233 bool
18234 ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18235 {
18236 unsigned int regno0, regno1;
18237
18238 /* Check if we need to optimize. */
18239 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18240 return false;
18241
18242 /* Use lea for reg to reg moves only. */
18243 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18244 return false;
18245
18246 regno0 = true_regnum (operands[0]);
18247 regno1 = true_regnum (operands[1]);
18248
18249 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18250 }
18251
18252 /* Return true if we need to split lea into a sequence of
18253 instructions to avoid AGU stalls. */
18254
18255 bool
18256 ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18257 {
18258 unsigned int regno0, regno1, regno2;
18259 int split_cost;
18260 struct ix86_address parts;
18261 int ok;
18262
18263 /* Check we need to optimize. */
18264 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18265 return false;
18266
18267 /* The "at least two components" test below might not catch simple
18268 move or zero extension insns if parts.base is non-NULL and parts.disp
18269 is const0_rtx as the only components in the address, e.g. if the
18270 register is %rbp or %r13. As this test is much cheaper and moves or
18271 zero extensions are the common case, do this check first. */
18272 if (REG_P (operands[1])
18273 || (SImode_address_operand (operands[1], VOIDmode)
18274 && REG_P (XEXP (operands[1], 0))))
18275 return false;
18276
18277 /* Check if it is OK to split here. */
18278 if (!ix86_ok_to_clobber_flags (insn))
18279 return false;
18280
18281 ok = ix86_decompose_address (operands[1], &parts);
18282 gcc_assert (ok);
18283
18284 /* There should be at least two components in the address. */
18285 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18286 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18287 return false;
18288
18289 /* We should not split into add if a non-legitimate pic
18290 operand is used as the displacement. */
18291 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18292 return false;
18293
18294 regno0 = true_regnum (operands[0]);
18295 regno1 = INVALID_REGNUM;
18296 regno2 = INVALID_REGNUM;
18297
18298 if (parts.base)
18299 regno1 = true_regnum (parts.base);
18300 if (parts.index)
18301 regno2 = true_regnum (parts.index);
18302
18303 split_cost = 0;
18304
18305 /* Compute how many cycles we will add to execution time
18306 if we split the lea into a sequence of instructions. */
18307 if (parts.base || parts.index)
18308 {
18309 /* Have to use a mov instruction if the non-destructive
18310 destination form is used. */
18311 if (regno1 != regno0 && regno2 != regno0)
18312 split_cost += 1;
18313
18314 /* Have to add index to base if both exist. */
18315 if (parts.base && parts.index)
18316 split_cost += 1;
18317
18318 /* Have to use shift and adds if scale is 2 or greater. */
18319 if (parts.scale > 1)
18320 {
18321 if (regno0 != regno1)
18322 split_cost += 1;
18323 else if (regno2 == regno0)
18324 split_cost += 4;
18325 else
18326 split_cost += parts.scale;
18327 }
18328
18329 /* Have to use an add instruction with an immediate if
18330 disp is non-zero. */
18331 if (parts.disp && parts.disp != const0_rtx)
18332 split_cost += 1;
18333
18334 /* Subtract the price of lea. */
18335 split_cost -= 1;
18336 }
18337
18338 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18339 parts.scale > 1);
18340 }
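
/* Worked example (illustrative): for "lea 4(%ebx,%ecx,2), %eax" the address
   has a base, an index, scale 2 and a displacement, and %eax differs from
   both sources, so split_cost = 1 (mov) + 1 (add index to base) + 1 (shift
   for the scale) + 1 (add disp) - 1 (the lea itself) = 3.  */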
18341
18342 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18343 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18344
18345 static void
18346 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18347 rtx dst, rtx src)
18348 {
18349 rtx op, clob;
18350
18351 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18352 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18353
18354 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18355 }
18356
18357 /* Return true if the definition of REGNO1 is nearest to INSN. */
18358
18359 static bool
18360 find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
18361 {
18362 rtx_insn *prev = insn;
18363 rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
18364
18365 if (insn == start)
18366 return false;
18367 while (prev && prev != start)
18368 {
18369 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18370 {
18371 prev = PREV_INSN (prev);
18372 continue;
18373 }
18374 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18375 return true;
18376 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18377 return false;
18378 prev = PREV_INSN (prev);
18379 }
18380
18381 /* None of the regs is defined in the bb. */
18382 return false;
18383 }
18384
18385 /* Split lea instructions into a sequence of instructions
18386 which are executed on ALU to avoid AGU stalls.
18387 It is assumed that it is allowed to clobber flags register
18388 at lea position. */
18389
18390 void
18391 ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], enum machine_mode mode)
18392 {
18393 unsigned int regno0, regno1, regno2;
18394 struct ix86_address parts;
18395 rtx target, tmp;
18396 int ok, adds;
18397
18398 ok = ix86_decompose_address (operands[1], &parts);
18399 gcc_assert (ok);
18400
18401 target = gen_lowpart (mode, operands[0]);
18402
18403 regno0 = true_regnum (target);
18404 regno1 = INVALID_REGNUM;
18405 regno2 = INVALID_REGNUM;
18406
18407 if (parts.base)
18408 {
18409 parts.base = gen_lowpart (mode, parts.base);
18410 regno1 = true_regnum (parts.base);
18411 }
18412
18413 if (parts.index)
18414 {
18415 parts.index = gen_lowpart (mode, parts.index);
18416 regno2 = true_regnum (parts.index);
18417 }
18418
18419 if (parts.disp)
18420 parts.disp = gen_lowpart (mode, parts.disp);
18421
18422 if (parts.scale > 1)
18423 {
18424 /* Case r1 = r1 + ... */
18425 if (regno1 == regno0)
18426 {
18427 /* If we have a case r1 = r1 + C * r2 then we
18428 would have to use multiplication, which is very
18429 expensive. Assume the cost model is wrong if such
18430 a case reaches here. */
18431 gcc_assert (regno2 != regno0);
18432
18433 for (adds = parts.scale; adds > 0; adds--)
18434 ix86_emit_binop (PLUS, mode, target, parts.index);
18435 }
18436 else
18437 {
18438 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18439 if (regno0 != regno2)
18440 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18441
18442 /* Use shift for scaling. */
18443 ix86_emit_binop (ASHIFT, mode, target,
18444 GEN_INT (exact_log2 (parts.scale)));
18445
18446 if (parts.base)
18447 ix86_emit_binop (PLUS, mode, target, parts.base);
18448
18449 if (parts.disp && parts.disp != const0_rtx)
18450 ix86_emit_binop (PLUS, mode, target, parts.disp);
18451 }
18452 }
18453 else if (!parts.base && !parts.index)
18454 {
18455 gcc_assert (parts.disp);
18456 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18457 }
18458 else
18459 {
18460 if (!parts.base)
18461 {
18462 if (regno0 != regno2)
18463 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18464 }
18465 else if (!parts.index)
18466 {
18467 if (regno0 != regno1)
18468 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18469 }
18470 else
18471 {
18472 if (regno0 == regno1)
18473 tmp = parts.index;
18474 else if (regno0 == regno2)
18475 tmp = parts.base;
18476 else
18477 {
18478 rtx tmp1;
18479
18480 /* Find better operand for SET instruction, depending
18481 on which definition is farther from the insn. */
18482 if (find_nearest_reg_def (insn, regno1, regno2))
18483 tmp = parts.index, tmp1 = parts.base;
18484 else
18485 tmp = parts.base, tmp1 = parts.index;
18486
18487 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18488
18489 if (parts.disp && parts.disp != const0_rtx)
18490 ix86_emit_binop (PLUS, mode, target, parts.disp);
18491
18492 ix86_emit_binop (PLUS, mode, target, tmp1);
18493 return;
18494 }
18495
18496 ix86_emit_binop (PLUS, mode, target, tmp);
18497 }
18498
18499 if (parts.disp && parts.disp != const0_rtx)
18500 ix86_emit_binop (PLUS, mode, target, parts.disp);
18501 }
18502 }
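
/* E.g. (illustrative sketch) "lea 4(%ebx,%ecx,2), %eax" may be split into

       movl  %ecx, %eax
       sall  $1, %eax          # shift for the scale
       addl  %ebx, %eax
       addl  $4, %eax

   keeping the work on the ALU instead of stalling on the AGU.  */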
18503
18504 /* Return true if it is ok to optimize an ADD operation to an LEA
18505 operation to avoid flag register consumption. For most processors,
18506 ADD is faster than LEA. For processors like BONNELL, if the
18507 destination register of the LEA holds an actual address which will be
18508 used soon, LEA is better; otherwise ADD is better. */
18509
18510 bool
18511 ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
18512 {
18513 unsigned int regno0 = true_regnum (operands[0]);
18514 unsigned int regno1 = true_regnum (operands[1]);
18515 unsigned int regno2 = true_regnum (operands[2]);
18516
18517 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18518 if (regno0 != regno1 && regno0 != regno2)
18519 return true;
18520
18521 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18522 return false;
18523
18524 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18525 }
18526
18527 /* Return true if destination reg of SET_BODY is shift count of
18528 USE_BODY. */
18529
18530 static bool
18531 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18532 {
18533 rtx set_dest;
18534 rtx shift_rtx;
18535 int i;
18536
18537 /* Retrieve destination of SET_BODY. */
18538 switch (GET_CODE (set_body))
18539 {
18540 case SET:
18541 set_dest = SET_DEST (set_body);
18542 if (!set_dest || !REG_P (set_dest))
18543 return false;
18544 break;
18545 case PARALLEL:
18546 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18547 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18548 use_body))
18549 return true;
18550 default:
18551 return false;
18552 break;
18553 }
18554
18555 /* Retrieve shift count of USE_BODY. */
18556 switch (GET_CODE (use_body))
18557 {
18558 case SET:
18559 shift_rtx = XEXP (use_body, 1);
18560 break;
18561 case PARALLEL:
18562 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18563 if (ix86_dep_by_shift_count_body (set_body,
18564 XVECEXP (use_body, 0, i)))
18565 return true;
18566 default:
18567 return false;
18568 break;
18569 }
18570
18571 if (shift_rtx
18572 && (GET_CODE (shift_rtx) == ASHIFT
18573 || GET_CODE (shift_rtx) == LSHIFTRT
18574 || GET_CODE (shift_rtx) == ASHIFTRT
18575 || GET_CODE (shift_rtx) == ROTATE
18576 || GET_CODE (shift_rtx) == ROTATERT))
18577 {
18578 rtx shift_count = XEXP (shift_rtx, 1);
18579
18580 /* Return true if shift count is dest of SET_BODY. */
18581 if (REG_P (shift_count))
18582 {
18583 /* Add check since it can be invoked before register
18584 allocation in pre-reload schedule. */
18585 if (reload_completed
18586 && true_regnum (set_dest) == true_regnum (shift_count))
18587 return true;
18588 else if (REGNO (set_dest) == REGNO (shift_count))
18589 return true;
18590 }
18591 }
18592
18593 return false;
18594 }
18595
18596 /* Return true if destination reg of SET_INSN is shift count of
18597 USE_INSN. */
18598
18599 bool
18600 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18601 {
18602 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18603 PATTERN (use_insn));
18604 }
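
/* E.g. (illustrative) this returns true when SET_INSN sets %ecx and USE_INSN
   is a shift such as "sall %cl, %eax", whose count register is %ecx.  */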
18605
18606 /* Return TRUE or FALSE depending on whether the unary operator meets the
18607 appropriate constraints. */
18608
18609 bool
18610 ix86_unary_operator_ok (enum rtx_code,
18611 enum machine_mode,
18612 rtx operands[2])
18613 {
18614 /* If one of the operands is memory, the source and destination must match. */
18615 if ((MEM_P (operands[0])
18616 || MEM_P (operands[1]))
18617 && ! rtx_equal_p (operands[0], operands[1]))
18618 return false;
18619 return true;
18620 }
18621
18622 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18623 are ok, keeping in mind the possible movddup alternative. */
18624
18625 bool
18626 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18627 {
18628 if (MEM_P (operands[0]))
18629 return rtx_equal_p (operands[0], operands[1 + high]);
18630 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18631 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18632 return true;
18633 }
18634
18635 /* Post-reload splitter for converting an SF or DFmode value in an
18636 SSE register into an unsigned SImode. */
18637
18638 void
18639 ix86_split_convert_uns_si_sse (rtx operands[])
18640 {
18641 enum machine_mode vecmode;
18642 rtx value, large, zero_or_two31, input, two31, x;
18643
18644 large = operands[1];
18645 zero_or_two31 = operands[2];
18646 input = operands[3];
18647 two31 = operands[4];
18648 vecmode = GET_MODE (large);
18649 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18650
18651 /* Load up the value into the low element. We must ensure that the other
18652 elements are valid floats -- zero is the easiest such value. */
18653 if (MEM_P (input))
18654 {
18655 if (vecmode == V4SFmode)
18656 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18657 else
18658 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18659 }
18660 else
18661 {
18662 input = gen_rtx_REG (vecmode, REGNO (input));
18663 emit_move_insn (value, CONST0_RTX (vecmode));
18664 if (vecmode == V4SFmode)
18665 emit_insn (gen_sse_movss (value, value, input));
18666 else
18667 emit_insn (gen_sse2_movsd (value, value, input));
18668 }
18669
18670 emit_move_insn (large, two31);
18671 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18672
18673 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18674 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18675
18676 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18677 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18678
18679 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18680 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18681
18682 large = gen_rtx_REG (V4SImode, REGNO (large));
18683 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18684
18685 x = gen_rtx_REG (V4SImode, REGNO (value));
18686 if (vecmode == V4SFmode)
18687 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18688 else
18689 emit_insn (gen_sse2_cvttpd2dq (x, value));
18690 value = x;
18691
18692 emit_insn (gen_xorv4si3 (value, value, large));
18693 }
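
/* Worked example (illustrative): for an input of 3e9 (above 2**31) the
   comparison mask is all-ones, so we truncate 3e9 - 2**31 = 852516352 and
   xor in 0x80000000, giving 3000000000; for inputs below 2**31 the mask is
   zero and the plain signed conversion result is used unchanged.  */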
18694
18695 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18696 Expects the 64-bit DImode to be supplied in a pair of integral
18697 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18698 -mfpmath=sse, !optimize_size only. */
18699
18700 void
18701 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18702 {
18703 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18704 rtx int_xmm, fp_xmm;
18705 rtx biases, exponents;
18706 rtx x;
18707
18708 int_xmm = gen_reg_rtx (V4SImode);
18709 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18710 emit_insn (gen_movdi_to_sse (int_xmm, input));
18711 else if (TARGET_SSE_SPLIT_REGS)
18712 {
18713 emit_clobber (int_xmm);
18714 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18715 }
18716 else
18717 {
18718 x = gen_reg_rtx (V2DImode);
18719 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18720 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18721 }
18722
18723 x = gen_rtx_CONST_VECTOR (V4SImode,
18724 gen_rtvec (4, GEN_INT (0x43300000UL),
18725 GEN_INT (0x45300000UL),
18726 const0_rtx, const0_rtx));
18727 exponents = validize_mem (force_const_mem (V4SImode, x));
18728
18729 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18730 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18731
18732 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18733 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18734 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18735 (0x1.0p84 + double(fp_value_hi_xmm)).
18736 Note these exponents differ by 32. */
18737
18738 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18739
18740 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18741 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18742 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18743 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18744 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18745 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18746 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18747 biases = validize_mem (force_const_mem (V2DFmode, biases));
18748 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18749
18750 /* Add the upper and lower DFmode values together. */
18751 if (TARGET_SSE3)
18752 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18753 else
18754 {
18755 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18756 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18757 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18758 }
18759
18760 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18761 }
18762
18763 /* Not used, but eases macroization of patterns. */
18764 void
18765 ix86_expand_convert_uns_sixf_sse (rtx, rtx)
18766 {
18767 gcc_unreachable ();
18768 }
18769
18770 /* Convert an unsigned SImode value into a DFmode. Only currently used
18771 for SSE, but applicable anywhere. */
18772
18773 void
18774 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18775 {
18776 REAL_VALUE_TYPE TWO31r;
18777 rtx x, fp;
18778
18779 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18780 NULL, 1, OPTAB_DIRECT);
18781
18782 fp = gen_reg_rtx (DFmode);
18783 emit_insn (gen_floatsidf2 (fp, x));
18784
18785 real_ldexp (&TWO31r, &dconst1, 31);
18786 x = const_double_from_real_value (TWO31r, DFmode);
18787
18788 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18789 if (x != target)
18790 emit_move_insn (target, x);
18791 }
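
/* Worked example (illustrative): for input 0xffffffff the PLUS with -2**31
   wraps to 0x7fffffff = 2147483647, floatsidf gives 2147483647.0, and adding
   2**31 back yields 4294967295.0, the correct unsigned value.  */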
18792
18793 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18794 32-bit mode; otherwise we have a direct convert instruction. */
18795
18796 void
18797 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18798 {
18799 REAL_VALUE_TYPE TWO32r;
18800 rtx fp_lo, fp_hi, x;
18801
18802 fp_lo = gen_reg_rtx (DFmode);
18803 fp_hi = gen_reg_rtx (DFmode);
18804
18805 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18806
18807 real_ldexp (&TWO32r, &dconst1, 32);
18808 x = const_double_from_real_value (TWO32r, DFmode);
18809 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18810
18811 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18812
18813 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18814 0, OPTAB_DIRECT);
18815 if (x != target)
18816 emit_move_insn (target, x);
18817 }
18818
18819 /* Convert an unsigned SImode value into a SFmode, using only SSE.
18820 For x86_32, -mfpmath=sse, !optimize_size only. */
18821 void
18822 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18823 {
18824 REAL_VALUE_TYPE ONE16r;
18825 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18826
18827 real_ldexp (&ONE16r, &dconst1, 16);
18828 x = const_double_from_real_value (ONE16r, SFmode);
18829 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18830 NULL, 0, OPTAB_DIRECT);
18831 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18832 NULL, 0, OPTAB_DIRECT);
18833 fp_hi = gen_reg_rtx (SFmode);
18834 fp_lo = gen_reg_rtx (SFmode);
18835 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18836 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18837 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18838 0, OPTAB_DIRECT);
18839 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18840 0, OPTAB_DIRECT);
18841 if (!rtx_equal_p (target, fp_hi))
18842 emit_move_insn (target, fp_hi);
18843 }
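
/* E.g. (illustrative) for input 0x12345678 the low half 0x5678 and the high
   half 0x1234 are converted separately and recombined as
   (float) 0x1234 * 65536.0f + (float) 0x5678, avoiding the signedness
   problem of a direct 32-bit conversion.  */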
18844
18845 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18846 a vector of unsigned ints VAL to vector of floats TARGET. */
18847
18848 void
18849 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18850 {
18851 rtx tmp[8];
18852 REAL_VALUE_TYPE TWO16r;
18853 enum machine_mode intmode = GET_MODE (val);
18854 enum machine_mode fltmode = GET_MODE (target);
18855 rtx (*cvt) (rtx, rtx);
18856
18857 if (intmode == V4SImode)
18858 cvt = gen_floatv4siv4sf2;
18859 else
18860 cvt = gen_floatv8siv8sf2;
18861 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18862 tmp[0] = force_reg (intmode, tmp[0]);
18863 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18864 OPTAB_DIRECT);
18865 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18866 NULL_RTX, 1, OPTAB_DIRECT);
18867 tmp[3] = gen_reg_rtx (fltmode);
18868 emit_insn (cvt (tmp[3], tmp[1]));
18869 tmp[4] = gen_reg_rtx (fltmode);
18870 emit_insn (cvt (tmp[4], tmp[2]));
18871 real_ldexp (&TWO16r, &dconst1, 16);
18872 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18873 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18874 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18875 OPTAB_DIRECT);
18876 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18877 OPTAB_DIRECT);
18878 if (tmp[7] != target)
18879 emit_move_insn (target, tmp[7]);
18880 }
18881
18882 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18883 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18884 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18885 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
18886
18887 rtx
18888 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18889 {
18890 REAL_VALUE_TYPE TWO31r;
18891 rtx two31r, tmp[4];
18892 enum machine_mode mode = GET_MODE (val);
18893 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18894 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18895 rtx (*cmp) (rtx, rtx, rtx, rtx);
18896 int i;
18897
18898 for (i = 0; i < 3; i++)
18899 tmp[i] = gen_reg_rtx (mode);
18900 real_ldexp (&TWO31r, &dconst1, 31);
18901 two31r = const_double_from_real_value (TWO31r, scalarmode);
18902 two31r = ix86_build_const_vector (mode, 1, two31r);
18903 two31r = force_reg (mode, two31r);
18904 switch (mode)
18905 {
18906 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18907 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18908 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18909 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18910 default: gcc_unreachable ();
18911 }
18912 tmp[3] = gen_rtx_LE (mode, two31r, val);
18913 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18914 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18915 0, OPTAB_DIRECT);
18916 if (intmode == V4SImode || TARGET_AVX2)
18917 *xorp = expand_simple_binop (intmode, ASHIFT,
18918 gen_lowpart (intmode, tmp[0]),
18919 GEN_INT (31), NULL_RTX, 0,
18920 OPTAB_DIRECT);
18921 else
18922 {
18923 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18924 two31 = ix86_build_const_vector (intmode, 1, two31);
18925 *xorp = expand_simple_binop (intmode, AND,
18926 gen_lowpart (intmode, tmp[0]),
18927 two31, NULL_RTX, 0,
18928 OPTAB_DIRECT);
18929 }
18930 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18931 0, OPTAB_DIRECT);
18932 }
18933
18934 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18935 then replicate the value for all elements of the vector
18936 register. */
18937
18938 rtx
18939 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18940 {
18941 int i, n_elt;
18942 rtvec v;
18943 enum machine_mode scalar_mode;
18944
18945 switch (mode)
18946 {
18947 case V64QImode:
18948 case V32QImode:
18949 case V16QImode:
18950 case V32HImode:
18951 case V16HImode:
18952 case V8HImode:
18953 case V16SImode:
18954 case V8SImode:
18955 case V4SImode:
18956 case V8DImode:
18957 case V4DImode:
18958 case V2DImode:
18959 gcc_assert (vect);
18960 case V16SFmode:
18961 case V8SFmode:
18962 case V4SFmode:
18963 case V8DFmode:
18964 case V4DFmode:
18965 case V2DFmode:
18966 n_elt = GET_MODE_NUNITS (mode);
18967 v = rtvec_alloc (n_elt);
18968 scalar_mode = GET_MODE_INNER (mode);
18969
18970 RTVEC_ELT (v, 0) = value;
18971
18972 for (i = 1; i < n_elt; ++i)
18973 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18974
18975 return gen_rtx_CONST_VECTOR (mode, v);
18976
18977 default:
18978 gcc_unreachable ();
18979 }
18980 }
18981
18982 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18983 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18984 for an SSE register. If VECT is true, then replicate the mask for
18985 all elements of the vector register. If INVERT is true, then create
18986 a mask excluding the sign bit. */
18987
18988 rtx
18989 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18990 {
18991 enum machine_mode vec_mode, imode;
18992 HOST_WIDE_INT hi, lo;
18993 int shift = 63;
18994 rtx v;
18995 rtx mask;
18996
18997 /* Find the sign bit, sign extended to 2*HWI. */
18998 switch (mode)
18999 {
19000 case V16SImode:
19001 case V16SFmode:
19002 case V8SImode:
19003 case V4SImode:
19004 case V8SFmode:
19005 case V4SFmode:
19006 vec_mode = mode;
19007 mode = GET_MODE_INNER (mode);
19008 imode = SImode;
19009 lo = 0x80000000, hi = lo < 0;
19010 break;
19011
19012 case V8DImode:
19013 case V4DImode:
19014 case V2DImode:
19015 case V8DFmode:
19016 case V4DFmode:
19017 case V2DFmode:
19018 vec_mode = mode;
19019 mode = GET_MODE_INNER (mode);
19020 imode = DImode;
19021 if (HOST_BITS_PER_WIDE_INT >= 64)
19022 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19023 else
19024 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19025 break;
19026
19027 case TImode:
19028 case TFmode:
19029 vec_mode = VOIDmode;
19030 if (HOST_BITS_PER_WIDE_INT >= 64)
19031 {
19032 imode = TImode;
19033 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19034 }
19035 else
19036 {
19037 rtvec vec;
19038
19039 imode = DImode;
19040 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19041
19042 if (invert)
19043 {
19044 lo = ~lo, hi = ~hi;
19045 v = constm1_rtx;
19046 }
19047 else
19048 v = const0_rtx;
19049
19050 mask = immed_double_const (lo, hi, imode);
19051
19052 vec = gen_rtvec (2, v, mask);
19053 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19054 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19055
19056 return v;
19057 }
19058 break;
19059
19060 default:
19061 gcc_unreachable ();
19062 }
19063
19064 if (invert)
19065 lo = ~lo, hi = ~hi;
19066
19067 /* Force this value into the low part of a fp vector constant. */
19068 mask = immed_double_const (lo, hi, imode);
19069 mask = gen_lowpart (mode, mask);
19070
19071 if (vec_mode == VOIDmode)
19072 return force_reg (mode, mask);
19073
19074 v = ix86_build_const_vector (vec_mode, vect, mask);
19075 return force_reg (vec_mode, v);
19076 }
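
/* E.g. (illustrative) for V4SFmode with VECT true this yields the vector
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; with INVERT each
   element becomes 0x7fffffff instead.  */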
19077
19078 /* Generate code for floating point ABS or NEG. */
19079
19080 void
19081 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19082 rtx operands[])
19083 {
19084 rtx mask, set, dst, src;
19085 bool use_sse = false;
19086 bool vector_mode = VECTOR_MODE_P (mode);
19087 enum machine_mode vmode = mode;
19088
19089 if (vector_mode)
19090 use_sse = true;
19091 else if (mode == TFmode)
19092 use_sse = true;
19093 else if (TARGET_SSE_MATH)
19094 {
19095 use_sse = SSE_FLOAT_MODE_P (mode);
19096 if (mode == SFmode)
19097 vmode = V4SFmode;
19098 else if (mode == DFmode)
19099 vmode = V2DFmode;
19100 }
19101
19102 /* NEG and ABS performed with SSE use bitwise mask operations.
19103 Create the appropriate mask now. */
19104 if (use_sse)
19105 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19106 else
19107 mask = NULL_RTX;
19108
19109 dst = operands[0];
19110 src = operands[1];
19111
19112 set = gen_rtx_fmt_e (code, mode, src);
19113 set = gen_rtx_SET (VOIDmode, dst, set);
19114
19115 if (mask)
19116 {
19117 rtx use, clob;
19118 rtvec par;
19119
19120 use = gen_rtx_USE (VOIDmode, mask);
19121 if (vector_mode)
19122 par = gen_rtvec (2, set, use);
19123 else
19124 {
19125 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19126 par = gen_rtvec (3, set, use, clob);
19127 }
19128 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19129 }
19130 else
19131 emit_insn (set);
19132 }
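
/* E.g. (illustrative) an SSE DFmode NEG ends up as an xorpd with the
   sign-bit mask built above, and an ABS as an andpd with the inverted mask,
   once the pattern carrying the mask USE is split.  */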
19133
19134 /* Expand a copysign operation. Special case operand 0 being a constant. */
19135
19136 void
19137 ix86_expand_copysign (rtx operands[])
19138 {
19139 enum machine_mode mode, vmode;
19140 rtx dest, op0, op1, mask, nmask;
19141
19142 dest = operands[0];
19143 op0 = operands[1];
19144 op1 = operands[2];
19145
19146 mode = GET_MODE (dest);
19147
19148 if (mode == SFmode)
19149 vmode = V4SFmode;
19150 else if (mode == DFmode)
19151 vmode = V2DFmode;
19152 else
19153 vmode = mode;
19154
19155 if (GET_CODE (op0) == CONST_DOUBLE)
19156 {
19157 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19158
19159 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19160 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19161
19162 if (mode == SFmode || mode == DFmode)
19163 {
19164 if (op0 == CONST0_RTX (mode))
19165 op0 = CONST0_RTX (vmode);
19166 else
19167 {
19168 rtx v = ix86_build_const_vector (vmode, false, op0);
19169
19170 op0 = force_reg (vmode, v);
19171 }
19172 }
19173 else if (op0 != CONST0_RTX (mode))
19174 op0 = force_reg (mode, op0);
19175
19176 mask = ix86_build_signbit_mask (vmode, 0, 0);
19177
19178 if (mode == SFmode)
19179 copysign_insn = gen_copysignsf3_const;
19180 else if (mode == DFmode)
19181 copysign_insn = gen_copysigndf3_const;
19182 else
19183 copysign_insn = gen_copysigntf3_const;
19184
19185 emit_insn (copysign_insn (dest, op0, op1, mask));
19186 }
19187 else
19188 {
19189 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19190
19191 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19192 mask = ix86_build_signbit_mask (vmode, 0, 0);
19193
19194 if (mode == SFmode)
19195 copysign_insn = gen_copysignsf3_var;
19196 else if (mode == DFmode)
19197 copysign_insn = gen_copysigndf3_var;
19198 else
19199 copysign_insn = gen_copysigntf3_var;
19200
19201 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19202 }
19203 }
19204
19205 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19206 be a constant, and so has already been expanded into a vector constant. */
19207
19208 void
19209 ix86_split_copysign_const (rtx operands[])
19210 {
19211 enum machine_mode mode, vmode;
19212 rtx dest, op0, mask, x;
19213
19214 dest = operands[0];
19215 op0 = operands[1];
19216 mask = operands[3];
19217
19218 mode = GET_MODE (dest);
19219 vmode = GET_MODE (mask);
19220
19221 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19222 x = gen_rtx_AND (vmode, dest, mask);
19223 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19224
19225 if (op0 != CONST0_RTX (vmode))
19226 {
19227 x = gen_rtx_IOR (vmode, dest, op0);
19228 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19229 }
19230 }
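
/* I.e. (illustrative) copysign (1.0, x) ends up as
       dest = (x & sign-bit mask) | abs-of-constant
   using one andp[sd] and one orp[sd].  */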
19231
19232 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19233 so we have to do two masks. */
19234
19235 void
19236 ix86_split_copysign_var (rtx operands[])
19237 {
19238 enum machine_mode mode, vmode;
19239 rtx dest, scratch, op0, op1, mask, nmask, x;
19240
19241 dest = operands[0];
19242 scratch = operands[1];
19243 op0 = operands[2];
19244 op1 = operands[3];
19245 nmask = operands[4];
19246 mask = operands[5];
19247
19248 mode = GET_MODE (dest);
19249 vmode = GET_MODE (mask);
19250
19251 if (rtx_equal_p (op0, op1))
19252 {
19253 /* Shouldn't happen often (it's useless, obviously), but when it does
19254 we'd generate incorrect code if we continue below. */
19255 emit_move_insn (dest, op0);
19256 return;
19257 }
19258
19259 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19260 {
19261 gcc_assert (REGNO (op1) == REGNO (scratch));
19262
19263 x = gen_rtx_AND (vmode, scratch, mask);
19264 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19265
19266 dest = mask;
19267 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19268 x = gen_rtx_NOT (vmode, dest);
19269 x = gen_rtx_AND (vmode, x, op0);
19270 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19271 }
19272 else
19273 {
19274 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19275 {
19276 x = gen_rtx_AND (vmode, scratch, mask);
19277 }
19278 else /* alternative 2,4 */
19279 {
19280 gcc_assert (REGNO (mask) == REGNO (scratch));
19281 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19282 x = gen_rtx_AND (vmode, scratch, op1);
19283 }
19284 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19285
19286 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19287 {
19288 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19289 x = gen_rtx_AND (vmode, dest, nmask);
19290 }
19291 else /* alternative 3,4 */
19292 {
19293 gcc_assert (REGNO (nmask) == REGNO (dest));
19294 dest = nmask;
19295 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19296 x = gen_rtx_AND (vmode, dest, op0);
19297 }
19298 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19299 }
19300
19301 x = gen_rtx_IOR (vmode, dest, scratch);
19302 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19303 }
19304
19305 /* Return TRUE or FALSE depending on whether the first SET in INSN
19306 has source and destination with matching CC modes, and that the
19307 CC mode is at least as constrained as REQ_MODE. */
19308
19309 bool
19310 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19311 {
19312 rtx set;
19313 enum machine_mode set_mode;
19314
19315 set = PATTERN (insn);
19316 if (GET_CODE (set) == PARALLEL)
19317 set = XVECEXP (set, 0, 0);
19318 gcc_assert (GET_CODE (set) == SET);
19319 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19320
19321 set_mode = GET_MODE (SET_DEST (set));
19322 switch (set_mode)
19323 {
19324 case CCNOmode:
19325 if (req_mode != CCNOmode
19326 && (req_mode != CCmode
19327 || XEXP (SET_SRC (set), 1) != const0_rtx))
19328 return false;
19329 break;
19330 case CCmode:
19331 if (req_mode == CCGCmode)
19332 return false;
19333 /* FALLTHRU */
19334 case CCGCmode:
19335 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19336 return false;
19337 /* FALLTHRU */
19338 case CCGOCmode:
19339 if (req_mode == CCZmode)
19340 return false;
19341 /* FALLTHRU */
19342 case CCZmode:
19343 break;
19344
19345 case CCAmode:
19346 case CCCmode:
19347 case CCOmode:
19348 case CCSmode:
19349 if (set_mode != req_mode)
19350 return false;
19351 break;
19352
19353 default:
19354 gcc_unreachable ();
19355 }
19356
19357 return GET_MODE (SET_SRC (set)) == set_mode;
19358 }
19359
19360 /* Generate insn patterns to do an integer compare of OPERANDS. */
19361
19362 static rtx
19363 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19364 {
19365 enum machine_mode cmpmode;
19366 rtx tmp, flags;
19367
19368 cmpmode = SELECT_CC_MODE (code, op0, op1);
19369 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19370
19371 /* This is very simple, but making the interface the same as in the
19372 FP case makes the rest of the code easier. */
19373 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19374 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19375
19376 /* Return the test that should be put into the flags user, i.e.
19377 the bcc, scc, or cmov instruction. */
19378 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19379 }
19380
19381 /* Figure out whether to use ordered or unordered fp comparisons.
19382 Return the appropriate mode to use. */
19383
19384 enum machine_mode
19385 ix86_fp_compare_mode (enum rtx_code)
19386 {
19387 /* ??? In order to make all comparisons reversible, we do all comparisons
19388 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19389 all forms of trapping and non-trapping comparisons, we can make inequality
19390 comparisons trapping again, since it results in better code when using
19391 FCOM based compares. */
19392 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19393 }
19394
19395 enum machine_mode
19396 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19397 {
19398 enum machine_mode mode = GET_MODE (op0);
19399
19400 if (SCALAR_FLOAT_MODE_P (mode))
19401 {
19402 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19403 return ix86_fp_compare_mode (code);
19404 }
19405
19406 switch (code)
19407 {
19408 /* Only zero flag is needed. */
19409 case EQ: /* ZF=0 */
19410 case NE: /* ZF!=0 */
19411 return CCZmode;
19412 /* Codes needing carry flag. */
19413 case GEU: /* CF=0 */
19414 case LTU: /* CF=1 */
19415 /* Detect overflow checks. They need just the carry flag. */
19416 if (GET_CODE (op0) == PLUS
19417 && rtx_equal_p (op1, XEXP (op0, 0)))
19418 return CCCmode;
19419 else
19420 return CCmode;
19421 case GTU: /* CF=0 & ZF=0 */
19422 case LEU: /* CF=1 | ZF=1 */
19423 return CCmode;
19424 /* Codes possibly doable only with sign flag when
19425 comparing against zero. */
19426 case GE: /* SF=OF or SF=0 */
19427 case LT: /* SF<>OF or SF=1 */
19428 if (op1 == const0_rtx)
19429 return CCGOCmode;
19430 else
19431 /* For other cases Carry flag is not required. */
19432 return CCGCmode;
19433 /* Codes doable only with the sign flag when comparing
19434 against zero, but we lack a jump instruction for it,
19435 so we need to use relational tests against the overflow
19436 flag, which therefore needs to be zero. */
19437 case GT: /* ZF=0 & SF=OF */
19438 case LE: /* ZF=1 | SF<>OF */
19439 if (op1 == const0_rtx)
19440 return CCNOmode;
19441 else
19442 return CCGCmode;
19443 /* The strcmp pattern does (use flags) and combine may ask us for a proper
19444 mode. */
19445 case USE:
19446 return CCmode;
19447 default:
19448 gcc_unreachable ();
19449 }
19450 }
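
/* E.g. (illustrative) for an overflow check written as (x + y) < x with
   unsigned operands, op0 is a PLUS whose first arm equals op1, so CCCmode is
   chosen above and only the carry flag needs to be examined.  */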
19451
19452 /* Return the fixed registers used for condition codes. */
19453
19454 static bool
19455 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19456 {
19457 *p1 = FLAGS_REG;
19458 *p2 = FPSR_REG;
19459 return true;
19460 }
19461
19462 /* If two condition code modes are compatible, return a condition code
19463 mode which is compatible with both. Otherwise, return
19464 VOIDmode. */
19465
19466 static enum machine_mode
19467 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19468 {
19469 if (m1 == m2)
19470 return m1;
19471
19472 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19473 return VOIDmode;
19474
19475 if ((m1 == CCGCmode && m2 == CCGOCmode)
19476 || (m1 == CCGOCmode && m2 == CCGCmode))
19477 return CCGCmode;
19478
19479 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19480 return m2;
19481 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19482 return m1;
19483
19484 switch (m1)
19485 {
19486 default:
19487 gcc_unreachable ();
19488
19489 case CCmode:
19490 case CCGCmode:
19491 case CCGOCmode:
19492 case CCNOmode:
19493 case CCAmode:
19494 case CCCmode:
19495 case CCOmode:
19496 case CCSmode:
19497 case CCZmode:
19498 switch (m2)
19499 {
19500 default:
19501 return VOIDmode;
19502
19503 case CCmode:
19504 case CCGCmode:
19505 case CCGOCmode:
19506 case CCNOmode:
19507 case CCAmode:
19508 case CCCmode:
19509 case CCOmode:
19510 case CCSmode:
19511 case CCZmode:
19512 return CCmode;
19513 }
19514
19515 case CCFPmode:
19516 case CCFPUmode:
19517 /* These are only compatible with themselves, which we already
19518 checked above. */
19519 return VOIDmode;
19520 }
19521 }
19522
19523
19524 /* Return a comparison we can do that is equivalent to
19525 swap_condition (code), apart possibly from orderedness.
19526 But never change orderedness if TARGET_IEEE_FP, returning
19527 UNKNOWN in that case if necessary. */
19528
19529 static enum rtx_code
19530 ix86_fp_swap_condition (enum rtx_code code)
19531 {
19532 switch (code)
19533 {
19534 case GT: /* GTU - CF=0 & ZF=0 */
19535 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19536 case GE: /* GEU - CF=0 */
19537 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19538 case UNLT: /* LTU - CF=1 */
19539 return TARGET_IEEE_FP ? UNKNOWN : GT;
19540 case UNLE: /* LEU - CF=1 | ZF=1 */
19541 return TARGET_IEEE_FP ? UNKNOWN : GE;
19542 default:
19543 return swap_condition (code);
19544 }
19545 }
19546
19547 /* Return the cost of comparison CODE using the best strategy for performance.
19548 All following functions use the number of instructions as a cost metric.
19549 In the future this should be tweaked to compute bytes for optimize_size and
19550 take into account the performance of various instructions on various CPUs. */
19551
19552 static int
19553 ix86_fp_comparison_cost (enum rtx_code code)
19554 {
19555 int arith_cost;
19556
19557 /* The cost of code using bit-twiddling on %ah. */
19558 switch (code)
19559 {
19560 case UNLE:
19561 case UNLT:
19562 case LTGT:
19563 case GT:
19564 case GE:
19565 case UNORDERED:
19566 case ORDERED:
19567 case UNEQ:
19568 arith_cost = 4;
19569 break;
19570 case LT:
19571 case NE:
19572 case EQ:
19573 case UNGE:
19574 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19575 break;
19576 case LE:
19577 case UNGT:
19578 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19579 break;
19580 default:
19581 gcc_unreachable ();
19582 }
19583
19584 switch (ix86_fp_comparison_strategy (code))
19585 {
19586 case IX86_FPCMP_COMI:
19587 return arith_cost > 4 ? 3 : 2;
19588 case IX86_FPCMP_SAHF:
19589 return arith_cost > 4 ? 4 : 3;
19590 default:
19591 return arith_cost;
19592 }
19593 }
19594
19595 /* Return the strategy to use for floating-point comparisons. We assume that
19596 fcomi is always preferable where available, since that is also true when
19597 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19598
19599 enum ix86_fpcmp_strategy
19600 ix86_fp_comparison_strategy (enum rtx_code)
19601 {
19602 /* Do fcomi/sahf based test when profitable. */
19603
19604 if (TARGET_CMOVE)
19605 return IX86_FPCMP_COMI;
19606
19607 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19608 return IX86_FPCMP_SAHF;
19609
19610 return IX86_FPCMP_ARITH;
19611 }
19612
19613 /* Swap, force into registers, or otherwise massage the two operands
19614 to a fp comparison. The operands are updated in place; the new
19615 comparison code is returned. */
19616
19617 static enum rtx_code
19618 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19619 {
19620 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19621 rtx op0 = *pop0, op1 = *pop1;
19622 enum machine_mode op_mode = GET_MODE (op0);
19623 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19624
19625 /* All of the unordered compare instructions only work on registers.
19626 The same is true of the fcomi compare instructions. The XFmode
19627 compare instructions require registers except when comparing
19628 against zero or when converting operand 1 from fixed point to
19629 floating point. */
19630
19631 if (!is_sse
19632 && (fpcmp_mode == CCFPUmode
19633 || (op_mode == XFmode
19634 && ! (standard_80387_constant_p (op0) == 1
19635 || standard_80387_constant_p (op1) == 1)
19636 && GET_CODE (op1) != FLOAT)
19637 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19638 {
19639 op0 = force_reg (op_mode, op0);
19640 op1 = force_reg (op_mode, op1);
19641 }
19642 else
19643 {
19644 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19645 things around if they appear profitable, otherwise force op0
19646 into a register. */
19647
19648 if (standard_80387_constant_p (op0) == 0
19649 || (MEM_P (op0)
19650 && ! (standard_80387_constant_p (op1) == 0
19651 || MEM_P (op1))))
19652 {
19653 enum rtx_code new_code = ix86_fp_swap_condition (code);
19654 if (new_code != UNKNOWN)
19655 {
19656 rtx tmp;
19657 tmp = op0, op0 = op1, op1 = tmp;
19658 code = new_code;
19659 }
19660 }
19661
19662 if (!REG_P (op0))
19663 op0 = force_reg (op_mode, op0);
19664
19665 if (CONSTANT_P (op1))
19666 {
19667 int tmp = standard_80387_constant_p (op1);
19668 if (tmp == 0)
19669 op1 = validize_mem (force_const_mem (op_mode, op1));
19670 else if (tmp == 1)
19671 {
19672 if (TARGET_CMOVE)
19673 op1 = force_reg (op_mode, op1);
19674 }
19675 else
19676 op1 = force_reg (op_mode, op1);
19677 }
19678 }
19679
19680 /* Try to rearrange the comparison to make it cheaper. */
19681 if (ix86_fp_comparison_cost (code)
19682 > ix86_fp_comparison_cost (swap_condition (code))
19683 && (REG_P (op1) || can_create_pseudo_p ()))
19684 {
19685 rtx tmp;
19686 tmp = op0, op0 = op1, op1 = tmp;
19687 code = swap_condition (code);
19688 if (!REG_P (op0))
19689 op0 = force_reg (op_mode, op0);
19690 }
19691
19692 *pop0 = op0;
19693 *pop1 = op1;
19694 return code;
19695 }
19696
19697 /* Convert the comparison codes we use to represent FP comparisons to the
19698 integer code that will result in a proper branch. Return UNKNOWN if no
19699 such code is available. */
19700
19701 enum rtx_code
19702 ix86_fp_compare_code_to_integer (enum rtx_code code)
19703 {
19704 switch (code)
19705 {
19706 case GT:
19707 return GTU;
19708 case GE:
19709 return GEU;
19710 case ORDERED:
19711 case UNORDERED:
19712 return code;
19713 break;
19714 case UNEQ:
19715 return EQ;
19716 break;
19717 case UNLT:
19718 return LTU;
19719 break;
19720 case UNLE:
19721 return LEU;
19722 break;
19723 case LTGT:
19724 return NE;
19725 break;
19726 default:
19727 return UNKNOWN;
19728 }
19729 }
19730
19731 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19732
19733 static rtx
19734 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19735 {
19736 enum machine_mode fpcmp_mode, intcmp_mode;
19737 rtx tmp, tmp2;
19738
19739 fpcmp_mode = ix86_fp_compare_mode (code);
19740 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19741
19742 /* Do fcomi/sahf based test when profitable. */
19743 switch (ix86_fp_comparison_strategy (code))
19744 {
19745 case IX86_FPCMP_COMI:
19746 intcmp_mode = fpcmp_mode;
19747 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19748 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19749 tmp);
19750 emit_insn (tmp);
19751 break;
19752
19753 case IX86_FPCMP_SAHF:
19754 intcmp_mode = fpcmp_mode;
19755 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19756 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19757 tmp);
19758
19759 if (!scratch)
19760 scratch = gen_reg_rtx (HImode);
19761 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19762 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19763 break;
19764
19765 case IX86_FPCMP_ARITH:
19766 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19767 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19768 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19769 if (!scratch)
19770 scratch = gen_reg_rtx (HImode);
19771 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19772
19773 /* In the unordered case, we have to check C2 for NaNs, which
19774 doesn't happen to work out to anything nice combination-wise.
19775 So do some bit twiddling on the value we've got in AH to come
19776 up with an appropriate set of condition codes. */
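/* After fnstsw the x87 condition codes end up in AH as C0 = 0x01,
C2 = 0x04 and C3 = 0x40 (C1 = 0x02 is not needed here), so the
constants below, e.g. 0x45 = C3|C2|C0 and 0x40 = C3, simply select
combinations of those bits. */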
19777
19778 intcmp_mode = CCNOmode;
19779 switch (code)
19780 {
19781 case GT:
19782 case UNGT:
19783 if (code == GT || !TARGET_IEEE_FP)
19784 {
19785 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19786 code = EQ;
19787 }
19788 else
19789 {
19790 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19791 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19792 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19793 intcmp_mode = CCmode;
19794 code = GEU;
19795 }
19796 break;
19797 case LT:
19798 case UNLT:
19799 if (code == LT && TARGET_IEEE_FP)
19800 {
19801 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19802 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19803 intcmp_mode = CCmode;
19804 code = EQ;
19805 }
19806 else
19807 {
19808 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19809 code = NE;
19810 }
19811 break;
19812 case GE:
19813 case UNGE:
19814 if (code == GE || !TARGET_IEEE_FP)
19815 {
19816 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19817 code = EQ;
19818 }
19819 else
19820 {
19821 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19822 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19823 code = NE;
19824 }
19825 break;
19826 case LE:
19827 case UNLE:
19828 if (code == LE && TARGET_IEEE_FP)
19829 {
19830 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19831 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19832 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19833 intcmp_mode = CCmode;
19834 code = LTU;
19835 }
19836 else
19837 {
19838 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19839 code = NE;
19840 }
19841 break;
19842 case EQ:
19843 case UNEQ:
19844 if (code == EQ && TARGET_IEEE_FP)
19845 {
19846 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19847 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19848 intcmp_mode = CCmode;
19849 code = EQ;
19850 }
19851 else
19852 {
19853 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19854 code = NE;
19855 }
19856 break;
19857 case NE:
19858 case LTGT:
19859 if (code == NE && TARGET_IEEE_FP)
19860 {
19861 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19862 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19863 GEN_INT (0x40)));
19864 code = NE;
19865 }
19866 else
19867 {
19868 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19869 code = EQ;
19870 }
19871 break;
19872
19873 case UNORDERED:
19874 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19875 code = NE;
19876 break;
19877 case ORDERED:
19878 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19879 code = EQ;
19880 break;
19881
19882 default:
19883 gcc_unreachable ();
19884 }
19885 break;
19886
19887 default:
19888 gcc_unreachable ();
19889 }
19890
19891 /* Return the test that should be put into the flags user, i.e.
19892 the bcc, scc, or cmov instruction. */
19893 return gen_rtx_fmt_ee (code, VOIDmode,
19894 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19895 const0_rtx);
19896 }
19897
19898 static rtx
19899 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19900 {
19901 rtx ret;
19902
19903 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19904 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19905
19906 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19907 {
19908 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19909 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19910 }
19911 else
19912 ret = ix86_expand_int_compare (code, op0, op1);
19913
19914 return ret;
19915 }
19916
19917 void
19918 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19919 {
19920 enum machine_mode mode = GET_MODE (op0);
19921 rtx tmp;
19922
19923 switch (mode)
19924 {
19925 case SFmode:
19926 case DFmode:
19927 case XFmode:
19928 case QImode:
19929 case HImode:
19930 case SImode:
19931 simple:
19932 tmp = ix86_expand_compare (code, op0, op1);
19933 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19934 gen_rtx_LABEL_REF (VOIDmode, label),
19935 pc_rtx);
19936 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19937 return;
19938
19939 case DImode:
19940 if (TARGET_64BIT)
19941 goto simple;
19942 case TImode:
19943 /* Expand DImode branch into multiple compare+branch. */
19944 {
19945 rtx lo[2], hi[2];
19946 rtx_code_label *label2;
19947 enum rtx_code code1, code2, code3;
19948 enum machine_mode submode;
19949
19950 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19951 {
19952 tmp = op0, op0 = op1, op1 = tmp;
19953 code = swap_condition (code);
19954 }
19955
19956 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19957 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19958
19959 submode = mode == DImode ? SImode : DImode;
19960
19961 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19962 avoid two branches. This costs one extra insn, so disable when
19963 optimizing for size. */
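/* E.g. a DImode a == b on a 32-bit target becomes a single branch on
(hi(a)^hi(b)) | (lo(a)^lo(b)) being zero; when one half of a constant
operand is already zero the corresponding xor is skipped. */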
19964
19965 if ((code == EQ || code == NE)
19966 && (!optimize_insn_for_size_p ()
19967 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19968 {
19969 rtx xor0, xor1;
19970
19971 xor1 = hi[0];
19972 if (hi[1] != const0_rtx)
19973 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19974 NULL_RTX, 0, OPTAB_WIDEN);
19975
19976 xor0 = lo[0];
19977 if (lo[1] != const0_rtx)
19978 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19979 NULL_RTX, 0, OPTAB_WIDEN);
19980
19981 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19982 NULL_RTX, 0, OPTAB_WIDEN);
19983
19984 ix86_expand_branch (code, tmp, const0_rtx, label);
19985 return;
19986 }
19987
19988 /* Otherwise, if we are doing a less-than or greater-or-equal-than
19989 comparison, op1 is a constant and its low word is zero, then we can
19990 just examine the high word. Similarly for a low word of -1 and
19991 less-or-equal-than or greater-than. */
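/* E.g. unsigned a < 0x500000000 (hi = 5, lo = 0) holds exactly when
hi(a) < 5, so the low-word compare can be dropped entirely. */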
19992
19993 if (CONST_INT_P (hi[1]))
19994 switch (code)
19995 {
19996 case LT: case LTU: case GE: case GEU:
19997 if (lo[1] == const0_rtx)
19998 {
19999 ix86_expand_branch (code, hi[0], hi[1], label);
20000 return;
20001 }
20002 break;
20003 case LE: case LEU: case GT: case GTU:
20004 if (lo[1] == constm1_rtx)
20005 {
20006 ix86_expand_branch (code, hi[0], hi[1], label);
20007 return;
20008 }
20009 break;
20010 default:
20011 break;
20012 }
20013
20014 /* Otherwise, we need two or three jumps. */
20015
20016 label2 = gen_label_rtx ();
20017
20018 code1 = code;
20019 code2 = swap_condition (code);
20020 code3 = unsigned_condition (code);
20021
20022 switch (code)
20023 {
20024 case LT: case GT: case LTU: case GTU:
20025 break;
20026
20027 case LE: code1 = LT; code2 = GT; break;
20028 case GE: code1 = GT; code2 = LT; break;
20029 case LEU: code1 = LTU; code2 = GTU; break;
20030 case GEU: code1 = GTU; code2 = LTU; break;
20031
20032 case EQ: code1 = UNKNOWN; code2 = NE; break;
20033 case NE: code2 = UNKNOWN; break;
20034
20035 default:
20036 gcc_unreachable ();
20037 }
20038
20039 /*
20040 * a < b =>
20041 * if (hi(a) < hi(b)) goto true;
20042 * if (hi(a) > hi(b)) goto false;
20043 * if (lo(a) < lo(b)) goto true;
20044 * false:
20045 */
20046
20047 if (code1 != UNKNOWN)
20048 ix86_expand_branch (code1, hi[0], hi[1], label);
20049 if (code2 != UNKNOWN)
20050 ix86_expand_branch (code2, hi[0], hi[1], label2);
20051
20052 ix86_expand_branch (code3, lo[0], lo[1], label);
20053
20054 if (code2 != UNKNOWN)
20055 emit_label (label2);
20056 return;
20057 }
20058
20059 default:
20060 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20061 goto simple;
20062 }
20063 }
20064
20065 /* Split branch based on floating point condition. */
20066 void
20067 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20068 rtx target1, rtx target2, rtx tmp)
20069 {
20070 rtx condition;
20071 rtx i;
20072
20073 if (target2 != pc_rtx)
20074 {
20075 rtx tmp = target2;
20076 code = reverse_condition_maybe_unordered (code);
20077 target2 = target1;
20078 target1 = tmp;
20079 }
20080
20081 condition = ix86_expand_fp_compare (code, op1, op2,
20082 tmp);
20083
20084 i = emit_jump_insn (gen_rtx_SET
20085 (VOIDmode, pc_rtx,
20086 gen_rtx_IF_THEN_ELSE (VOIDmode,
20087 condition, target1, target2)));
20088 if (split_branch_probability >= 0)
20089 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20090 }
20091
20092 void
20093 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20094 {
20095 rtx ret;
20096
20097 gcc_assert (GET_MODE (dest) == QImode);
20098
20099 ret = ix86_expand_compare (code, op0, op1);
20100 PUT_MODE (ret, QImode);
20101 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20102 }
20103
20104 /* Expand a comparison setting or clearing the carry flag. Return true
20105 when successful, storing the resulting comparison in *POP. */
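/* E.g. a <u b maps directly onto the carry flag after a cmp, which lets
the caller materialise a -1/0 mask with a single sbb. */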
20106 static bool
20107 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20108 {
20109 enum machine_mode mode =
20110 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20111
20112 /* Do not handle double-mode compares that go through a special path. */
20113 if (mode == (TARGET_64BIT ? TImode : DImode))
20114 return false;
20115
20116 if (SCALAR_FLOAT_MODE_P (mode))
20117 {
20118 rtx compare_op;
20119 rtx_insn *compare_seq;
20120
20121 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20122
20123 /* Shortcut: the following common codes never translate
20124 into carry flag compares. */
20125 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20126 || code == ORDERED || code == UNORDERED)
20127 return false;
20128
20129 /* These comparisons require the zero flag; swap operands so they won't. */
20130 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20131 && !TARGET_IEEE_FP)
20132 {
20133 rtx tmp = op0;
20134 op0 = op1;
20135 op1 = tmp;
20136 code = swap_condition (code);
20137 }
20138
20139 /* Try to expand the comparison and verify that we end up with a
20140 carry flag based comparison. This fails only when we decide to
20141 expand the comparison using arithmetic, which is not a common
20142 scenario. */
20143 start_sequence ();
20144 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20145 compare_seq = get_insns ();
20146 end_sequence ();
20147
20148 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20149 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20150 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20151 else
20152 code = GET_CODE (compare_op);
20153
20154 if (code != LTU && code != GEU)
20155 return false;
20156
20157 emit_insn (compare_seq);
20158 *pop = compare_op;
20159 return true;
20160 }
20161
20162 if (!INTEGRAL_MODE_P (mode))
20163 return false;
20164
20165 switch (code)
20166 {
20167 case LTU:
20168 case GEU:
20169 break;
20170
20171 /* Convert a==0 into (unsigned)a<1. */
20172 case EQ:
20173 case NE:
20174 if (op1 != const0_rtx)
20175 return false;
20176 op1 = const1_rtx;
20177 code = (code == EQ ? LTU : GEU);
20178 break;
20179
20180 /* Convert a>b into b<a or a>=b+1. */
20181 case GTU:
20182 case LEU:
20183 if (CONST_INT_P (op1))
20184 {
20185 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20186 /* Bail out on overflow. We could still swap the operands, but that
20187 would force loading the constant into a register. */
20188 if (op1 == const0_rtx
20189 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20190 return false;
20191 code = (code == GTU ? GEU : LTU);
20192 }
20193 else
20194 {
20195 rtx tmp = op1;
20196 op1 = op0;
20197 op0 = tmp;
20198 code = (code == GTU ? LTU : GEU);
20199 }
20200 break;
20201
20202 /* Convert a>=0 into (unsigned)a<0x80000000. */
20203 case LT:
20204 case GE:
20205 if (mode == DImode || op1 != const0_rtx)
20206 return false;
20207 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20208 code = (code == LT ? GEU : LTU);
20209 break;
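/* Convert a<=-1 into (unsigned)a>=0x80000000. */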
20210 case LE:
20211 case GT:
20212 if (mode == DImode || op1 != constm1_rtx)
20213 return false;
20214 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20215 code = (code == LE ? GEU : LTU);
20216 break;
20217
20218 default:
20219 return false;
20220 }
20221 /* Swapping operands may cause a constant to appear as the first operand. */
20222 if (!nonimmediate_operand (op0, VOIDmode))
20223 {
20224 if (!can_create_pseudo_p ())
20225 return false;
20226 op0 = force_reg (mode, op0);
20227 }
20228 *pop = ix86_expand_compare (code, op0, op1);
20229 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20230 return true;
20231 }
20232
20233 bool
20234 ix86_expand_int_movcc (rtx operands[])
20235 {
20236 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20237 rtx_insn *compare_seq;
20238 rtx compare_op;
20239 enum machine_mode mode = GET_MODE (operands[0]);
20240 bool sign_bit_compare_p = false;
20241 rtx op0 = XEXP (operands[1], 0);
20242 rtx op1 = XEXP (operands[1], 1);
20243
20244 if (GET_MODE (op0) == TImode
20245 || (GET_MODE (op0) == DImode
20246 && !TARGET_64BIT))
20247 return false;
20248
20249 start_sequence ();
20250 compare_op = ix86_expand_compare (code, op0, op1);
20251 compare_seq = get_insns ();
20252 end_sequence ();
20253
20254 compare_code = GET_CODE (compare_op);
20255
20256 if ((op1 == const0_rtx && (code == GE || code == LT))
20257 || (op1 == constm1_rtx && (code == GT || code == LE)))
20258 sign_bit_compare_p = true;
20259
20260 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20261 HImode insns, we'd be swallowed in word prefix ops. */
20262
20263 if ((mode != HImode || TARGET_FAST_PREFIX)
20264 && (mode != (TARGET_64BIT ? TImode : DImode))
20265 && CONST_INT_P (operands[2])
20266 && CONST_INT_P (operands[3]))
20267 {
20268 rtx out = operands[0];
20269 HOST_WIDE_INT ct = INTVAL (operands[2]);
20270 HOST_WIDE_INT cf = INTVAL (operands[3]);
20271 HOST_WIDE_INT diff;
20272
20273 diff = ct - cf;
20274 /* Sign bit compares are better done using shifts than using
20275 sbb. */
20276 if (sign_bit_compare_p
20277 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20278 {
20279 /* Detect overlap between destination and compare sources. */
20280 rtx tmp = out;
20281
20282 if (!sign_bit_compare_p)
20283 {
20284 rtx flags;
20285 bool fpcmp = false;
20286
20287 compare_code = GET_CODE (compare_op);
20288
20289 flags = XEXP (compare_op, 0);
20290
20291 if (GET_MODE (flags) == CCFPmode
20292 || GET_MODE (flags) == CCFPUmode)
20293 {
20294 fpcmp = true;
20295 compare_code
20296 = ix86_fp_compare_code_to_integer (compare_code);
20297 }
20298
20299 /* To simplify the rest of the code, restrict to the GEU case. */
20300 if (compare_code == LTU)
20301 {
20302 HOST_WIDE_INT tmp = ct;
20303 ct = cf;
20304 cf = tmp;
20305 compare_code = reverse_condition (compare_code);
20306 code = reverse_condition (code);
20307 }
20308 else
20309 {
20310 if (fpcmp)
20311 PUT_CODE (compare_op,
20312 reverse_condition_maybe_unordered
20313 (GET_CODE (compare_op)));
20314 else
20315 PUT_CODE (compare_op,
20316 reverse_condition (GET_CODE (compare_op)));
20317 }
20318 diff = ct - cf;
20319
20320 if (reg_overlap_mentioned_p (out, op0)
20321 || reg_overlap_mentioned_p (out, op1))
20322 tmp = gen_reg_rtx (mode);
20323
20324 if (mode == DImode)
20325 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20326 else
20327 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20328 flags, compare_op));
20329 }
20330 else
20331 {
20332 if (code == GT || code == GE)
20333 code = reverse_condition (code);
20334 else
20335 {
20336 HOST_WIDE_INT tmp = ct;
20337 ct = cf;
20338 cf = tmp;
20339 diff = ct - cf;
20340 }
20341 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20342 }
20343
20344 if (diff == 1)
20345 {
20346 /*
20347 * cmpl op0,op1
20348 * sbbl dest,dest
20349 * [addl dest, ct]
20350 *
20351 * Size 5 - 8.
20352 */
20353 if (ct)
20354 tmp = expand_simple_binop (mode, PLUS,
20355 tmp, GEN_INT (ct),
20356 copy_rtx (tmp), 1, OPTAB_DIRECT);
20357 }
20358 else if (cf == -1)
20359 {
20360 /*
20361 * cmpl op0,op1
20362 * sbbl dest,dest
20363 * orl $ct, dest
20364 *
20365 * Size 8.
20366 */
20367 tmp = expand_simple_binop (mode, IOR,
20368 tmp, GEN_INT (ct),
20369 copy_rtx (tmp), 1, OPTAB_DIRECT);
20370 }
20371 else if (diff == -1 && ct)
20372 {
20373 /*
20374 * cmpl op0,op1
20375 * sbbl dest,dest
20376 * notl dest
20377 * [addl dest, cf]
20378 *
20379 * Size 8 - 11.
20380 */
20381 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20382 if (cf)
20383 tmp = expand_simple_binop (mode, PLUS,
20384 copy_rtx (tmp), GEN_INT (cf),
20385 copy_rtx (tmp), 1, OPTAB_DIRECT);
20386 }
20387 else
20388 {
20389 /*
20390 * cmpl op0,op1
20391 * sbbl dest,dest
20392 * [notl dest]
20393 * andl cf - ct, dest
20394 * [addl dest, ct]
20395 *
20396 * Size 8 - 11.
20397 */
20398
20399 if (cf == 0)
20400 {
20401 cf = ct;
20402 ct = 0;
20403 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20404 }
20405
20406 tmp = expand_simple_binop (mode, AND,
20407 copy_rtx (tmp),
20408 gen_int_mode (cf - ct, mode),
20409 copy_rtx (tmp), 1, OPTAB_DIRECT);
20410 if (ct)
20411 tmp = expand_simple_binop (mode, PLUS,
20412 copy_rtx (tmp), GEN_INT (ct),
20413 copy_rtx (tmp), 1, OPTAB_DIRECT);
20414 }
20415
20416 if (!rtx_equal_p (tmp, out))
20417 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20418
20419 return true;
20420 }
20421
20422 if (diff < 0)
20423 {
20424 enum machine_mode cmp_mode = GET_MODE (op0);
20425
20426 HOST_WIDE_INT tmp;
20427 tmp = ct, ct = cf, cf = tmp;
20428 diff = -diff;
20429
20430 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20431 {
20432 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20433
20434 /* We may be reversing an unordered compare to a normal compare, which
20435 is not valid in general (we may convert a non-trapping condition to
20436 a trapping one); however, on i386 we currently emit all comparisons
20437 unordered. */
20438 compare_code = reverse_condition_maybe_unordered (compare_code);
20439 code = reverse_condition_maybe_unordered (code);
20440 }
20441 else
20442 {
20443 compare_code = reverse_condition (compare_code);
20444 code = reverse_condition (code);
20445 }
20446 }
20447
20448 compare_code = UNKNOWN;
20449 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20450 && CONST_INT_P (op1))
20451 {
20452 if (op1 == const0_rtx
20453 && (code == LT || code == GE))
20454 compare_code = code;
20455 else if (op1 == constm1_rtx)
20456 {
20457 if (code == LE)
20458 compare_code = LT;
20459 else if (code == GT)
20460 compare_code = GE;
20461 }
20462 }
20463
20464 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20465 if (compare_code != UNKNOWN
20466 && GET_MODE (op0) == GET_MODE (out)
20467 && (cf == -1 || ct == -1))
20468 {
20469 /* If the lea code below could be used, only optimize
20470 if it results in a 2-insn sequence. */
20471
20472 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20473 || diff == 3 || diff == 5 || diff == 9)
20474 || (compare_code == LT && ct == -1)
20475 || (compare_code == GE && cf == -1))
20476 {
20477 /*
20478 * notl op1 (if necessary)
20479 * sarl $31, op1
20480 * orl cf, op1
20481 */
20482 if (ct != -1)
20483 {
20484 cf = ct;
20485 ct = -1;
20486 code = reverse_condition (code);
20487 }
20488
20489 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20490
20491 out = expand_simple_binop (mode, IOR,
20492 out, GEN_INT (cf),
20493 out, 1, OPTAB_DIRECT);
20494 if (out != operands[0])
20495 emit_move_insn (operands[0], out);
20496
20497 return true;
20498 }
20499 }
20500
20501
20502 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20503 || diff == 3 || diff == 5 || diff == 9)
20504 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20505 && (mode != DImode
20506 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20507 {
20508 /*
20509 * xorl dest,dest
20510 * cmpl op1,op2
20511 * setcc dest
20512 * lea cf(dest*(ct-cf)),dest
20513 *
20514 * Size 14.
20515 *
20516 * This also catches the degenerate setcc-only case.
20517 */
20518
20519 rtx tmp;
20520 int nops;
20521
20522 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20523
20524 nops = 0;
20525 /* On x86_64 the lea instruction operates on Pmode, so we need
20526 to get the arithmetic done in the proper mode to match. */
20527 if (diff == 1)
20528 tmp = copy_rtx (out);
20529 else
20530 {
20531 rtx out1;
20532 out1 = copy_rtx (out);
20533 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20534 nops++;
20535 if (diff & 1)
20536 {
20537 tmp = gen_rtx_PLUS (mode, tmp, out1);
20538 nops++;
20539 }
20540 }
20541 if (cf != 0)
20542 {
20543 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20544 nops++;
20545 }
20546 if (!rtx_equal_p (tmp, out))
20547 {
20548 if (nops == 1)
20549 out = force_operand (tmp, copy_rtx (out));
20550 else
20551 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20552 }
20553 if (!rtx_equal_p (out, operands[0]))
20554 emit_move_insn (operands[0], copy_rtx (out));
20555
20556 return true;
20557 }
20558
20559 /*
20560 * General case: Jumpful:
20561 * xorl dest,dest cmpl op1, op2
20562 * cmpl op1, op2 movl ct, dest
20563 * setcc dest jcc 1f
20564 * decl dest movl cf, dest
20565 * andl (cf-ct),dest 1:
20566 * addl ct,dest
20567 *
20568 * Size 20. Size 14.
20569 *
20570 * This is reasonably steep, but branch mispredict costs are
20571 * high on modern CPUs, so consider failing only if optimizing
20572 * for space.
20573 */
20574
20575 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20576 && BRANCH_COST (optimize_insn_for_speed_p (),
20577 false) >= 2)
20578 {
20579 if (cf == 0)
20580 {
20581 enum machine_mode cmp_mode = GET_MODE (op0);
20582
20583 cf = ct;
20584 ct = 0;
20585
20586 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20587 {
20588 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20589
20590 /* We may be reversing an unordered compare to a normal compare,
20591 which is not valid in general (we may convert a non-trapping
20592 condition to a trapping one); however, on i386 we currently
20593 emit all comparisons unordered. */
20594 code = reverse_condition_maybe_unordered (code);
20595 }
20596 else
20597 {
20598 code = reverse_condition (code);
20599 if (compare_code != UNKNOWN)
20600 compare_code = reverse_condition (compare_code);
20601 }
20602 }
20603
20604 if (compare_code != UNKNOWN)
20605 {
20606 /* notl op1 (if needed)
20607 sarl $31, op1
20608 andl (cf-ct), op1
20609 addl ct, op1
20610
20611 For x < 0 (resp. x <= -1) there will be no notl,
20612 so if possible swap the constants to get rid of the
20613 complement.
20614 True/false will be -1/0 while code below (store flag
20615 followed by decrement) is 0/-1, so the constants need
20616 to be exchanged once more. */
20617
20618 if (compare_code == GE || !cf)
20619 {
20620 code = reverse_condition (code);
20621 compare_code = LT;
20622 }
20623 else
20624 {
20625 HOST_WIDE_INT tmp = cf;
20626 cf = ct;
20627 ct = tmp;
20628 }
20629
20630 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20631 }
20632 else
20633 {
20634 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20635
20636 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20637 constm1_rtx,
20638 copy_rtx (out), 1, OPTAB_DIRECT);
20639 }
20640
20641 out = expand_simple_binop (mode, AND, copy_rtx (out),
20642 gen_int_mode (cf - ct, mode),
20643 copy_rtx (out), 1, OPTAB_DIRECT);
20644 if (ct)
20645 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20646 copy_rtx (out), 1, OPTAB_DIRECT);
20647 if (!rtx_equal_p (out, operands[0]))
20648 emit_move_insn (operands[0], copy_rtx (out));
20649
20650 return true;
20651 }
20652 }
20653
20654 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20655 {
20656 /* Try a few things more with specific constants and a variable. */
20657
20658 optab op;
20659 rtx var, orig_out, out, tmp;
20660
20661 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20662 return false;
20663
20664 /* If one of the two operands is an interesting constant, load a 0/-1
20665 mask via the recursion below and mask the variable in with a logical operation. */
20666
20667 if (CONST_INT_P (operands[2]))
20668 {
20669 var = operands[3];
20670 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20671 operands[3] = constm1_rtx, op = and_optab;
20672 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20673 operands[3] = const0_rtx, op = ior_optab;
20674 else
20675 return false;
20676 }
20677 else if (CONST_INT_P (operands[3]))
20678 {
20679 var = operands[2];
20680 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20681 operands[2] = constm1_rtx, op = and_optab;
20682 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
20683 operands[2] = const0_rtx, op = ior_optab;
20684 else
20685 return false;
20686 }
20687 else
20688 return false;
20689
20690 orig_out = operands[0];
20691 tmp = gen_reg_rtx (mode);
20692 operands[0] = tmp;
20693
20694 /* Recurse to get the constant loaded. */
20695 if (ix86_expand_int_movcc (operands) == 0)
20696 return false;
20697
20698 /* Mask in the interesting variable. */
20699 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20700 OPTAB_WIDEN);
20701 if (!rtx_equal_p (out, orig_out))
20702 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20703
20704 return true;
20705 }
20706
20707 /*
20708 * For comparison with above,
20709 *
20710 * movl cf,dest
20711 * movl ct,tmp
20712 * cmpl op1,op2
20713 * cmovcc tmp,dest
20714 *
20715 * Size 15.
20716 */
20717
20718 if (! nonimmediate_operand (operands[2], mode))
20719 operands[2] = force_reg (mode, operands[2]);
20720 if (! nonimmediate_operand (operands[3], mode))
20721 operands[3] = force_reg (mode, operands[3]);
20722
20723 if (! register_operand (operands[2], VOIDmode)
20724 && (mode == QImode
20725 || ! register_operand (operands[3], VOIDmode)))
20726 operands[2] = force_reg (mode, operands[2]);
20727
20728 if (mode == QImode
20729 && ! register_operand (operands[3], VOIDmode))
20730 operands[3] = force_reg (mode, operands[3]);
20731
20732 emit_insn (compare_seq);
20733 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20734 gen_rtx_IF_THEN_ELSE (mode,
20735 compare_op, operands[2],
20736 operands[3])));
20737 return true;
20738 }
20739
20740 /* Swap, force into registers, or otherwise massage the two operands
20741 to an sse comparison with a mask result. Thus we differ a bit from
20742 ix86_prepare_fp_compare_args which expects to produce a flags result.
20743
20744 The DEST operand exists to help determine whether to commute commutative
20745 operators. The POP0/POP1 operands are updated in place. The new
20746 comparison code is returned, or UNKNOWN if not implementable. */
20747
20748 static enum rtx_code
20749 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20750 rtx *pop0, rtx *pop1)
20751 {
20752 rtx tmp;
20753
20754 switch (code)
20755 {
20756 case LTGT:
20757 case UNEQ:
20758 /* AVX supports all the needed comparisons. */
20759 if (TARGET_AVX)
20760 break;
20761 /* We have no LTGT as an operator. We could implement it with
20762 NE & ORDERED, but this requires an extra temporary. It's
20763 not clear that it's worth it. */
20764 return UNKNOWN;
20765
20766 case LT:
20767 case LE:
20768 case UNGT:
20769 case UNGE:
20770 /* These are supported directly. */
20771 break;
20772
20773 case EQ:
20774 case NE:
20775 case UNORDERED:
20776 case ORDERED:
20777 /* AVX has 3-operand comparisons; no need to swap anything. */
20778 if (TARGET_AVX)
20779 break;
20780 /* For commutative operators, try to canonicalize the destination
20781 operand to be first in the comparison - this helps reload to
20782 avoid extra moves. */
20783 if (!dest || !rtx_equal_p (dest, *pop1))
20784 break;
20785 /* FALLTHRU */
20786
20787 case GE:
20788 case GT:
20789 case UNLE:
20790 case UNLT:
20791 /* These are not supported directly before AVX, and furthermore
20792 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20793 comparison operands to transform into something that is
20794 supported. */
20795 tmp = *pop0;
20796 *pop0 = *pop1;
20797 *pop1 = tmp;
20798 code = swap_condition (code);
20799 break;
20800
20801 default:
20802 gcc_unreachable ();
20803 }
20804
20805 return code;
20806 }
20807
20808 /* Detect conditional moves that exactly match min/max operational
20809 semantics. Note that this is IEEE safe, as long as we don't
20810 interchange the operands.
20811
20812 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20813 and TRUE if the operation is successful and instructions are emitted. */
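/* Operand order matters because the SSE min/max instructions return the
second operand when the operands are unordered or both zero, so swapping
them would change NaN and -0.0 behaviour. */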
20814
20815 static bool
20816 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20817 rtx cmp_op1, rtx if_true, rtx if_false)
20818 {
20819 enum machine_mode mode;
20820 bool is_min;
20821 rtx tmp;
20822
20823 if (code == LT)
20824 ;
20825 else if (code == UNGE)
20826 {
20827 tmp = if_true;
20828 if_true = if_false;
20829 if_false = tmp;
20830 }
20831 else
20832 return false;
20833
20834 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20835 is_min = true;
20836 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20837 is_min = false;
20838 else
20839 return false;
20840
20841 mode = GET_MODE (dest);
20842
20843 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20844 but MODE may be a vector mode and thus not appropriate. */
20845 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20846 {
20847 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20848 rtvec v;
20849
20850 if_true = force_reg (mode, if_true);
20851 v = gen_rtvec (2, if_true, if_false);
20852 tmp = gen_rtx_UNSPEC (mode, v, u);
20853 }
20854 else
20855 {
20856 code = is_min ? SMIN : SMAX;
20857 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20858 }
20859
20860 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20861 return true;
20862 }
20863
20864 /* Expand an sse vector comparison. Return the register with the result. */
20865
20866 static rtx
20867 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20868 rtx op_true, rtx op_false)
20869 {
20870 enum machine_mode mode = GET_MODE (dest);
20871 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20872
20873 /* In the general case the result of the comparison can differ from the operands' type. */
20874 enum machine_mode cmp_mode;
20875
20876 /* In AVX512F the result of comparison is an integer mask. */
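/* E.g. comparing two V16SImode operands yields a 16-bit (HImode) mask
with one bit per element. */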
20877 bool maskcmp = false;
20878 rtx x;
20879
20880 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20881 {
20882 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20883 gcc_assert (cmp_mode != BLKmode);
20884
20885 maskcmp = true;
20886 }
20887 else
20888 cmp_mode = cmp_ops_mode;
20889
20890
20891 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20892 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20893 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20894
20895 if (optimize
20896 || reg_overlap_mentioned_p (dest, op_true)
20897 || reg_overlap_mentioned_p (dest, op_false))
20898 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20899
20900 /* Compare patterns for int modes are unspec in AVX512F only. */
20901 if (maskcmp && (code == GT || code == EQ))
20902 {
20903 rtx (*gen)(rtx, rtx, rtx);
20904
20905 switch (cmp_ops_mode)
20906 {
20907 case V16SImode:
20908 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20909 break;
20910 case V8DImode:
20911 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20912 break;
20913 default:
20914 gen = NULL;
20915 }
20916
20917 if (gen)
20918 {
20919 emit_insn (gen (dest, cmp_op0, cmp_op1));
20920 return dest;
20921 }
20922 }
20923 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20924
20925 if (cmp_mode != mode && !maskcmp)
20926 {
20927 x = force_reg (cmp_ops_mode, x);
20928 convert_move (dest, x, false);
20929 }
20930 else
20931 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20932
20933 return dest;
20934 }
20935
20936 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20937 operations. This is used for both scalar and vector conditional moves. */
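/* When no blend or XOP instruction applies, the fallback below computes
dest = (cmp & op_true) | (~cmp & op_false); cheaper forms are used when
one arm is all-zeros or all-ones. */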
20938
20939 static void
20940 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20941 {
20942 enum machine_mode mode = GET_MODE (dest);
20943 enum machine_mode cmpmode = GET_MODE (cmp);
20944
20945 /* In AVX512F the result of comparison is an integer mask. */
20946 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20947
20948 rtx t2, t3, x;
20949
20950 if (vector_all_ones_operand (op_true, mode)
20951 && rtx_equal_p (op_false, CONST0_RTX (mode))
20952 && !maskcmp)
20953 {
20954 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20955 }
20956 else if (op_false == CONST0_RTX (mode)
20957 && !maskcmp)
20958 {
20959 op_true = force_reg (mode, op_true);
20960 x = gen_rtx_AND (mode, cmp, op_true);
20961 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20962 }
20963 else if (op_true == CONST0_RTX (mode)
20964 && !maskcmp)
20965 {
20966 op_false = force_reg (mode, op_false);
20967 x = gen_rtx_NOT (mode, cmp);
20968 x = gen_rtx_AND (mode, x, op_false);
20969 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20970 }
20971 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20972 && !maskcmp)
20973 {
20974 op_false = force_reg (mode, op_false);
20975 x = gen_rtx_IOR (mode, cmp, op_false);
20976 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20977 }
20978 else if (TARGET_XOP
20979 && !maskcmp)
20980 {
20981 op_true = force_reg (mode, op_true);
20982
20983 if (!nonimmediate_operand (op_false, mode))
20984 op_false = force_reg (mode, op_false);
20985
20986 emit_insn (gen_rtx_SET (mode, dest,
20987 gen_rtx_IF_THEN_ELSE (mode, cmp,
20988 op_true,
20989 op_false)));
20990 }
20991 else
20992 {
20993 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20994 rtx d = dest;
20995
20996 if (!nonimmediate_operand (op_true, mode))
20997 op_true = force_reg (mode, op_true);
20998
20999 op_false = force_reg (mode, op_false);
21000
21001 switch (mode)
21002 {
21003 case V4SFmode:
21004 if (TARGET_SSE4_1)
21005 gen = gen_sse4_1_blendvps;
21006 break;
21007 case V2DFmode:
21008 if (TARGET_SSE4_1)
21009 gen = gen_sse4_1_blendvpd;
21010 break;
21011 case V16QImode:
21012 case V8HImode:
21013 case V4SImode:
21014 case V2DImode:
21015 if (TARGET_SSE4_1)
21016 {
21017 gen = gen_sse4_1_pblendvb;
21018 if (mode != V16QImode)
21019 d = gen_reg_rtx (V16QImode);
21020 op_false = gen_lowpart (V16QImode, op_false);
21021 op_true = gen_lowpart (V16QImode, op_true);
21022 cmp = gen_lowpart (V16QImode, cmp);
21023 }
21024 break;
21025 case V8SFmode:
21026 if (TARGET_AVX)
21027 gen = gen_avx_blendvps256;
21028 break;
21029 case V4DFmode:
21030 if (TARGET_AVX)
21031 gen = gen_avx_blendvpd256;
21032 break;
21033 case V32QImode:
21034 case V16HImode:
21035 case V8SImode:
21036 case V4DImode:
21037 if (TARGET_AVX2)
21038 {
21039 gen = gen_avx2_pblendvb;
21040 if (mode != V32QImode)
21041 d = gen_reg_rtx (V32QImode);
21042 op_false = gen_lowpart (V32QImode, op_false);
21043 op_true = gen_lowpart (V32QImode, op_true);
21044 cmp = gen_lowpart (V32QImode, cmp);
21045 }
21046 break;
21047
21048 case V64QImode:
21049 gen = gen_avx512bw_blendmv64qi;
21050 break;
21051 case V32HImode:
21052 gen = gen_avx512bw_blendmv32hi;
21053 break;
21054 case V16SImode:
21055 gen = gen_avx512f_blendmv16si;
21056 break;
21057 case V8DImode:
21058 gen = gen_avx512f_blendmv8di;
21059 break;
21060 case V8DFmode:
21061 gen = gen_avx512f_blendmv8df;
21062 break;
21063 case V16SFmode:
21064 gen = gen_avx512f_blendmv16sf;
21065 break;
21066
21067 default:
21068 break;
21069 }
21070
21071 if (gen != NULL)
21072 {
21073 emit_insn (gen (d, op_false, op_true, cmp));
21074 if (d != dest)
21075 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21076 }
21077 else
21078 {
21079 op_true = force_reg (mode, op_true);
21080
21081 t2 = gen_reg_rtx (mode);
21082 if (optimize)
21083 t3 = gen_reg_rtx (mode);
21084 else
21085 t3 = dest;
21086
21087 x = gen_rtx_AND (mode, op_true, cmp);
21088 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21089
21090 x = gen_rtx_NOT (mode, cmp);
21091 x = gen_rtx_AND (mode, x, op_false);
21092 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21093
21094 x = gen_rtx_IOR (mode, t3, t2);
21095 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21096 }
21097 }
21098 }
21099
21100 /* Expand a floating-point conditional move. Return true if successful. */
21101
21102 bool
21103 ix86_expand_fp_movcc (rtx operands[])
21104 {
21105 enum machine_mode mode = GET_MODE (operands[0]);
21106 enum rtx_code code = GET_CODE (operands[1]);
21107 rtx tmp, compare_op;
21108 rtx op0 = XEXP (operands[1], 0);
21109 rtx op1 = XEXP (operands[1], 1);
21110
21111 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21112 {
21113 enum machine_mode cmode;
21114
21115 /* Since we have no cmove for SSE registers, don't force bad register
21116 allocation just to gain access to it. Deny movcc when the
21117 comparison mode doesn't match the move mode. */
21118 cmode = GET_MODE (op0);
21119 if (cmode == VOIDmode)
21120 cmode = GET_MODE (op1);
21121 if (cmode != mode)
21122 return false;
21123
21124 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21125 if (code == UNKNOWN)
21126 return false;
21127
21128 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21129 operands[2], operands[3]))
21130 return true;
21131
21132 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21133 operands[2], operands[3]);
21134 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21135 return true;
21136 }
21137
21138 if (GET_MODE (op0) == TImode
21139 || (GET_MODE (op0) == DImode
21140 && !TARGET_64BIT))
21141 return false;
21142
21143 /* The floating point conditional move instructions don't directly
21144 support conditions resulting from a signed integer comparison. */
21145
21146 compare_op = ix86_expand_compare (code, op0, op1);
21147 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21148 {
21149 tmp = gen_reg_rtx (QImode);
21150 ix86_expand_setcc (tmp, code, op0, op1);
21151
21152 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21153 }
21154
21155 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21156 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21157 operands[2], operands[3])));
21158
21159 return true;
21160 }
21161
21162 /* Expand a floating-point vector conditional move; a vcond operation
21163 rather than a movcc operation. */
21164
21165 bool
21166 ix86_expand_fp_vcond (rtx operands[])
21167 {
21168 enum rtx_code code = GET_CODE (operands[3]);
21169 rtx cmp;
21170
21171 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21172 &operands[4], &operands[5]);
21173 if (code == UNKNOWN)
21174 {
21175 rtx temp;
21176 switch (GET_CODE (operands[3]))
21177 {
21178 case LTGT:
21179 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21180 operands[5], operands[0], operands[0]);
21181 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21182 operands[5], operands[1], operands[2]);
21183 code = AND;
21184 break;
21185 case UNEQ:
21186 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21187 operands[5], operands[0], operands[0]);
21188 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21189 operands[5], operands[1], operands[2]);
21190 code = IOR;
21191 break;
21192 default:
21193 gcc_unreachable ();
21194 }
21195 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21196 OPTAB_DIRECT);
21197 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21198 return true;
21199 }
21200
21201 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21202 operands[5], operands[1], operands[2]))
21203 return true;
21204
21205 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21206 operands[1], operands[2]);
21207 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21208 return true;
21209 }
21210
21211 /* Expand a signed/unsigned integral vector conditional move. */
21212
21213 bool
21214 ix86_expand_int_vcond (rtx operands[])
21215 {
21216 enum machine_mode data_mode = GET_MODE (operands[0]);
21217 enum machine_mode mode = GET_MODE (operands[4]);
21218 enum rtx_code code = GET_CODE (operands[3]);
21219 bool negate = false;
21220 rtx x, cop0, cop1;
21221
21222 cop0 = operands[4];
21223 cop1 = operands[5];
21224
21225 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21226 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
21227 if ((code == LT || code == GE)
21228 && data_mode == mode
21229 && cop1 == CONST0_RTX (mode)
21230 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21231 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21232 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21233 && (GET_MODE_SIZE (data_mode) == 16
21234 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21235 {
21236 rtx negop = operands[2 - (code == LT)];
21237 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21238 if (negop == CONST1_RTX (data_mode))
21239 {
21240 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21241 operands[0], 1, OPTAB_DIRECT);
21242 if (res != operands[0])
21243 emit_move_insn (operands[0], res);
21244 return true;
21245 }
21246 else if (GET_MODE_INNER (data_mode) != DImode
21247 && vector_all_ones_operand (negop, data_mode))
21248 {
21249 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21250 operands[0], 0, OPTAB_DIRECT);
21251 if (res != operands[0])
21252 emit_move_insn (operands[0], res);
21253 return true;
21254 }
21255 }
21256
21257 if (!nonimmediate_operand (cop1, mode))
21258 cop1 = force_reg (mode, cop1);
21259 if (!general_operand (operands[1], data_mode))
21260 operands[1] = force_reg (data_mode, operands[1]);
21261 if (!general_operand (operands[2], data_mode))
21262 operands[2] = force_reg (data_mode, operands[2]);
21263
21264 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21265 if (TARGET_XOP
21266 && (mode == V16QImode || mode == V8HImode
21267 || mode == V4SImode || mode == V2DImode))
21268 ;
21269 else
21270 {
21271 /* Canonicalize the comparison to EQ, GT, GTU. */
21272 switch (code)
21273 {
21274 case EQ:
21275 case GT:
21276 case GTU:
21277 break;
21278
21279 case NE:
21280 case LE:
21281 case LEU:
21282 code = reverse_condition (code);
21283 negate = true;
21284 break;
21285
21286 case GE:
21287 case GEU:
21288 code = reverse_condition (code);
21289 negate = true;
21290 /* FALLTHRU */
21291
21292 case LT:
21293 case LTU:
21294 code = swap_condition (code);
21295 x = cop0, cop0 = cop1, cop1 = x;
21296 break;
21297
21298 default:
21299 gcc_unreachable ();
21300 }
21301
21302 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21303 if (mode == V2DImode)
21304 {
21305 switch (code)
21306 {
21307 case EQ:
21308 /* SSE4.1 supports EQ. */
21309 if (!TARGET_SSE4_1)
21310 return false;
21311 break;
21312
21313 case GT:
21314 case GTU:
21315 /* SSE4.2 supports GT/GTU. */
21316 if (!TARGET_SSE4_2)
21317 return false;
21318 break;
21319
21320 default:
21321 gcc_unreachable ();
21322 }
21323 }
21324
21325 /* Unsigned parallel compare is not supported by the hardware.
21326 Play some tricks to turn this into a signed comparison
21327 against 0 or against sign-bit-biased operands. */
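/* For the wide integer modes below, a >u b is rewritten as
(a - INT_MIN) >s (b - INT_MIN); for the narrow modes an unsigned
saturating subtraction is used instead, since a >u b iff (a -us b) != 0. */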
21328 if (code == GTU)
21329 {
21330 cop0 = force_reg (mode, cop0);
21331
21332 switch (mode)
21333 {
21334 case V16SImode:
21335 case V8DImode:
21336 case V8SImode:
21337 case V4DImode:
21338 case V4SImode:
21339 case V2DImode:
21340 {
21341 rtx t1, t2, mask;
21342 rtx (*gen_sub3) (rtx, rtx, rtx);
21343
21344 switch (mode)
21345 {
21346 case V16SImode: gen_sub3 = gen_subv16si3; break;
21347 case V8DImode: gen_sub3 = gen_subv8di3; break;
21348 case V8SImode: gen_sub3 = gen_subv8si3; break;
21349 case V4DImode: gen_sub3 = gen_subv4di3; break;
21350 case V4SImode: gen_sub3 = gen_subv4si3; break;
21351 case V2DImode: gen_sub3 = gen_subv2di3; break;
21352 default:
21353 gcc_unreachable ();
21354 }
21355 /* Subtract (-(INT MAX) - 1) from both operands to make
21356 them signed. */
21357 mask = ix86_build_signbit_mask (mode, true, false);
21358 t1 = gen_reg_rtx (mode);
21359 emit_insn (gen_sub3 (t1, cop0, mask));
21360
21361 t2 = gen_reg_rtx (mode);
21362 emit_insn (gen_sub3 (t2, cop1, mask));
21363
21364 cop0 = t1;
21365 cop1 = t2;
21366 code = GT;
21367 }
21368 break;
21369
21370 case V64QImode:
21371 case V32HImode:
21372 case V32QImode:
21373 case V16HImode:
21374 case V16QImode:
21375 case V8HImode:
21376 /* Perform a parallel unsigned saturating subtraction. */
21377 x = gen_reg_rtx (mode);
21378 emit_insn (gen_rtx_SET (VOIDmode, x,
21379 gen_rtx_US_MINUS (mode, cop0, cop1)));
21380
21381 cop0 = x;
21382 cop1 = CONST0_RTX (mode);
21383 code = EQ;
21384 negate = !negate;
21385 break;
21386
21387 default:
21388 gcc_unreachable ();
21389 }
21390 }
21391 }
21392
21393 /* Allow the comparison to be done in one mode, but the movcc to
21394 happen in another mode. */
21395 if (data_mode == mode)
21396 {
21397 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21398 operands[1+negate], operands[2-negate]);
21399 }
21400 else
21401 {
21402 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21403 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21404 operands[1+negate], operands[2-negate]);
21405 if (GET_MODE (x) == mode)
21406 x = gen_lowpart (data_mode, x);
21407 }
21408
21409 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21410 operands[2-negate]);
21411 return true;
21412 }
21413
21414 static bool
21415 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21416 {
21417 enum machine_mode mode = GET_MODE (op0);
21418 switch (mode)
21419 {
21420 case V16SImode:
21421 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21422 force_reg (V16SImode, mask),
21423 op1));
21424 return true;
21425 case V16SFmode:
21426 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21427 force_reg (V16SImode, mask),
21428 op1));
21429 return true;
21430 case V8DImode:
21431 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21432 force_reg (V8DImode, mask),
21433 op1));
21434 return true;
21435 case V8DFmode:
21436 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21437 force_reg (V8DImode, mask),
21438 op1));
21439 return true;
21440 default:
21441 return false;
21442 }
21443 }
21444
21445 /* Expand a variable vector permutation. */
21446
21447 void
21448 ix86_expand_vec_perm (rtx operands[])
21449 {
21450 rtx target = operands[0];
21451 rtx op0 = operands[1];
21452 rtx op1 = operands[2];
21453 rtx mask = operands[3];
21454 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21455 enum machine_mode mode = GET_MODE (op0);
21456 enum machine_mode maskmode = GET_MODE (mask);
21457 int w, e, i;
21458 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21459
21460 /* Number of elements in the vector. */
21461 w = GET_MODE_NUNITS (mode);
21462 e = GET_MODE_UNIT_SIZE (mode);
21463 gcc_assert (w <= 64);
21464
21465 if (TARGET_AVX512F
21466 && ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21467 return;
21468
21469 if (TARGET_AVX2)
21470 {
21471 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21472 {
21473 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21474 a constant shuffle operand. With a tiny bit of effort we can
21475 use VPERMD instead. A re-interpretation stall for V4DFmode is
21476 unfortunate but there's no avoiding it.
21477 Similarly, for V16HImode we don't have instructions for variable
21478 shuffling, while for V32QImode we can, after preparing suitable
21479 masks, use vpshufb; vpshufb; vpermq; vpor. */
21480
21481 if (mode == V16HImode)
21482 {
21483 maskmode = mode = V32QImode;
21484 w = 32;
21485 e = 1;
21486 }
21487 else
21488 {
21489 maskmode = mode = V8SImode;
21490 w = 8;
21491 e = 4;
21492 }
21493 t1 = gen_reg_rtx (maskmode);
21494
21495 /* Replicate the low bits of the V4DImode mask into V8SImode:
21496 mask = { A B C D }
21497 t1 = { A A B B C C D D }. */
21498 for (i = 0; i < w / 2; ++i)
21499 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21500 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21501 vt = force_reg (maskmode, vt);
21502 mask = gen_lowpart (maskmode, mask);
21503 if (maskmode == V8SImode)
21504 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21505 else
21506 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21507
21508 /* Multiply the shuffle indices by two. */
21509 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21510 OPTAB_DIRECT);
21511
21512 /* Add one to the odd shuffle indices:
21513 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21514 for (i = 0; i < w / 2; ++i)
21515 {
21516 vec[i * 2] = const0_rtx;
21517 vec[i * 2 + 1] = const1_rtx;
21518 }
21519 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21520 vt = validize_mem (force_const_mem (maskmode, vt));
21521 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21522 OPTAB_DIRECT);
21523
21524 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21525 operands[3] = mask = t1;
21526 target = gen_reg_rtx (mode);
21527 op0 = gen_lowpart (mode, op0);
21528 op1 = gen_lowpart (mode, op1);
21529 }
21530
21531 switch (mode)
21532 {
21533 case V8SImode:
21534 /* The VPERMD and VPERMPS instructions already properly ignore
21535 the high bits of the shuffle elements. No need for us to
21536 perform an AND ourselves. */
21537 if (one_operand_shuffle)
21538 {
21539 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21540 if (target != operands[0])
21541 emit_move_insn (operands[0],
21542 gen_lowpart (GET_MODE (operands[0]), target));
21543 }
21544 else
21545 {
21546 t1 = gen_reg_rtx (V8SImode);
21547 t2 = gen_reg_rtx (V8SImode);
21548 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21549 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21550 goto merge_two;
21551 }
21552 return;
21553
21554 case V8SFmode:
21555 mask = gen_lowpart (V8SImode, mask);
21556 if (one_operand_shuffle)
21557 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21558 else
21559 {
21560 t1 = gen_reg_rtx (V8SFmode);
21561 t2 = gen_reg_rtx (V8SFmode);
21562 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21563 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21564 goto merge_two;
21565 }
21566 return;
21567
21568 case V4SImode:
21569 /* By combining the two 128-bit input vectors into one 256-bit
21570 input vector, we can use VPERMD and VPERMPS for the full
21571 two-operand shuffle. */
21572 t1 = gen_reg_rtx (V8SImode);
21573 t2 = gen_reg_rtx (V8SImode);
21574 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21575 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21576 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21577 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21578 return;
21579
21580 case V4SFmode:
21581 t1 = gen_reg_rtx (V8SFmode);
21582 t2 = gen_reg_rtx (V8SImode);
21583 mask = gen_lowpart (V4SImode, mask);
21584 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21585 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21586 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21587 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21588 return;
21589
21590 case V32QImode:
21591 t1 = gen_reg_rtx (V32QImode);
21592 t2 = gen_reg_rtx (V32QImode);
21593 t3 = gen_reg_rtx (V32QImode);
21594 vt2 = GEN_INT (-128);
21595 for (i = 0; i < 32; i++)
21596 vec[i] = vt2;
21597 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21598 vt = force_reg (V32QImode, vt);
21599 for (i = 0; i < 32; i++)
21600 vec[i] = i < 16 ? vt2 : const0_rtx;
21601 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21602 vt2 = force_reg (V32QImode, vt2);
21603 /* From mask create two adjusted masks, which contain the same
21604 bits as mask in the low 7 bits of each vector element.
21605 The first mask will have the most significant bit clear
21606 if it requests an element from the same 128-bit lane
21607 and MSB set if it requests an element from the other 128-bit lane.
21608 The second mask will have the opposite values of the MSB,
21609 and additionally will have its 128-bit lanes swapped.
21610 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21611 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21612 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21613 stands for the other 12 bytes. */
21614 /* The bit that selects whether an element comes from the same lane
21615 or the other lane is bit 4, so shift it up by 3 to the MSB position. */
21616 t5 = gen_reg_rtx (V4DImode);
21617 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21618 GEN_INT (3)));
21619 /* Clear MSB bits from the mask just in case it had them set. */
21620 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21621 /* After this t1 will have MSB set for elements from the other lane. */
21622 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21623 /* Clear bits other than MSB. */
21624 emit_insn (gen_andv32qi3 (t1, t1, vt));
21625 /* Or in the lower bits from mask into t3. */
21626 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21627 /* And invert MSB bits in t1, so MSB is set for elements from the same
21628 lane. */
21629 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21630 /* Swap 128-bit lanes in t3. */
21631 t6 = gen_reg_rtx (V4DImode);
21632 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21633 const2_rtx, GEN_INT (3),
21634 const0_rtx, const1_rtx));
21635 /* And or in the lower bits from mask into t1. */
21636 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21637 if (one_operand_shuffle)
21638 {
21639 /* Each of these shuffles will put 0s in places where an
21640 element from the other 128-bit lane is needed; otherwise it
21641 will shuffle in the requested value. */
21642 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21643 gen_lowpart (V32QImode, t6)));
21644 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21645 /* For t3 the 128-bit lanes are swapped again. */
21646 t7 = gen_reg_rtx (V4DImode);
21647 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21648 const2_rtx, GEN_INT (3),
21649 const0_rtx, const1_rtx));
21650 /* And oring both together leads to the result. */
21651 emit_insn (gen_iorv32qi3 (target, t1,
21652 gen_lowpart (V32QImode, t7)));
21653 if (target != operands[0])
21654 emit_move_insn (operands[0],
21655 gen_lowpart (GET_MODE (operands[0]), target));
21656 return;
21657 }
21658
21659 t4 = gen_reg_rtx (V32QImode);
21660 /* Similar to the above one_operand_shuffle code, just
21661 repeated twice, once for each operand. The merge_two:
21662 code will merge the two results together. */
21663 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21664 gen_lowpart (V32QImode, t6)));
21665 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21666 gen_lowpart (V32QImode, t6)));
21667 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21668 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21669 t7 = gen_reg_rtx (V4DImode);
21670 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21671 const2_rtx, GEN_INT (3),
21672 const0_rtx, const1_rtx));
21673 t8 = gen_reg_rtx (V4DImode);
21674 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21675 const2_rtx, GEN_INT (3),
21676 const0_rtx, const1_rtx));
21677 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21678 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21679 t1 = t4;
21680 t2 = t3;
21681 goto merge_two;
21682
21683 default:
21684 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21685 break;
21686 }
21687 }
21688
21689 if (TARGET_XOP)
21690 {
21691 /* The XOP VPPERM insn supports three inputs. By ignoring the
21692 one_operand_shuffle special case, we avoid creating another
21693 set of constant vectors in memory. */
21694 one_operand_shuffle = false;
21695
21696 /* mask = mask & {2*w-1, ...} */
21697 vt = GEN_INT (2*w - 1);
21698 }
21699 else
21700 {
21701 /* mask = mask & {w-1, ...} */
21702 vt = GEN_INT (w - 1);
21703 }
21704
21705 for (i = 0; i < w; i++)
21706 vec[i] = vt;
21707 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21708 mask = expand_simple_binop (maskmode, AND, mask, vt,
21709 NULL_RTX, 0, OPTAB_DIRECT);
21710
21711 /* For non-QImode operations, convert the word permutation control
21712 into a byte permutation control. */
21713 if (mode != V16QImode)
21714 {
21715 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21716 GEN_INT (exact_log2 (e)),
21717 NULL_RTX, 0, OPTAB_DIRECT);
21718
21719 /* Convert mask to vector of chars. */
21720 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21721
21722 /* Replicate each of the input bytes into byte positions:
21723 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21724 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21725 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21726 for (i = 0; i < 16; ++i)
21727 vec[i] = GEN_INT (i/e * e);
21728 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21729 vt = validize_mem (force_const_mem (V16QImode, vt));
21730 if (TARGET_XOP)
21731 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21732 else
21733 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21734
21735 /* Convert it into the byte positions by doing
21736 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...} */
21737 for (i = 0; i < 16; ++i)
21738 vec[i] = GEN_INT (i % e);
21739 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21740 vt = validize_mem (force_const_mem (V16QImode, vt));
21741 emit_insn (gen_addv16qi3 (mask, mask, vt));
21742 }
21743
21744 /* The actual shuffle operations all operate on V16QImode. */
21745 op0 = gen_lowpart (V16QImode, op0);
21746 op1 = gen_lowpart (V16QImode, op1);
21747
21748 if (TARGET_XOP)
21749 {
21750 if (GET_MODE (target) != V16QImode)
21751 target = gen_reg_rtx (V16QImode);
21752 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21753 if (target != operands[0])
21754 emit_move_insn (operands[0],
21755 gen_lowpart (GET_MODE (operands[0]), target));
21756 }
21757 else if (one_operand_shuffle)
21758 {
21759 if (GET_MODE (target) != V16QImode)
21760 target = gen_reg_rtx (V16QImode);
21761 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21762 if (target != operands[0])
21763 emit_move_insn (operands[0],
21764 gen_lowpart (GET_MODE (operands[0]), target));
21765 }
21766 else
21767 {
21768 rtx xops[6];
21769 bool ok;
21770
21771 /* Shuffle the two input vectors independently. */
21772 t1 = gen_reg_rtx (V16QImode);
21773 t2 = gen_reg_rtx (V16QImode);
21774 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21775 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21776
21777 merge_two:
21778 /* Then merge them together. The key is whether any given control
21779 element contained a bit set that indicates the second word. */
21780 mask = operands[3];
21781 vt = GEN_INT (w);
21782 if (maskmode == V2DImode && !TARGET_SSE4_1)
21783 {
21784 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21785 more shuffle to convert the V2DI input mask into a V4SI
21786 input mask. At that point the masking that expand_int_vcond
21787 performs will work as desired. */
21788 rtx t3 = gen_reg_rtx (V4SImode);
21789 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21790 const0_rtx, const0_rtx,
21791 const2_rtx, const2_rtx));
21792 mask = t3;
21793 maskmode = V4SImode;
21794 e = w = 4;
21795 }
21796
21797 for (i = 0; i < w; i++)
21798 vec[i] = vt;
21799 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21800 vt = force_reg (maskmode, vt);
21801 mask = expand_simple_binop (maskmode, AND, mask, vt,
21802 NULL_RTX, 0, OPTAB_DIRECT);
21803
21804 if (GET_MODE (target) != mode)
21805 target = gen_reg_rtx (mode);
21806 xops[0] = target;
21807 xops[1] = gen_lowpart (mode, t2);
21808 xops[2] = gen_lowpart (mode, t1);
21809 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21810 xops[4] = mask;
21811 xops[5] = vt;
21812 ok = ix86_expand_int_vcond (xops);
21813 gcc_assert (ok);
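/* Illustration (not from the original sources): with w == 8, a control
   value of 11 still has the bit with value 8 (== w) set after the AND
   above, so the EQ test makes ix86_expand_int_vcond pick the element
   shuffled from the second operand (t2), while a value of 3 picks it
   from the first operand (t1).  */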
21814 if (target != operands[0])
21815 emit_move_insn (operands[0],
21816 gen_lowpart (GET_MODE (operands[0]), target));
21817 }
21818 }
21819
21820 /* Unpack SRC into the next wider integer vector type, storing it in DEST.
21821 UNSIGNED_P is true if we should do zero extension, else sign extension.
21822 HIGH_P is true if we want the N/2 high elements, else the low elements. */
21823
21824 void
21825 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21826 {
21827 enum machine_mode imode = GET_MODE (src);
21828 rtx tmp;
21829
21830 if (TARGET_SSE4_1)
21831 {
21832 rtx (*unpack)(rtx, rtx);
21833 rtx (*extract)(rtx, rtx) = NULL;
21834 enum machine_mode halfmode = BLKmode;
21835
21836 switch (imode)
21837 {
21838 case V32QImode:
21839 if (unsigned_p)
21840 unpack = gen_avx2_zero_extendv16qiv16hi2;
21841 else
21842 unpack = gen_avx2_sign_extendv16qiv16hi2;
21843 halfmode = V16QImode;
21844 extract
21845 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21846 break;
21847 case V32HImode:
21848 if (unsigned_p)
21849 unpack = gen_avx512f_zero_extendv16hiv16si2;
21850 else
21851 unpack = gen_avx512f_sign_extendv16hiv16si2;
21852 halfmode = V16HImode;
21853 extract
21854 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21855 break;
21856 case V16HImode:
21857 if (unsigned_p)
21858 unpack = gen_avx2_zero_extendv8hiv8si2;
21859 else
21860 unpack = gen_avx2_sign_extendv8hiv8si2;
21861 halfmode = V8HImode;
21862 extract
21863 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21864 break;
21865 case V16SImode:
21866 if (unsigned_p)
21867 unpack = gen_avx512f_zero_extendv8siv8di2;
21868 else
21869 unpack = gen_avx512f_sign_extendv8siv8di2;
21870 halfmode = V8SImode;
21871 extract
21872 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21873 break;
21874 case V8SImode:
21875 if (unsigned_p)
21876 unpack = gen_avx2_zero_extendv4siv4di2;
21877 else
21878 unpack = gen_avx2_sign_extendv4siv4di2;
21879 halfmode = V4SImode;
21880 extract
21881 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21882 break;
21883 case V16QImode:
21884 if (unsigned_p)
21885 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21886 else
21887 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21888 break;
21889 case V8HImode:
21890 if (unsigned_p)
21891 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21892 else
21893 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21894 break;
21895 case V4SImode:
21896 if (unsigned_p)
21897 unpack = gen_sse4_1_zero_extendv2siv2di2;
21898 else
21899 unpack = gen_sse4_1_sign_extendv2siv2di2;
21900 break;
21901 default:
21902 gcc_unreachable ();
21903 }
21904
21905 if (GET_MODE_SIZE (imode) >= 32)
21906 {
21907 tmp = gen_reg_rtx (halfmode);
21908 emit_insn (extract (tmp, src));
21909 }
21910 else if (high_p)
21911 {
21912 /* Shift higher 8 bytes to lower 8 bytes. */
21913 tmp = gen_reg_rtx (V1TImode);
21914 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21915 GEN_INT (64)));
21916 tmp = gen_lowpart (imode, tmp);
21917 }
21918 else
21919 tmp = src;
21920
21921 emit_insn (unpack (dest, tmp));
21922 }
21923 else
21924 {
21925 rtx (*unpack)(rtx, rtx, rtx);
21926
21927 switch (imode)
21928 {
21929 case V16QImode:
21930 if (high_p)
21931 unpack = gen_vec_interleave_highv16qi;
21932 else
21933 unpack = gen_vec_interleave_lowv16qi;
21934 break;
21935 case V8HImode:
21936 if (high_p)
21937 unpack = gen_vec_interleave_highv8hi;
21938 else
21939 unpack = gen_vec_interleave_lowv8hi;
21940 break;
21941 case V4SImode:
21942 if (high_p)
21943 unpack = gen_vec_interleave_highv4si;
21944 else
21945 unpack = gen_vec_interleave_lowv4si;
21946 break;
21947 default:
21948 gcc_unreachable ();
21949 }
21950
21951 if (unsigned_p)
21952 tmp = force_reg (imode, CONST0_RTX (imode));
21953 else
21954 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21955 src, pc_rtx, pc_rtx);
21956
21957 rtx tmp2 = gen_reg_rtx (imode);
21958 emit_insn (unpack (tmp2, src, tmp));
21959 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21960 }
21961 }
21962
21963 /* Expand conditional increment or decrement using adc/sbb instructions.
21964 The default case using setcc followed by the conditional move can be
21965 done by generic code. */
21966 bool
21967 ix86_expand_int_addcc (rtx operands[])
21968 {
21969 enum rtx_code code = GET_CODE (operands[1]);
21970 rtx flags;
21971 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21972 rtx compare_op;
21973 rtx val = const0_rtx;
21974 bool fpcmp = false;
21975 enum machine_mode mode;
21976 rtx op0 = XEXP (operands[1], 0);
21977 rtx op1 = XEXP (operands[1], 1);
21978
21979 if (operands[3] != const1_rtx
21980 && operands[3] != constm1_rtx)
21981 return false;
21982 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21983 return false;
21984 code = GET_CODE (compare_op);
21985
21986 flags = XEXP (compare_op, 0);
21987
21988 if (GET_MODE (flags) == CCFPmode
21989 || GET_MODE (flags) == CCFPUmode)
21990 {
21991 fpcmp = true;
21992 code = ix86_fp_compare_code_to_integer (code);
21993 }
21994
21995 if (code != LTU)
21996 {
21997 val = constm1_rtx;
21998 if (fpcmp)
21999 PUT_CODE (compare_op,
22000 reverse_condition_maybe_unordered
22001 (GET_CODE (compare_op)));
22002 else
22003 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22004 }
22005
22006 mode = GET_MODE (operands[0]);
22007
22008 /* Construct either adc or sbb insn. */
22009 if ((code == LTU) == (operands[3] == constm1_rtx))
22010 {
22011 switch (mode)
22012 {
22013 case QImode:
22014 insn = gen_subqi3_carry;
22015 break;
22016 case HImode:
22017 insn = gen_subhi3_carry;
22018 break;
22019 case SImode:
22020 insn = gen_subsi3_carry;
22021 break;
22022 case DImode:
22023 insn = gen_subdi3_carry;
22024 break;
22025 default:
22026 gcc_unreachable ();
22027 }
22028 }
22029 else
22030 {
22031 switch (mode)
22032 {
22033 case QImode:
22034 insn = gen_addqi3_carry;
22035 break;
22036 case HImode:
22037 insn = gen_addhi3_carry;
22038 break;
22039 case SImode:
22040 insn = gen_addsi3_carry;
22041 break;
22042 case DImode:
22043 insn = gen_adddi3_carry;
22044 break;
22045 default:
22046 gcc_unreachable ();
22047 }
22048 }
22049 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
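/* Illustration (not from the original sources): for an unsigned
   "r = x + (a < b)" this expands to a compare of a and b followed by
   roughly "adc r, 0", reusing the carry flag from the compare instead
   of a setcc/cmov pair.  */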
22050
22051 return true;
22052 }
22053
22054
22055 /* Split operands 0 and 1 into half-mode parts. Similar to split_double_mode,
22056 but works for floating point parameters and non-offsettable memories.
22057 For pushes, it returns just stack offsets; the values will be saved
22058 in the right order. At most four parts are generated. */
22059
22060 static int
22061 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
22062 {
22063 int size;
22064
22065 if (!TARGET_64BIT)
22066 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22067 else
22068 size = (GET_MODE_SIZE (mode) + 4) / 8;
22069
22070 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22071 gcc_assert (size >= 2 && size <= 4);
22072
22073 /* Optimize constant pool reference to immediates. This is used by fp
22074 moves, that force all constants to memory to allow combining. */
22075 if (MEM_P (operand) && MEM_READONLY_P (operand))
22076 {
22077 rtx tmp = maybe_get_pool_constant (operand);
22078 if (tmp)
22079 operand = tmp;
22080 }
22081
22082 if (MEM_P (operand) && !offsettable_memref_p (operand))
22083 {
22084 /* The only non-offsettable memories we handle are pushes. */
22085 int ok = push_operand (operand, VOIDmode);
22086
22087 gcc_assert (ok);
22088
22089 operand = copy_rtx (operand);
22090 PUT_MODE (operand, word_mode);
22091 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22092 return size;
22093 }
22094
22095 if (GET_CODE (operand) == CONST_VECTOR)
22096 {
22097 enum machine_mode imode = int_mode_for_mode (mode);
22098 /* Caution: if we looked through a constant pool memory above,
22099 the operand may actually have a different mode now. That's
22100 ok, since we want to pun this all the way back to an integer. */
22101 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22102 gcc_assert (operand != NULL);
22103 mode = imode;
22104 }
22105
22106 if (!TARGET_64BIT)
22107 {
22108 if (mode == DImode)
22109 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22110 else
22111 {
22112 int i;
22113
22114 if (REG_P (operand))
22115 {
22116 gcc_assert (reload_completed);
22117 for (i = 0; i < size; i++)
22118 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22119 }
22120 else if (offsettable_memref_p (operand))
22121 {
22122 operand = adjust_address (operand, SImode, 0);
22123 parts[0] = operand;
22124 for (i = 1; i < size; i++)
22125 parts[i] = adjust_address (operand, SImode, 4 * i);
22126 }
22127 else if (GET_CODE (operand) == CONST_DOUBLE)
22128 {
22129 REAL_VALUE_TYPE r;
22130 long l[4];
22131
22132 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22133 switch (mode)
22134 {
22135 case TFmode:
22136 real_to_target (l, &r, mode);
22137 parts[3] = gen_int_mode (l[3], SImode);
22138 parts[2] = gen_int_mode (l[2], SImode);
22139 break;
22140 case XFmode:
22141 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22142 long double may not be 80-bit. */
22143 real_to_target (l, &r, mode);
22144 parts[2] = gen_int_mode (l[2], SImode);
22145 break;
22146 case DFmode:
22147 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22148 break;
22149 default:
22150 gcc_unreachable ();
22151 }
22152 parts[1] = gen_int_mode (l[1], SImode);
22153 parts[0] = gen_int_mode (l[0], SImode);
22154 }
22155 else
22156 gcc_unreachable ();
22157 }
22158 }
22159 else
22160 {
22161 if (mode == TImode)
22162 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22163 if (mode == XFmode || mode == TFmode)
22164 {
22165 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22166 if (REG_P (operand))
22167 {
22168 gcc_assert (reload_completed);
22169 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22170 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22171 }
22172 else if (offsettable_memref_p (operand))
22173 {
22174 operand = adjust_address (operand, DImode, 0);
22175 parts[0] = operand;
22176 parts[1] = adjust_address (operand, upper_mode, 8);
22177 }
22178 else if (GET_CODE (operand) == CONST_DOUBLE)
22179 {
22180 REAL_VALUE_TYPE r;
22181 long l[4];
22182
22183 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22184 real_to_target (l, &r, mode);
22185
22186 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22187 if (HOST_BITS_PER_WIDE_INT >= 64)
22188 parts[0]
22189 = gen_int_mode
22190 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22191 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22192 DImode);
22193 else
22194 parts[0] = immed_double_const (l[0], l[1], DImode);
22195
22196 if (upper_mode == SImode)
22197 parts[1] = gen_int_mode (l[2], SImode);
22198 else if (HOST_BITS_PER_WIDE_INT >= 64)
22199 parts[1]
22200 = gen_int_mode
22201 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22202 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22203 DImode);
22204 else
22205 parts[1] = immed_double_const (l[2], l[3], DImode);
22206 }
22207 else
22208 gcc_unreachable ();
22209 }
22210 }
22211
22212 return size;
22213 }
22214
22215 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22216 All required insns are emitted directly. Operands 2-5 receive the
22217 destination parts in the correct order; operands 6-9 receive the
22218 corresponding source parts. */
22219
22220 void
22221 ix86_split_long_move (rtx operands[])
22222 {
22223 rtx part[2][4];
22224 int nparts, i, j;
22225 int push = 0;
22226 int collisions = 0;
22227 enum machine_mode mode = GET_MODE (operands[0]);
22228 bool collisionparts[4];
22229
22230 /* The DFmode expanders may ask us to move a double.
22231 For a 64-bit target this is a single move. By hiding that fact
22232 here we simplify the i386.md splitters. */
22233 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22234 {
22235 /* Optimize constant pool reference to immediates. This is used by
22236 fp moves, that force all constants to memory to allow combining. */
22237
22238 if (MEM_P (operands[1])
22239 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22240 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22241 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22242 if (push_operand (operands[0], VOIDmode))
22243 {
22244 operands[0] = copy_rtx (operands[0]);
22245 PUT_MODE (operands[0], word_mode);
22246 }
22247 else
22248 operands[0] = gen_lowpart (DImode, operands[0]);
22249 operands[1] = gen_lowpart (DImode, operands[1]);
22250 emit_move_insn (operands[0], operands[1]);
22251 return;
22252 }
22253
22254 /* The only non-offsettable memory we handle is push. */
22255 if (push_operand (operands[0], VOIDmode))
22256 push = 1;
22257 else
22258 gcc_assert (!MEM_P (operands[0])
22259 || offsettable_memref_p (operands[0]));
22260
22261 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22262 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22263
22264 /* When emitting a push, take care of source operands on the stack. */
22265 if (push && MEM_P (operands[1])
22266 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22267 {
22268 rtx src_base = XEXP (part[1][nparts - 1], 0);
22269
22270 /* Compensate for the stack decrement by 4. */
22271 if (!TARGET_64BIT && nparts == 3
22272 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22273 src_base = plus_constant (Pmode, src_base, 4);
22274
22275 /* src_base refers to the stack pointer and is
22276 automatically decreased by emitted push. */
22277 for (i = 0; i < nparts; i++)
22278 part[1][i] = change_address (part[1][i],
22279 GET_MODE (part[1][i]), src_base);
22280 }
22281
22282 /* We need to do the copy in the right order in case an address register
22283 of the source overlaps the destination. */
22284 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22285 {
22286 rtx tmp;
22287
22288 for (i = 0; i < nparts; i++)
22289 {
22290 collisionparts[i]
22291 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22292 if (collisionparts[i])
22293 collisions++;
22294 }
22295
22296 /* Collision in the middle part can be handled by reordering. */
22297 if (collisions == 1 && nparts == 3 && collisionparts [1])
22298 {
22299 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22300 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22301 }
22302 else if (collisions == 1
22303 && nparts == 4
22304 && (collisionparts [1] || collisionparts [2]))
22305 {
22306 if (collisionparts [1])
22307 {
22308 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22309 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22310 }
22311 else
22312 {
22313 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22314 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22315 }
22316 }
22317
22318 /* If there are more collisions, we can't handle it by reordering.
22319 Do an lea to the last part and use only one colliding move. */
22320 else if (collisions > 1)
22321 {
22322 rtx base;
22323
22324 collisions = 1;
22325
22326 base = part[0][nparts - 1];
22327
22328 /* Handle the case when the last part isn't valid for lea.
22329 Happens in 64-bit mode storing the 12-byte XFmode. */
22330 if (GET_MODE (base) != Pmode)
22331 base = gen_rtx_REG (Pmode, REGNO (base));
22332
22333 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22334 part[1][0] = replace_equiv_address (part[1][0], base);
22335 for (i = 1; i < nparts; i++)
22336 {
22337 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22338 part[1][i] = replace_equiv_address (part[1][i], tmp);
22339 }
22340 }
22341 }
22342
22343 if (push)
22344 {
22345 if (!TARGET_64BIT)
22346 {
22347 if (nparts == 3)
22348 {
22349 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22350 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22351 stack_pointer_rtx, GEN_INT (-4)));
22352 emit_move_insn (part[0][2], part[1][2]);
22353 }
22354 else if (nparts == 4)
22355 {
22356 emit_move_insn (part[0][3], part[1][3]);
22357 emit_move_insn (part[0][2], part[1][2]);
22358 }
22359 }
22360 else
22361 {
22362 /* In 64-bit mode we don't have a 32-bit push available. In case this is
22363 a register, that is OK - we will just use the larger counterpart. We also
22364 retype memory - this comes from an attempt to avoid the REX prefix on
22365 moving the second half of a TFmode value. */
22366 if (GET_MODE (part[1][1]) == SImode)
22367 {
22368 switch (GET_CODE (part[1][1]))
22369 {
22370 case MEM:
22371 part[1][1] = adjust_address (part[1][1], DImode, 0);
22372 break;
22373
22374 case REG:
22375 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22376 break;
22377
22378 default:
22379 gcc_unreachable ();
22380 }
22381
22382 if (GET_MODE (part[1][0]) == SImode)
22383 part[1][0] = part[1][1];
22384 }
22385 }
22386 emit_move_insn (part[0][1], part[1][1]);
22387 emit_move_insn (part[0][0], part[1][0]);
22388 return;
22389 }
22390
22391 /* Choose correct order to not overwrite the source before it is copied. */
22392 if ((REG_P (part[0][0])
22393 && REG_P (part[1][1])
22394 && (REGNO (part[0][0]) == REGNO (part[1][1])
22395 || (nparts == 3
22396 && REGNO (part[0][0]) == REGNO (part[1][2]))
22397 || (nparts == 4
22398 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22399 || (collisions > 0
22400 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22401 {
22402 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22403 {
22404 operands[2 + i] = part[0][j];
22405 operands[6 + i] = part[1][j];
22406 }
22407 }
22408 else
22409 {
22410 for (i = 0; i < nparts; i++)
22411 {
22412 operands[2 + i] = part[0][i];
22413 operands[6 + i] = part[1][i];
22414 }
22415 }
22416
22417 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22418 if (optimize_insn_for_size_p ())
22419 {
22420 for (j = 0; j < nparts - 1; j++)
22421 if (CONST_INT_P (operands[6 + j])
22422 && operands[6 + j] != const0_rtx
22423 && REG_P (operands[2 + j]))
22424 for (i = j; i < nparts - 1; i++)
22425 if (CONST_INT_P (operands[7 + i])
22426 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22427 operands[7 + i] = operands[2 + j];
22428 }
22429
22430 for (i = 0; i < nparts; i++)
22431 emit_move_insn (operands[2 + i], operands[6 + i]);
22432
22433 return;
22434 }
22435
22436 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22437 left shift by a constant, either using a single shift or
22438 a sequence of add instructions. */
22439
22440 static void
22441 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22442 {
22443 rtx (*insn)(rtx, rtx, rtx);
22444
22445 if (count == 1
22446 || (count * ix86_cost->add <= ix86_cost->shift_const
22447 && !optimize_insn_for_size_p ()))
22448 {
22449 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22450 while (count-- > 0)
22451 emit_insn (insn (operand, operand, operand));
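/* Illustrative note (not from the original sources): with COUNT == 2 and
   a cheap add, the loop above emits "add op, op" twice, which equals a
   left shift by 2 and can be cheaper where adds beat constant shifts.  */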
22452 }
22453 else
22454 {
22455 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22456 emit_insn (insn (operand, operand, GEN_INT (count)));
22457 }
22458 }
22459
22460 void
22461 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22462 {
22463 rtx (*gen_ashl3)(rtx, rtx, rtx);
22464 rtx (*gen_shld)(rtx, rtx, rtx);
22465 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22466
22467 rtx low[2], high[2];
22468 int count;
22469
22470 if (CONST_INT_P (operands[2]))
22471 {
22472 split_double_mode (mode, operands, 2, low, high);
22473 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22474
22475 if (count >= half_width)
22476 {
22477 emit_move_insn (high[0], low[1]);
22478 emit_move_insn (low[0], const0_rtx);
22479
22480 if (count > half_width)
22481 ix86_expand_ashl_const (high[0], count - half_width, mode);
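/* Illustration (not from the original sources): for a DImode shift left
   by 40 on a 32-bit target, the high half is set from the source's low
   half, the low half is cleared, and then the high half is shifted left
   by 8 via the call above.  */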
22482 }
22483 else
22484 {
22485 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22486
22487 if (!rtx_equal_p (operands[0], operands[1]))
22488 emit_move_insn (operands[0], operands[1]);
22489
22490 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22491 ix86_expand_ashl_const (low[0], count, mode);
22492 }
22493 return;
22494 }
22495
22496 split_double_mode (mode, operands, 1, low, high);
22497
22498 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22499
22500 if (operands[1] == const1_rtx)
22501 {
22502 /* Assuming we've chosen QImode-capable registers, 1 << N
22503 can be done with two 32/64-bit shifts, no branches, no cmoves. */
22504 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22505 {
22506 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22507
22508 ix86_expand_clear (low[0]);
22509 ix86_expand_clear (high[0]);
22510 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22511
22512 d = gen_lowpart (QImode, low[0]);
22513 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22514 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22515 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22516
22517 d = gen_lowpart (QImode, high[0]);
22518 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22519 s = gen_rtx_NE (QImode, flags, const0_rtx);
22520 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22521 }
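/* Illustrative sketch of the sequence emitted above (editorial, assuming
   SImode halves and the count in %cl): clear low and high, "test $32, %cl",
   sete into the low byte of LOW, setne into the low byte of HIGH; the two
   variable shifts emitted below (which only use CL mod 32) then move the
   single set bit into place.  */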
22522
22523 /* Otherwise, we can get the same results by manually performing
22524 a bit extract operation on bit 5/6, and then performing the two
22525 shifts. The two methods of getting 0/1 into low/high are exactly
22526 the same size. Avoiding the shift in the bit extract case helps
22527 pentium4 a bit; no one else seems to care much either way. */
22528 else
22529 {
22530 enum machine_mode half_mode;
22531 rtx (*gen_lshr3)(rtx, rtx, rtx);
22532 rtx (*gen_and3)(rtx, rtx, rtx);
22533 rtx (*gen_xor3)(rtx, rtx, rtx);
22534 HOST_WIDE_INT bits;
22535 rtx x;
22536
22537 if (mode == DImode)
22538 {
22539 half_mode = SImode;
22540 gen_lshr3 = gen_lshrsi3;
22541 gen_and3 = gen_andsi3;
22542 gen_xor3 = gen_xorsi3;
22543 bits = 5;
22544 }
22545 else
22546 {
22547 half_mode = DImode;
22548 gen_lshr3 = gen_lshrdi3;
22549 gen_and3 = gen_anddi3;
22550 gen_xor3 = gen_xordi3;
22551 bits = 6;
22552 }
22553
22554 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22555 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22556 else
22557 x = gen_lowpart (half_mode, operands[2]);
22558 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22559
22560 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22561 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22562 emit_move_insn (low[0], high[0]);
22563 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22564 }
22565
22566 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22567 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22568 return;
22569 }
22570
22571 if (operands[1] == constm1_rtx)
22572 {
22573 /* For -1 << N, we can avoid the shld instruction, because we
22574 know that we're shifting 0...31/63 ones into a -1. */
22575 emit_move_insn (low[0], constm1_rtx);
22576 if (optimize_insn_for_size_p ())
22577 emit_move_insn (high[0], low[0]);
22578 else
22579 emit_move_insn (high[0], constm1_rtx);
22580 }
22581 else
22582 {
22583 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22584
22585 if (!rtx_equal_p (operands[0], operands[1]))
22586 emit_move_insn (operands[0], operands[1]);
22587
22588 split_double_mode (mode, operands, 1, low, high);
22589 emit_insn (gen_shld (high[0], low[0], operands[2]));
22590 }
22591
22592 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22593
22594 if (TARGET_CMOVE && scratch)
22595 {
22596 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22597 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22598
22599 ix86_expand_clear (scratch);
22600 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22601 }
22602 else
22603 {
22604 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22605 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22606
22607 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22608 }
22609 }
22610
22611 void
22612 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22613 {
22614 rtx (*gen_ashr3)(rtx, rtx, rtx)
22615 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22616 rtx (*gen_shrd)(rtx, rtx, rtx);
22617 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22618
22619 rtx low[2], high[2];
22620 int count;
22621
22622 if (CONST_INT_P (operands[2]))
22623 {
22624 split_double_mode (mode, operands, 2, low, high);
22625 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22626
22627 if (count == GET_MODE_BITSIZE (mode) - 1)
22628 {
22629 emit_move_insn (high[0], high[1]);
22630 emit_insn (gen_ashr3 (high[0], high[0],
22631 GEN_INT (half_width - 1)));
22632 emit_move_insn (low[0], high[0]);
22633
22634 }
22635 else if (count >= half_width)
22636 {
22637 emit_move_insn (low[0], high[1]);
22638 emit_move_insn (high[0], low[0]);
22639 emit_insn (gen_ashr3 (high[0], high[0],
22640 GEN_INT (half_width - 1)));
22641
22642 if (count > half_width)
22643 emit_insn (gen_ashr3 (low[0], low[0],
22644 GEN_INT (count - half_width)));
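/* Illustration (not from the original sources): for a DImode arithmetic
   shift right by 40 on a 32-bit target, the low half is set from the
   source's high half, the high half becomes that value shifted right by
   31 (sign replication), and then the low half is shifted right by 8
   via the insn above.  */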
22645 }
22646 else
22647 {
22648 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22649
22650 if (!rtx_equal_p (operands[0], operands[1]))
22651 emit_move_insn (operands[0], operands[1]);
22652
22653 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22654 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22655 }
22656 }
22657 else
22658 {
22659 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22660
22661 if (!rtx_equal_p (operands[0], operands[1]))
22662 emit_move_insn (operands[0], operands[1]);
22663
22664 split_double_mode (mode, operands, 1, low, high);
22665
22666 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22667 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22668
22669 if (TARGET_CMOVE && scratch)
22670 {
22671 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22672 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22673
22674 emit_move_insn (scratch, high[0]);
22675 emit_insn (gen_ashr3 (scratch, scratch,
22676 GEN_INT (half_width - 1)));
22677 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22678 scratch));
22679 }
22680 else
22681 {
22682 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22683 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22684
22685 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22686 }
22687 }
22688 }
22689
22690 void
22691 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22692 {
22693 rtx (*gen_lshr3)(rtx, rtx, rtx)
22694 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22695 rtx (*gen_shrd)(rtx, rtx, rtx);
22696 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22697
22698 rtx low[2], high[2];
22699 int count;
22700
22701 if (CONST_INT_P (operands[2]))
22702 {
22703 split_double_mode (mode, operands, 2, low, high);
22704 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22705
22706 if (count >= half_width)
22707 {
22708 emit_move_insn (low[0], high[1]);
22709 ix86_expand_clear (high[0]);
22710
22711 if (count > half_width)
22712 emit_insn (gen_lshr3 (low[0], low[0],
22713 GEN_INT (count - half_width)));
22714 }
22715 else
22716 {
22717 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22718
22719 if (!rtx_equal_p (operands[0], operands[1]))
22720 emit_move_insn (operands[0], operands[1]);
22721
22722 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22723 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22724 }
22725 }
22726 else
22727 {
22728 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22729
22730 if (!rtx_equal_p (operands[0], operands[1]))
22731 emit_move_insn (operands[0], operands[1]);
22732
22733 split_double_mode (mode, operands, 1, low, high);
22734
22735 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22736 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22737
22738 if (TARGET_CMOVE && scratch)
22739 {
22740 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22741 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22742
22743 ix86_expand_clear (scratch);
22744 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22745 scratch));
22746 }
22747 else
22748 {
22749 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22750 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22751
22752 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22753 }
22754 }
22755 }
22756
22757 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22758 static void
22759 predict_jump (int prob)
22760 {
22761 rtx insn = get_last_insn ();
22762 gcc_assert (JUMP_P (insn));
22763 add_int_reg_note (insn, REG_BR_PROB, prob);
22764 }
22765
22766 /* Helper function for the string operations below. Test VARIABLE whether
22767 it is aligned to VALUE bytes. If so, jump to the label. */
22768 static rtx_code_label *
22769 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22770 {
22771 rtx_code_label *label = gen_label_rtx ();
22772 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22773 if (GET_MODE (variable) == DImode)
22774 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22775 else
22776 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22777 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22778 1, label);
22779 if (epilogue)
22780 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22781 else
22782 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22783 return label;
22784 }
22785
22786 /* Decrease COUNTREG by VALUE. */
22787 static void
22788 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22789 {
22790 rtx (*gen_add)(rtx, rtx, rtx)
22791 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22792
22793 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22794 }
22795
22796 /* Zero extend possibly SImode EXP to Pmode register. */
22797 rtx
22798 ix86_zero_extend_to_Pmode (rtx exp)
22799 {
22800 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22801 }
22802
22803 /* Divide COUNTREG by SCALE. */
22804 static rtx
22805 scale_counter (rtx countreg, int scale)
22806 {
22807 rtx sc;
22808
22809 if (scale == 1)
22810 return countreg;
22811 if (CONST_INT_P (countreg))
22812 return GEN_INT (INTVAL (countreg) / scale);
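/* Illustrative note (not from the original sources): a constant count of
   17 with SCALE 4 yields 4 here; the leftover byte is expected to be
   handled by the caller's epilogue code.  */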
22813 gcc_assert (REG_P (countreg));
22814
22815 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22816 GEN_INT (exact_log2 (scale)),
22817 NULL, 1, OPTAB_DIRECT);
22818 return sc;
22819 }
22820
22821 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22822 DImode for constant loop counts. */
22823
22824 static enum machine_mode
22825 counter_mode (rtx count_exp)
22826 {
22827 if (GET_MODE (count_exp) != VOIDmode)
22828 return GET_MODE (count_exp);
22829 if (!CONST_INT_P (count_exp))
22830 return Pmode;
22831 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22832 return DImode;
22833 return SImode;
22834 }
22835
22836 /* Copy the address to a Pmode register. This is used for x32 to
22837 truncate DImode TLS address to a SImode register. */
22838
22839 static rtx
22840 ix86_copy_addr_to_reg (rtx addr)
22841 {
22842 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22843 return copy_addr_to_reg (addr);
22844 else
22845 {
22846 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22847 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22848 }
22849 }
22850
22851 /* When ISSETMEM is FALSE, output a simple loop to move the memory pointed to
22852 by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall
22853 size is COUNT, specified in bytes. When ISSETMEM is TRUE, output the
22854 equivalent loop to set memory to VALUE (supposed to be in MODE).
22855
22856 The size is rounded down to a whole number of chunks moved at once.
22857 SRCMEM and DESTMEM provide MEM rtxes to supply proper aliasing info. */
22858
22859
22860 static void
22861 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22862 rtx destptr, rtx srcptr, rtx value,
22863 rtx count, enum machine_mode mode, int unroll,
22864 int expected_size, bool issetmem)
22865 {
22866 rtx_code_label *out_label, *top_label;
22867 rtx iter, tmp;
22868 enum machine_mode iter_mode = counter_mode (count);
22869 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22870 rtx piece_size = GEN_INT (piece_size_n);
22871 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22872 rtx size;
22873 int i;
22874
22875 top_label = gen_label_rtx ();
22876 out_label = gen_label_rtx ();
22877 iter = gen_reg_rtx (iter_mode);
22878
22879 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22880 NULL, 1, OPTAB_DIRECT);
22881 /* Those two should combine. */
22882 if (piece_size == const1_rtx)
22883 {
22884 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22885 true, out_label);
22886 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22887 }
22888 emit_move_insn (iter, const0_rtx);
22889
22890 emit_label (top_label);
22891
22892 tmp = convert_modes (Pmode, iter_mode, iter, true);
22893
22894 /* This assert could be relaxed - in that case we'd need to compute the
22895 smallest power of two containing PIECE_SIZE_N and pass it to
22896 offset_address. */
22897 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22898 destmem = offset_address (destmem, tmp, piece_size_n);
22899 destmem = adjust_address (destmem, mode, 0);
22900
22901 if (!issetmem)
22902 {
22903 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22904 srcmem = adjust_address (srcmem, mode, 0);
22905
22906 /* When unrolling for chips that reorder memory reads and writes,
22907 we can save registers by using a single temporary.
22908 Also, using 4 temporaries is overkill in 32-bit mode. */
22909 if (!TARGET_64BIT && 0)
22910 {
22911 for (i = 0; i < unroll; i++)
22912 {
22913 if (i)
22914 {
22915 destmem =
22916 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22917 srcmem =
22918 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22919 }
22920 emit_move_insn (destmem, srcmem);
22921 }
22922 }
22923 else
22924 {
22925 rtx tmpreg[4];
22926 gcc_assert (unroll <= 4);
22927 for (i = 0; i < unroll; i++)
22928 {
22929 tmpreg[i] = gen_reg_rtx (mode);
22930 if (i)
22931 {
22932 srcmem =
22933 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22934 }
22935 emit_move_insn (tmpreg[i], srcmem);
22936 }
22937 for (i = 0; i < unroll; i++)
22938 {
22939 if (i)
22940 {
22941 destmem =
22942 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22943 }
22944 emit_move_insn (destmem, tmpreg[i]);
22945 }
22946 }
22947 }
22948 else
22949 for (i = 0; i < unroll; i++)
22950 {
22951 if (i)
22952 destmem =
22953 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22954 emit_move_insn (destmem, value);
22955 }
22956
22957 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22958 true, OPTAB_LIB_WIDEN);
22959 if (tmp != iter)
22960 emit_move_insn (iter, tmp);
22961
22962 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22963 true, top_label);
22964 if (expected_size != -1)
22965 {
22966 expected_size /= GET_MODE_SIZE (mode) * unroll;
22967 if (expected_size == 0)
22968 predict_jump (0);
22969 else if (expected_size > REG_BR_PROB_BASE)
22970 predict_jump (REG_BR_PROB_BASE - 1);
22971 else
22972 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22973 }
22974 else
22975 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22976 iter = ix86_zero_extend_to_Pmode (iter);
22977 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22978 true, OPTAB_LIB_WIDEN);
22979 if (tmp != destptr)
22980 emit_move_insn (destptr, tmp);
22981 if (!issetmem)
22982 {
22983 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22984 true, OPTAB_LIB_WIDEN);
22985 if (tmp != srcptr)
22986 emit_move_insn (srcptr, tmp);
22987 }
22988 emit_label (out_label);
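/* Editorial sketch of the emitted loop for the movmem case (pseudo-C,
   not from the original sources):
     size = count & ~(piece_size_n - 1);  iter = 0;
   top:
     copy piece_size_n bytes from src + iter to dest + iter;
     iter += piece_size_n;
     if (iter < size) goto top;
     destptr += iter;  srcptr += iter;
   out:  */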
22989 }
22990
22991 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22992 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22993 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22994 For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
22995 ORIG_VALUE is the original value passed to memset to fill the memory with.
22996 Other arguments have same meaning as for previous function. */
22997
22998 static void
22999 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23000 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23001 rtx count,
23002 enum machine_mode mode, bool issetmem)
23003 {
23004 rtx destexp;
23005 rtx srcexp;
23006 rtx countreg;
23007 HOST_WIDE_INT rounded_count;
23008
23009 /* If possible, it is shorter to use rep movs.
23010 TODO: Maybe it is better to move this logic to decide_alg. */
23011 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23012 && (!issetmem || orig_value == const0_rtx))
23013 mode = SImode;
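/* Illustration (not from the original sources): clearing a multiple-of-4
   number of bytes with a zero value is therefore emitted as "rep stosl"
   with the count divided by 4 rather than as a byte-wise "rep stosb".  */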
23014
23015 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23016 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23017
23018 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23019 GET_MODE_SIZE (mode)));
23020 if (mode != QImode)
23021 {
23022 destexp = gen_rtx_ASHIFT (Pmode, countreg,
23023 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23024 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23025 }
23026 else
23027 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23028 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23029 {
23030 rounded_count = (INTVAL (count)
23031 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23032 destmem = shallow_copy_rtx (destmem);
23033 set_mem_size (destmem, rounded_count);
23034 }
23035 else if (MEM_SIZE_KNOWN_P (destmem))
23036 clear_mem_size (destmem);
23037
23038 if (issetmem)
23039 {
23040 value = force_reg (mode, gen_lowpart (mode, value));
23041 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23042 }
23043 else
23044 {
23045 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23046 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23047 if (mode != QImode)
23048 {
23049 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23050 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23051 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23052 }
23053 else
23054 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23055 if (CONST_INT_P (count))
23056 {
23057 rounded_count = (INTVAL (count)
23058 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23059 srcmem = shallow_copy_rtx (srcmem);
23060 set_mem_size (srcmem, rounded_count);
23061 }
23062 else
23063 {
23064 if (MEM_SIZE_KNOWN_P (srcmem))
23065 clear_mem_size (srcmem);
23066 }
23067 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23068 destexp, srcexp));
23069 }
23070 }
23071
23072 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23073 DESTMEM.
23074 SRCMEM is passed by pointer so it can be updated on return.
23075 The return value is the updated DESTMEM. */
23076 static rtx
23077 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23078 HOST_WIDE_INT size_to_move)
23079 {
23080 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23081 enum insn_code code;
23082 enum machine_mode move_mode;
23083 int piece_size, i;
23084
23085 /* Find the widest mode in which we could perform moves.
23086 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and halve
23087 it until a move of that size is supported. */
23088 piece_size = 1 << floor_log2 (size_to_move);
23089 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23090 code = optab_handler (mov_optab, move_mode);
23091 while (code == CODE_FOR_nothing && piece_size > 1)
23092 {
23093 piece_size >>= 1;
23094 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23095 code = optab_handler (mov_optab, move_mode);
23096 }
23097
23098 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23099 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23100 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23101 {
23102 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23103 move_mode = mode_for_vector (word_mode, nunits);
23104 code = optab_handler (mov_optab, move_mode);
23105 if (code == CODE_FOR_nothing)
23106 {
23107 move_mode = word_mode;
23108 piece_size = GET_MODE_SIZE (move_mode);
23109 code = optab_handler (mov_optab, move_mode);
23110 }
23111 }
23112 gcc_assert (code != CODE_FOR_nothing);
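/* Illustration (not from the original sources, assuming a 64-bit target
   with vector moves available): SIZE_TO_MOVE == 16 first selects TImode,
   which the block above then rewrites to the V2DImode vector mode, so the
   copy below uses a single 16-byte vector load/store pair.  */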
23113
23114 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23115 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23116
23117 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23118 gcc_assert (size_to_move % piece_size == 0);
23119 adjust = GEN_INT (piece_size);
23120 for (i = 0; i < size_to_move; i += piece_size)
23121 {
23122 /* We move from memory to memory, so we'll need to do it via
23123 a temporary register. */
23124 tempreg = gen_reg_rtx (move_mode);
23125 emit_insn (GEN_FCN (code) (tempreg, src));
23126 emit_insn (GEN_FCN (code) (dst, tempreg));
23127
23128 emit_move_insn (destptr,
23129 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23130 emit_move_insn (srcptr,
23131 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23132
23133 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23134 piece_size);
23135 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23136 piece_size);
23137 }
23138
23139 /* Update DST and SRC rtx. */
23140 *srcmem = src;
23141 return dst;
23142 }
23143
23144 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23145 static void
23146 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23147 rtx destptr, rtx srcptr, rtx count, int max_size)
23148 {
23149 rtx src, dest;
23150 if (CONST_INT_P (count))
23151 {
23152 HOST_WIDE_INT countval = INTVAL (count);
23153 HOST_WIDE_INT epilogue_size = countval % max_size;
23154 int i;
23155
23156 /* For now MAX_SIZE should be a power of 2. This assert could be
23157 relaxed, but it'll require a bit more complicated epilogue
23158 expanding. */
23159 gcc_assert ((max_size & (max_size - 1)) == 0);
23160 for (i = max_size; i >= 1; i >>= 1)
23161 {
23162 if (epilogue_size & i)
23163 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23164 }
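/* Illustration (not from the original sources): COUNT == 141 with
   MAX_SIZE == 16 leaves an epilogue of 13 bytes, emitted by the loop
   above as an 8-byte, a 4-byte and a 1-byte move.  */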
23165 return;
23166 }
23167 if (max_size > 8)
23168 {
23169 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23170 count, 1, OPTAB_DIRECT);
23171 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23172 count, QImode, 1, 4, false);
23173 return;
23174 }
23175
23176 /* When there are stringops, we can cheaply increase dest and src pointers.
23177 Otherwise we save code size by maintaining an offset (zero is readily
23178 available from the preceding rep operation) and using x86 addressing
23179 modes. */
23180 if (TARGET_SINGLE_STRINGOP)
23181 {
23182 if (max_size > 4)
23183 {
23184 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23185 src = change_address (srcmem, SImode, srcptr);
23186 dest = change_address (destmem, SImode, destptr);
23187 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23188 emit_label (label);
23189 LABEL_NUSES (label) = 1;
23190 }
23191 if (max_size > 2)
23192 {
23193 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23194 src = change_address (srcmem, HImode, srcptr);
23195 dest = change_address (destmem, HImode, destptr);
23196 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23197 emit_label (label);
23198 LABEL_NUSES (label) = 1;
23199 }
23200 if (max_size > 1)
23201 {
23202 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23203 src = change_address (srcmem, QImode, srcptr);
23204 dest = change_address (destmem, QImode, destptr);
23205 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23206 emit_label (label);
23207 LABEL_NUSES (label) = 1;
23208 }
23209 }
23210 else
23211 {
23212 rtx offset = force_reg (Pmode, const0_rtx);
23213 rtx tmp;
23214
23215 if (max_size > 4)
23216 {
23217 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23218 src = change_address (srcmem, SImode, srcptr);
23219 dest = change_address (destmem, SImode, destptr);
23220 emit_move_insn (dest, src);
23221 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23222 true, OPTAB_LIB_WIDEN);
23223 if (tmp != offset)
23224 emit_move_insn (offset, tmp);
23225 emit_label (label);
23226 LABEL_NUSES (label) = 1;
23227 }
23228 if (max_size > 2)
23229 {
23230 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23231 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23232 src = change_address (srcmem, HImode, tmp);
23233 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23234 dest = change_address (destmem, HImode, tmp);
23235 emit_move_insn (dest, src);
23236 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23237 true, OPTAB_LIB_WIDEN);
23238 if (tmp != offset)
23239 emit_move_insn (offset, tmp);
23240 emit_label (label);
23241 LABEL_NUSES (label) = 1;
23242 }
23243 if (max_size > 1)
23244 {
23245 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23246 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23247 src = change_address (srcmem, QImode, tmp);
23248 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23249 dest = change_address (destmem, QImode, tmp);
23250 emit_move_insn (dest, src);
23251 emit_label (label);
23252 LABEL_NUSES (label) = 1;
23253 }
23254 }
23255 }
23256
23257 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23258 with value PROMOTED_VAL.
23259 DESTPTR is advanced as the stores are emitted.
23260 The return value is the updated DESTMEM. */
23261 static rtx
23262 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23263 HOST_WIDE_INT size_to_move)
23264 {
23265 rtx dst = destmem, adjust;
23266 enum insn_code code;
23267 enum machine_mode move_mode;
23268 int piece_size, i;
23269
23270 /* Use the mode of PROMOTED_VAL for the stores, falling back to QImode
23271 when it has no mode; narrow the mode if SIZE_TO_MOVE is smaller than
23272 its size. */
23273 move_mode = GET_MODE (promoted_val);
23274 if (move_mode == VOIDmode)
23275 move_mode = QImode;
23276 if (size_to_move < GET_MODE_SIZE (move_mode))
23277 {
23278 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23279 promoted_val = gen_lowpart (move_mode, promoted_val);
23280 }
23281 piece_size = GET_MODE_SIZE (move_mode);
23282 code = optab_handler (mov_optab, move_mode);
23283 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23284
23285 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23286
23287 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23288 gcc_assert (size_to_move % piece_size == 0);
23289 adjust = GEN_INT (piece_size);
23290 for (i = 0; i < size_to_move; i += piece_size)
23291 {
23292 if (piece_size <= GET_MODE_SIZE (word_mode))
23293 {
23294 emit_insn (gen_strset (destptr, dst, promoted_val));
23295 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23296 piece_size);
23297 continue;
23298 }
23299
23300 emit_insn (GEN_FCN (code) (dst, promoted_val));
23301
23302 emit_move_insn (destptr,
23303 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23304
23305 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23306 piece_size);
23307 }
23308
23309 /* Update DST rtx. */
23310 return dst;
23311 }
23312 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23313 static void
23314 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23315 rtx count, int max_size)
23316 {
23317 count =
23318 expand_simple_binop (counter_mode (count), AND, count,
23319 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23320 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23321 gen_lowpart (QImode, value), count, QImode,
23322 1, max_size / 2, true);
23323 }
23324
23325 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23326 static void
23327 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23328 rtx count, int max_size)
23329 {
23330 rtx dest;
23331
23332 if (CONST_INT_P (count))
23333 {
23334 HOST_WIDE_INT countval = INTVAL (count);
23335 HOST_WIDE_INT epilogue_size = countval % max_size;
23336 int i;
23337
23338 /* For now MAX_SIZE should be a power of 2. This assert could be
23339 relaxed, but it'll require a somewhat more complicated epilogue
23340 expansion. */
23341 gcc_assert ((max_size & (max_size - 1)) == 0);
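/* As an illustration of the loop below: with MAX_SIZE == 16 and a
   remainder of 13 bytes (8 + 4 + 1), three stores of 8, 4 and 1 bytes
   are emitted, each advancing DESTPTR via emit_memset.  */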
23342 for (i = max_size; i >= 1; i >>= 1)
23343 {
23344 if (epilogue_size & i)
23345 {
23346 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23347 destmem = emit_memset (destmem, destptr, vec_value, i);
23348 else
23349 destmem = emit_memset (destmem, destptr, value, i);
23350 }
23351 }
23352 return;
23353 }
23354 if (max_size > 32)
23355 {
23356 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23357 return;
23358 }
23359 if (max_size > 16)
23360 {
23361 rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
23362 if (TARGET_64BIT)
23363 {
23364 dest = change_address (destmem, DImode, destptr);
23365 emit_insn (gen_strset (destptr, dest, value));
23366 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23367 emit_insn (gen_strset (destptr, dest, value));
23368 }
23369 else
23370 {
23371 dest = change_address (destmem, SImode, destptr);
23372 emit_insn (gen_strset (destptr, dest, value));
23373 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23374 emit_insn (gen_strset (destptr, dest, value));
23375 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23376 emit_insn (gen_strset (destptr, dest, value));
23377 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23378 emit_insn (gen_strset (destptr, dest, value));
23379 }
23380 emit_label (label);
23381 LABEL_NUSES (label) = 1;
23382 }
23383 if (max_size > 8)
23384 {
23385 rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
23386 if (TARGET_64BIT)
23387 {
23388 dest = change_address (destmem, DImode, destptr);
23389 emit_insn (gen_strset (destptr, dest, value));
23390 }
23391 else
23392 {
23393 dest = change_address (destmem, SImode, destptr);
23394 emit_insn (gen_strset (destptr, dest, value));
23395 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23396 emit_insn (gen_strset (destptr, dest, value));
23397 }
23398 emit_label (label);
23399 LABEL_NUSES (label) = 1;
23400 }
23401 if (max_size > 4)
23402 {
23403 rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23404 dest = change_address (destmem, SImode, destptr);
23405 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23406 emit_label (label);
23407 LABEL_NUSES (label) = 1;
23408 }
23409 if (max_size > 2)
23410 {
23411 rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23412 dest = change_address (destmem, HImode, destptr);
23413 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23414 emit_label (label);
23415 LABEL_NUSES (label) = 1;
23416 }
23417 if (max_size > 1)
23418 {
23419 rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
23420 dest = change_address (destmem, QImode, destptr);
23421 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23422 emit_label (label);
23423 LABEL_NUSES (label) = 1;
23424 }
23425 }
23426
23427 /* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM or set enough to
23428 DESTMEM to align it to DESIRED_ALIGNMENT. Original alignment is ALIGN.
23429 Depending on ISSETMEM, either arguments SRCMEM/SRCPTR or VALUE/VEC_VALUE are
23430 ignored.
23431 Return value is updated DESTMEM. */
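/* A rough sketch of what the loop below emits, assuming ALIGN == 1 and
   DESIRED_ALIGNMENT == 8:

     if (destptr & 1) { copy/set 1 byte; count -= 1; }
     if (destptr & 2) { copy/set 2 bytes; count -= 2; }
     if (destptr & 4) { copy/set 4 bytes; count -= 4; }

   after which DESTPTR is 8-byte aligned.  */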
23432 static rtx
23433 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23434 rtx destptr, rtx srcptr, rtx value,
23435 rtx vec_value, rtx count, int align,
23436 int desired_alignment, bool issetmem)
23437 {
23438 int i;
23439 for (i = 1; i < desired_alignment; i <<= 1)
23440 {
23441 if (align <= i)
23442 {
23443 rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
23444 if (issetmem)
23445 {
23446 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23447 destmem = emit_memset (destmem, destptr, vec_value, i);
23448 else
23449 destmem = emit_memset (destmem, destptr, value, i);
23450 }
23451 else
23452 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23453 ix86_adjust_counter (count, i);
23454 emit_label (label);
23455 LABEL_NUSES (label) = 1;
23456 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23457 }
23458 }
23459 return destmem;
23460 }
23461
23462 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23463 or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23464 and jump to DONE_LABEL. */
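/* The trick used below: when SIZE <= COUNT <= 2*SIZE-1, one block of SIZE
   bytes copied (or stored) at DESTPTR and another block of SIZE bytes ending
   at DESTPTR + COUNT together cover the whole range; the overlap in the
   middle is simply written twice.  For example, with SIZE == 8 and
   COUNT == 13, bytes 0..7 and bytes 5..12 are handled, covering all
   13 bytes.  */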
23465 static void
23466 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23467 rtx destptr, rtx srcptr,
23468 rtx value, rtx vec_value,
23469 rtx count, int size,
23470 rtx done_label, bool issetmem)
23471 {
23472 rtx_code_label *label = ix86_expand_aligntest (count, size, false);
23473 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23474 rtx modesize;
23475 int n;
23476
23477 /* If we do not have vector value to copy, we must reduce size. */
23478 if (issetmem)
23479 {
23480 if (!vec_value)
23481 {
23482 if (GET_MODE (value) == VOIDmode && size > 8)
23483 mode = Pmode;
23484 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23485 mode = GET_MODE (value);
23486 }
23487 else
23488 mode = GET_MODE (vec_value), value = vec_value;
23489 }
23490 else
23491 {
23492 /* Choose appropriate vector mode. */
23493 if (size >= 32)
23494 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23495 else if (size >= 16)
23496 mode = TARGET_SSE ? V16QImode : DImode;
23497 srcmem = change_address (srcmem, mode, srcptr);
23498 }
23499 destmem = change_address (destmem, mode, destptr);
23500 modesize = GEN_INT (GET_MODE_SIZE (mode));
23501 gcc_assert (GET_MODE_SIZE (mode) <= size);
23502 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23503 {
23504 if (issetmem)
23505 emit_move_insn (destmem, gen_lowpart (mode, value));
23506 else
23507 {
23508 emit_move_insn (destmem, srcmem);
23509 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23510 }
23511 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23512 }
23513
23514 destmem = offset_address (destmem, count, 1);
23515 destmem = offset_address (destmem, GEN_INT (-2 * size),
23516 GET_MODE_SIZE (mode));
23517 if (!issetmem)
23518 {
23519 srcmem = offset_address (srcmem, count, 1);
23520 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23521 GET_MODE_SIZE (mode));
23522 }
23523 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23524 {
23525 if (issetmem)
23526 emit_move_insn (destmem, gen_lowpart (mode, value));
23527 else
23528 {
23529 emit_move_insn (destmem, srcmem);
23530 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23531 }
23532 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23533 }
23534 emit_jump_insn (gen_jump (done_label));
23535 emit_barrier ();
23536
23537 emit_label (label);
23538 LABEL_NUSES (label) = 1;
23539 }
23540
23541 /* Handle small memcpy (up to SIZE, which is supposed to be a small power of 2)
23542 and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23543 bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23544 proceed with a loop copying SIZE bytes at once. Do moves in MODE.
23545 DONE_LABEL is a label after the whole copying sequence. The label is created
23546 on demand if *DONE_LABEL is NULL.
23547 MIN_SIZE is the minimal size of the block copied. This value gets adjusted for new
23548 bounds after the initial copies.
23549
23550 DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23551 DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicates whether
23552 we will dispatch to a library call for large blocks.
23553
23554 In pseudocode we do:
23555
23556 if (COUNT < SIZE)
23557 {
23558 Assume that SIZE is 4. Bigger sizes are handled analogously
23559 if (COUNT & 4)
23560 {
23561 copy 4 bytes from SRCPTR to DESTPTR
23562 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23563 goto done_label
23564 }
23565 if (!COUNT)
23566 goto done_label;
23567 copy 1 byte from SRCPTR to DESTPTR
23568 if (COUNT & 2)
23569 {
23570 copy 2 bytes from SRCPTR to DESTPTR
23571 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23572 }
23573 }
23574 else
23575 {
23576 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23577 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23578
23579 OLD_DESTPTR = DESTPTR;
23580 Align DESTPTR up to DESIRED_ALIGN
23581 SRCPTR += DESTPTR - OLD_DESTPTR
23582 COUNT -= DEST_PTR - OLD_DESTPTR
23583 if (DYNAMIC_CHECK)
23584 Round COUNT down to multiple of SIZE
23585 << optional caller supplied zero size guard is here >>
23586 << optional caller supplied dynamic check is here >>
23587 << caller supplied main copy loop is here >>
23588 }
23589 done_label:
23590 */
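/* A rough trace of the sequence above, assuming SIZE == 16, DESIRED_ALIGN == 16,
   ALIGN == 1 and a runtime COUNT of 100: the code branches past the small-block
   handling, one possibly misaligned 16-byte move covers the head of the block,
   another covers the last 16 bytes (84..99), DESTPTR is then rounded up to the
   next 16-byte boundary, SRCPTR and COUNT are adjusted by the same amount, and
   the caller-supplied main loop copies the remaining whole 16-byte chunks.  */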
23591 static void
23592 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23593 rtx *destptr, rtx *srcptr,
23594 enum machine_mode mode,
23595 rtx value, rtx vec_value,
23596 rtx *count,
23597 rtx_code_label **done_label,
23598 int size,
23599 int desired_align,
23600 int align,
23601 unsigned HOST_WIDE_INT *min_size,
23602 bool dynamic_check,
23603 bool issetmem)
23604 {
23605 rtx_code_label *loop_label = NULL, *label;
23606 int n;
23607 rtx modesize;
23608 int prolog_size = 0;
23609 rtx mode_value;
23610
23611 /* Choose the proper value to copy. */
23612 if (issetmem && VECTOR_MODE_P (mode))
23613 mode_value = vec_value;
23614 else
23615 mode_value = value;
23616 gcc_assert (GET_MODE_SIZE (mode) <= size);
23617
23618 /* See if block is big or small, handle small blocks. */
23619 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23620 {
23621 int size2 = size;
23622 loop_label = gen_label_rtx ();
23623
23624 if (!*done_label)
23625 *done_label = gen_label_rtx ();
23626
23627 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23628 1, loop_label);
23629 size2 >>= 1;
23630
23631 /* Handle sizes > 3. */
23632 for (;size2 > 2; size2 >>= 1)
23633 expand_small_movmem_or_setmem (destmem, srcmem,
23634 *destptr, *srcptr,
23635 value, vec_value,
23636 *count,
23637 size2, *done_label, issetmem);
23638 /* Nothing to copy? Jump to DONE_LABEL if so. */
23639 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23640 1, *done_label);
23641
23642 /* Do a byte copy. */
23643 destmem = change_address (destmem, QImode, *destptr);
23644 if (issetmem)
23645 emit_move_insn (destmem, gen_lowpart (QImode, value));
23646 else
23647 {
23648 srcmem = change_address (srcmem, QImode, *srcptr);
23649 emit_move_insn (destmem, srcmem);
23650 }
23651
23652 /* Handle sizes 2 and 3. */
23653 label = ix86_expand_aligntest (*count, 2, false);
23654 destmem = change_address (destmem, HImode, *destptr);
23655 destmem = offset_address (destmem, *count, 1);
23656 destmem = offset_address (destmem, GEN_INT (-2), 2);
23657 if (issetmem)
23658 emit_move_insn (destmem, gen_lowpart (HImode, value));
23659 else
23660 {
23661 srcmem = change_address (srcmem, HImode, *srcptr);
23662 srcmem = offset_address (srcmem, *count, 1);
23663 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23664 emit_move_insn (destmem, srcmem);
23665 }
23666
23667 emit_label (label);
23668 LABEL_NUSES (label) = 1;
23669 emit_jump_insn (gen_jump (*done_label));
23670 emit_barrier ();
23671 }
23672 else
23673 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23674 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23675
23676 /* Start memcpy for COUNT >= SIZE. */
23677 if (loop_label)
23678 {
23679 emit_label (loop_label);
23680 LABEL_NUSES (loop_label) = 1;
23681 }
23682
23683 /* Copy first desired_align bytes. */
23684 if (!issetmem)
23685 srcmem = change_address (srcmem, mode, *srcptr);
23686 destmem = change_address (destmem, mode, *destptr);
23687 modesize = GEN_INT (GET_MODE_SIZE (mode));
23688 for (n = 0; prolog_size < desired_align - align; n++)
23689 {
23690 if (issetmem)
23691 emit_move_insn (destmem, mode_value);
23692 else
23693 {
23694 emit_move_insn (destmem, srcmem);
23695 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23696 }
23697 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23698 prolog_size += GET_MODE_SIZE (mode);
23699 }
23700
23701
23702 /* Copy last SIZE bytes. */
23703 destmem = offset_address (destmem, *count, 1);
23704 destmem = offset_address (destmem,
23705 GEN_INT (-size - prolog_size),
23706 1);
23707 if (issetmem)
23708 emit_move_insn (destmem, mode_value);
23709 else
23710 {
23711 srcmem = offset_address (srcmem, *count, 1);
23712 srcmem = offset_address (srcmem,
23713 GEN_INT (-size - prolog_size),
23714 1);
23715 emit_move_insn (destmem, srcmem);
23716 }
23717 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23718 {
23719 destmem = offset_address (destmem, modesize, 1);
23720 if (issetmem)
23721 emit_move_insn (destmem, mode_value);
23722 else
23723 {
23724 srcmem = offset_address (srcmem, modesize, 1);
23725 emit_move_insn (destmem, srcmem);
23726 }
23727 }
23728
23729 /* Align destination. */
23730 if (desired_align > 1 && desired_align > align)
23731 {
23732 rtx saveddest = *destptr;
23733
23734 gcc_assert (desired_align <= size);
23735 /* Align destptr up, place it in a new register. */
23736 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23737 GEN_INT (prolog_size),
23738 NULL_RTX, 1, OPTAB_DIRECT);
23739 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23740 GEN_INT (-desired_align),
23741 *destptr, 1, OPTAB_DIRECT);
23742 /* See how many bytes we skipped. */
23743 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23744 *destptr,
23745 saveddest, 1, OPTAB_DIRECT);
23746 /* Adjust srcptr and count. */
23747 if (!issetmem)
23748 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23749 *srcptr, 1, OPTAB_DIRECT);
23750 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23751 saveddest, *count, 1, OPTAB_DIRECT);
23752 /* We copied at most size + prolog_size. */
23753 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23754 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23755 else
23756 *min_size = 0;
23757
23758 /* Our loops always round down the block size, but for dispatch to the
23759 library we need the precise value. */
23760 if (dynamic_check)
23761 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23762 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23763 }
23764 else
23765 {
23766 gcc_assert (prolog_size == 0);
23767 /* Decrease count, so we won't end up copying last word twice. */
23768 if (!CONST_INT_P (*count))
23769 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23770 constm1_rtx, *count, 1, OPTAB_DIRECT);
23771 else
23772 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23773 if (*min_size)
23774 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23775 }
23776 }
23777
23778
23779 /* This function is like the previous one, except here we know how many bytes
23780 need to be copied. That allows us to update alignment not only of DST, which
23781 is returned, but also of SRC, which is passed as a pointer for that
23782 reason. */
23783 static rtx
23784 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23785 rtx srcreg, rtx value, rtx vec_value,
23786 int desired_align, int align_bytes,
23787 bool issetmem)
23788 {
23789 rtx src = NULL;
23790 rtx orig_dst = dst;
23791 rtx orig_src = NULL;
23792 int piece_size = 1;
23793 int copied_bytes = 0;
23794
23795 if (!issetmem)
23796 {
23797 gcc_assert (srcp != NULL);
23798 src = *srcp;
23799 orig_src = src;
23800 }
23801
23802 for (piece_size = 1;
23803 piece_size <= desired_align && copied_bytes < align_bytes;
23804 piece_size <<= 1)
23805 {
23806 if (align_bytes & piece_size)
23807 {
23808 if (issetmem)
23809 {
23810 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23811 dst = emit_memset (dst, destreg, vec_value, piece_size);
23812 else
23813 dst = emit_memset (dst, destreg, value, piece_size);
23814 }
23815 else
23816 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23817 copied_bytes += piece_size;
23818 }
23819 }
23820 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23821 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23822 if (MEM_SIZE_KNOWN_P (orig_dst))
23823 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23824
23825 if (!issetmem)
23826 {
23827 int src_align_bytes = get_mem_align_offset (src, desired_align
23828 * BITS_PER_UNIT);
23829 if (src_align_bytes >= 0)
23830 src_align_bytes = desired_align - src_align_bytes;
23831 if (src_align_bytes >= 0)
23832 {
23833 unsigned int src_align;
23834 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23835 {
23836 if ((src_align_bytes & (src_align - 1))
23837 == (align_bytes & (src_align - 1)))
23838 break;
23839 }
23840 if (src_align > (unsigned int) desired_align)
23841 src_align = desired_align;
23842 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23843 set_mem_align (src, src_align * BITS_PER_UNIT);
23844 }
23845 if (MEM_SIZE_KNOWN_P (orig_src))
23846 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23847 *srcp = src;
23848 }
23849
23850 return dst;
23851 }
23852
23853 /* Return true if ALG can be used in current context.
23854 Assume we expand memset if MEMSET is true. */
23855 static bool
23856 alg_usable_p (enum stringop_alg alg, bool memset)
23857 {
23858 if (alg == no_stringop)
23859 return false;
23860 if (alg == vector_loop)
23861 return TARGET_SSE || TARGET_AVX;
23862 /* Algorithms using the rep prefix want at least edi and ecx;
23863 additionally, memset wants eax and memcpy wants esi. Don't
23864 consider such algorithms if the user has appropriated those
23865 registers for their own purposes. */
23866 if (alg == rep_prefix_1_byte
23867 || alg == rep_prefix_4_byte
23868 || alg == rep_prefix_8_byte)
23869 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23870 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23871 return true;
23872 }
23873
23874 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23875 static enum stringop_alg
23876 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23877 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23878 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23879 {
23880 const struct stringop_algs * algs;
23881 bool optimize_for_speed;
23882 int max = 0;
23883 const struct processor_costs *cost;
23884 int i;
23885 bool any_alg_usable_p = false;
23886
23887 *noalign = false;
23888 *dynamic_check = -1;
23889
23890 /* Even if the string operation call is cold, we still might spend a lot
23891 of time processing large blocks. */
23892 if (optimize_function_for_size_p (cfun)
23893 || (optimize_insn_for_size_p ()
23894 && (max_size < 256
23895 || (expected_size != -1 && expected_size < 256))))
23896 optimize_for_speed = false;
23897 else
23898 optimize_for_speed = true;
23899
23900 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23901 if (memset)
23902 algs = &cost->memset[TARGET_64BIT != 0];
23903 else
23904 algs = &cost->memcpy[TARGET_64BIT != 0];
23905
23906 /* See maximal size for user defined algorithm. */
23907 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23908 {
23909 enum stringop_alg candidate = algs->size[i].alg;
23910 bool usable = alg_usable_p (candidate, memset);
23911 any_alg_usable_p |= usable;
23912
23913 if (candidate != libcall && candidate && usable)
23914 max = algs->size[i].max;
23915 }
23916
23917 /* If the expected size is not known but the max size is small enough
23918 that the inline version is a win, set the expected size into
23919 the range. */
23920 if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
23921 && expected_size == -1)
23922 expected_size = min_size / 2 + max_size / 2;
23923
23924 /* If the user specified the algorithm, honor it if possible. */
23925 if (ix86_stringop_alg != no_stringop
23926 && alg_usable_p (ix86_stringop_alg, memset))
23927 return ix86_stringop_alg;
23928 /* rep; movq or rep; movl is the smallest variant. */
23929 else if (!optimize_for_speed)
23930 {
23931 *noalign = true;
23932 if (!count || (count & 3) || (memset && !zero_memset))
23933 return alg_usable_p (rep_prefix_1_byte, memset)
23934 ? rep_prefix_1_byte : loop_1_byte;
23935 else
23936 return alg_usable_p (rep_prefix_4_byte, memset)
23937 ? rep_prefix_4_byte : loop;
23938 }
23939 /* Very tiny blocks are best handled via the loop; REP is expensive to
23940 set up. */
23941 else if (expected_size != -1 && expected_size < 4)
23942 return loop_1_byte;
23943 else if (expected_size != -1)
23944 {
23945 enum stringop_alg alg = libcall;
23946 bool alg_noalign = false;
23947 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23948 {
23949 /* We get here if the algorithms that were not libcall-based
23950 were rep-prefix based and we are unable to use rep prefixes
23951 based on global register usage. Break out of the loop and
23952 use the heuristic below. */
23953 if (algs->size[i].max == 0)
23954 break;
23955 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23956 {
23957 enum stringop_alg candidate = algs->size[i].alg;
23958
23959 if (candidate != libcall && alg_usable_p (candidate, memset))
23960 {
23961 alg = candidate;
23962 alg_noalign = algs->size[i].noalign;
23963 }
23964 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23965 last non-libcall inline algorithm. */
23966 if (TARGET_INLINE_ALL_STRINGOPS)
23967 {
23968 /* When the current size is best copied by a libcall,
23969 but we are still forced to inline, run the heuristic below
23970 that will pick code for medium sized blocks. */
23971 if (alg != libcall)
23972 {
23973 *noalign = alg_noalign;
23974 return alg;
23975 }
23976 break;
23977 }
23978 else if (alg_usable_p (candidate, memset))
23979 {
23980 *noalign = algs->size[i].noalign;
23981 return candidate;
23982 }
23983 }
23984 }
23985 }
23986 /* When asked to inline the call anyway, try to pick a meaningful choice.
23987 We look for the maximal size of block that is faster to copy by hand and
23988 take blocks of at most that size, guessing that the average size will
23989 be roughly half of the maximum.
23990
23991 If this turns out to be bad, we might simply specify the preferred
23992 choice in ix86_costs. */
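/* For example, if the largest block the cost tables handle inline is 8192
   bytes, the code below recurses with an expected size of 4096 to pick an
   algorithm for typical blocks and, with -minline-stringops-dynamically,
   sets *DYNAMIC_CHECK to 8192 so that larger blocks are dispatched to the
   library at run time.  */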
23993 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23994 && (algs->unknown_size == libcall
23995 || !alg_usable_p (algs->unknown_size, memset)))
23996 {
23997 enum stringop_alg alg;
23998
23999 /* If there aren't any usable algorithms, then recursing on
24000 smaller sizes isn't going to find anything. Just return the
24001 simple byte-at-a-time copy loop. */
24002 if (!any_alg_usable_p)
24003 {
24004 /* Pick something reasonable. */
24005 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24006 *dynamic_check = 128;
24007 return loop_1_byte;
24008 }
24009 if (max <= 0)
24010 max = 4096;
24011 alg = decide_alg (count, max / 2, min_size, max_size, memset,
24012 zero_memset, dynamic_check, noalign);
24013 gcc_assert (*dynamic_check == -1);
24014 gcc_assert (alg != libcall);
24015 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24016 *dynamic_check = max;
24017 return alg;
24018 }
24019 return (alg_usable_p (algs->unknown_size, memset)
24020 ? algs->unknown_size : libcall);
24021 }
24022
24023 /* Decide on alignment. We know that the operand is already aligned to ALIGN
24024 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
24025 static int
24026 decide_alignment (int align,
24027 enum stringop_alg alg,
24028 int expected_size,
24029 enum machine_mode move_mode)
24030 {
24031 int desired_align = 0;
24032
24033 gcc_assert (alg != no_stringop);
24034
24035 if (alg == libcall)
24036 return 0;
24037 if (move_mode == VOIDmode)
24038 return 0;
24039
24040 desired_align = GET_MODE_SIZE (move_mode);
24041 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
24042 copying a whole cacheline at once. */
24043 if (TARGET_PENTIUMPRO
24044 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24045 desired_align = 8;
24046
24047 if (optimize_size)
24048 desired_align = 1;
24049 if (desired_align < align)
24050 desired_align = align;
24051 if (expected_size != -1 && expected_size < 4)
24052 desired_align = align;
24053
24054 return desired_align;
24055 }
24056
24057
24058 /* Helper function for memset. For QImode value 0xXY produce
24059 0xXYXYXYXY of the width specified by MODE. This is essentially
24060 a * 0x01010101, but we can do slightly better than
24061 synth_mult by unwinding the sequence by hand on CPUs with
24062 slow multiply. */
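/* For instance, for a constant QImode value 0xAB and MODE == SImode the
   constant path below computes

     v = 0xAB;  v |= v << 8;   -> 0xABAB
                v |= v << 16;  -> 0xABABABAB

   and for DImode additionally v |= v << 32.  For non-constant values the
   same duplication is done with shifts/ORs or a multiply, whichever the
   cost tables say is cheaper.  */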
24063 static rtx
24064 promote_duplicated_reg (enum machine_mode mode, rtx val)
24065 {
24066 enum machine_mode valmode = GET_MODE (val);
24067 rtx tmp;
24068 int nops = mode == DImode ? 3 : 2;
24069
24070 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24071 if (val == const0_rtx)
24072 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24073 if (CONST_INT_P (val))
24074 {
24075 HOST_WIDE_INT v = INTVAL (val) & 255;
24076
24077 v |= v << 8;
24078 v |= v << 16;
24079 if (mode == DImode)
24080 v |= (v << 16) << 16;
24081 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24082 }
24083
24084 if (valmode == VOIDmode)
24085 valmode = QImode;
24086 if (valmode != QImode)
24087 val = gen_lowpart (QImode, val);
24088 if (mode == QImode)
24089 return val;
24090 if (!TARGET_PARTIAL_REG_STALL)
24091 nops--;
24092 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24093 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24094 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24095 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24096 {
24097 rtx reg = convert_modes (mode, QImode, val, true);
24098 tmp = promote_duplicated_reg (mode, const1_rtx);
24099 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24100 OPTAB_DIRECT);
24101 }
24102 else
24103 {
24104 rtx reg = convert_modes (mode, QImode, val, true);
24105
24106 if (!TARGET_PARTIAL_REG_STALL)
24107 if (mode == SImode)
24108 emit_insn (gen_movsi_insv_1 (reg, reg));
24109 else
24110 emit_insn (gen_movdi_insv_1 (reg, reg));
24111 else
24112 {
24113 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24114 NULL, 1, OPTAB_DIRECT);
24115 reg =
24116 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24117 }
24118 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24119 NULL, 1, OPTAB_DIRECT);
24120 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24121 if (mode == SImode)
24122 return reg;
24123 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24124 NULL, 1, OPTAB_DIRECT);
24125 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24126 return reg;
24127 }
24128 }
24129
24130 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
24131 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
24132 getting alignment from ALIGN to DESIRED_ALIGN. */
24133 static rtx
24134 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24135 int align)
24136 {
24137 rtx promoted_val;
24138
24139 if (TARGET_64BIT
24140 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24141 promoted_val = promote_duplicated_reg (DImode, val);
24142 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24143 promoted_val = promote_duplicated_reg (SImode, val);
24144 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24145 promoted_val = promote_duplicated_reg (HImode, val);
24146 else
24147 promoted_val = val;
24148
24149 return promoted_val;
24150 }
24151
24152 /* Expand string move (memcpy) or store (memset) operation. Use i386 string
24153 operations when profitable. The code depends upon architecture, block size
24154 and alignment, but always has one of the following overall structures:
24155
24156 Aligned move sequence:
24157
24158 1) Prologue guard: Conditional that jumps up to epilogues for small
24159 blocks that can be handled by the epilogue alone. This is faster
24160 but also needed for correctness, since the prologue assumes the block
24161 is larger than the desired alignment.
24162
24163 Optional dynamic check for size and libcall for large
24164 blocks is emitted here too, with -minline-stringops-dynamically.
24165
24166 2) Prologue: copy first few bytes in order to get destination
24167 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24168 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24169 copied. We emit either a jump tree on power of two sized
24170 blocks, or a byte loop.
24171
24172 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24173 with specified algorithm.
24174
24175 4) Epilogue: code copying tail of the block that is too small to be
24176 handled by main body (or up to size guarded by prologue guard).
24177
24178 Misaligned move sequence
24179
24180 1) Misaligned move prologue/epilogue containing:
24181 a) Prologue handling small memory blocks and jumping to done_label
24182 (skipped if blocks are known to be large enough)
24183 b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24184 needed, done by a single possibly misaligned move
24185 (skipped if alignment is not needed)
24186 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24187
24188 2) Zero size guard dispatching to done_label, if needed
24189
24190 3) Dispatch to a library call, if needed.
24191
24192 4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24193 with specified algorithm. */
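/* As a rough example, a memset of an unknown number of bytes expanded with
   the unrolled_loop algorithm on x86-64 typically becomes: a guard jumping
   to the epilogue when the count is below 32, a prologue aligning the
   destination to 8 bytes, a main loop storing 32 bytes (4 * DImode) per
   iteration, and an epilogue handling the remaining tail bytes.  */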
24194 bool
24195 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24196 rtx align_exp, rtx expected_align_exp,
24197 rtx expected_size_exp, rtx min_size_exp,
24198 rtx max_size_exp, rtx probable_max_size_exp,
24199 bool issetmem)
24200 {
24201 rtx destreg;
24202 rtx srcreg = NULL;
24203 rtx_code_label *label = NULL;
24204 rtx tmp;
24205 rtx_code_label *jump_around_label = NULL;
24206 HOST_WIDE_INT align = 1;
24207 unsigned HOST_WIDE_INT count = 0;
24208 HOST_WIDE_INT expected_size = -1;
24209 int size_needed = 0, epilogue_size_needed;
24210 int desired_align = 0, align_bytes = 0;
24211 enum stringop_alg alg;
24212 rtx promoted_val = NULL;
24213 rtx vec_promoted_val = NULL;
24214 bool force_loopy_epilogue = false;
24215 int dynamic_check;
24216 bool need_zero_guard = false;
24217 bool noalign;
24218 enum machine_mode move_mode = VOIDmode;
24219 int unroll_factor = 1;
24220 /* TODO: Once value ranges are available, fill in proper data. */
24221 unsigned HOST_WIDE_INT min_size = 0;
24222 unsigned HOST_WIDE_INT max_size = -1;
24223 unsigned HOST_WIDE_INT probable_max_size = -1;
24224 bool misaligned_prologue_used = false;
24225
24226 if (CONST_INT_P (align_exp))
24227 align = INTVAL (align_exp);
24228 /* i386 can do misaligned access at a reasonably increased cost. */
24229 if (CONST_INT_P (expected_align_exp)
24230 && INTVAL (expected_align_exp) > align)
24231 align = INTVAL (expected_align_exp);
24232 /* ALIGN is the minimum of destination and source alignment, but we care here
24233 just about destination alignment. */
24234 else if (!issetmem
24235 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24236 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24237
24238 if (CONST_INT_P (count_exp))
24239 {
24240 min_size = max_size = probable_max_size = count = expected_size
24241 = INTVAL (count_exp);
24242 /* When COUNT is 0, there is nothing to do. */
24243 if (!count)
24244 return true;
24245 }
24246 else
24247 {
24248 if (min_size_exp)
24249 min_size = INTVAL (min_size_exp);
24250 if (max_size_exp)
24251 max_size = INTVAL (max_size_exp);
24252 if (probable_max_size_exp)
24253 probable_max_size = INTVAL (probable_max_size_exp);
24254 if (CONST_INT_P (expected_size_exp))
24255 expected_size = INTVAL (expected_size_exp);
24256 }
24257
24258 /* Make sure we don't need to care about overflow later on. */
24259 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24260 return false;
24261
24262 /* Step 0: Decide on preferred algorithm, desired alignment and
24263 size of chunks to be copied by main loop. */
24264 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24265 issetmem,
24266 issetmem && val_exp == const0_rtx,
24267 &dynamic_check, &noalign);
24268 if (alg == libcall)
24269 return false;
24270 gcc_assert (alg != no_stringop);
24271
24272 /* For now vector-version of memset is generated only for memory zeroing, as
24273 creating the promoted vector value is very cheap in this case. */
24274 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24275 alg = unrolled_loop;
24276
24277 if (!count)
24278 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24279 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24280 if (!issetmem)
24281 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24282
24283 unroll_factor = 1;
24284 move_mode = word_mode;
24285 switch (alg)
24286 {
24287 case libcall:
24288 case no_stringop:
24289 case last_alg:
24290 gcc_unreachable ();
24291 case loop_1_byte:
24292 need_zero_guard = true;
24293 move_mode = QImode;
24294 break;
24295 case loop:
24296 need_zero_guard = true;
24297 break;
24298 case unrolled_loop:
24299 need_zero_guard = true;
24300 unroll_factor = (TARGET_64BIT ? 4 : 2);
24301 break;
24302 case vector_loop:
24303 need_zero_guard = true;
24304 unroll_factor = 4;
24305 /* Find the widest supported mode. */
24306 move_mode = word_mode;
24307 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24308 != CODE_FOR_nothing)
24309 move_mode = GET_MODE_WIDER_MODE (move_mode);
24310
24311 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24312 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24313 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24314 {
24315 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24316 move_mode = mode_for_vector (word_mode, nunits);
24317 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24318 move_mode = word_mode;
24319 }
24320 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24321 break;
24322 case rep_prefix_8_byte:
24323 move_mode = DImode;
24324 break;
24325 case rep_prefix_4_byte:
24326 move_mode = SImode;
24327 break;
24328 case rep_prefix_1_byte:
24329 move_mode = QImode;
24330 break;
24331 }
24332 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24333 epilogue_size_needed = size_needed;
24334
24335 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24336 if (!TARGET_ALIGN_STRINGOPS || noalign)
24337 align = desired_align;
24338
24339 /* Step 1: Prologue guard. */
24340
24341 /* Alignment code needs count to be in register. */
24342 if (CONST_INT_P (count_exp) && desired_align > align)
24343 {
24344 if (INTVAL (count_exp) > desired_align
24345 && INTVAL (count_exp) > size_needed)
24346 {
24347 align_bytes
24348 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24349 if (align_bytes <= 0)
24350 align_bytes = 0;
24351 else
24352 align_bytes = desired_align - align_bytes;
24353 }
24354 if (align_bytes == 0)
24355 count_exp = force_reg (counter_mode (count_exp), count_exp);
24356 }
24357 gcc_assert (desired_align >= 1 && align >= 1);
24358
24359 /* Misaligned move sequences handle both prologue and epilogue at once.
24360 Default code generation results in smaller code for large alignments
24361 and also avoids redundant work when sizes are known precisely. */
24362 misaligned_prologue_used
24363 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24364 && MAX (desired_align, epilogue_size_needed) <= 32
24365 && desired_align <= epilogue_size_needed
24366 && ((desired_align > align && !align_bytes)
24367 || (!count && epilogue_size_needed > 1)));
24368
24369 /* Do the cheap promotion to allow better CSE across the
24370 main loop and epilogue (i.e. one load of the big constant in
24371 front of all the code).
24372 For now the misaligned move sequences do not have a fast path
24373 without broadcasting. */
24374 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24375 {
24376 if (alg == vector_loop)
24377 {
24378 gcc_assert (val_exp == const0_rtx);
24379 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24380 promoted_val = promote_duplicated_reg_to_size (val_exp,
24381 GET_MODE_SIZE (word_mode),
24382 desired_align, align);
24383 }
24384 else
24385 {
24386 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24387 desired_align, align);
24388 }
24389 }
24390 /* Misaligned move sequences handle both prologues and epilogues at once.
24391 Default code generation results in smaller code for large alignments and
24392 also avoids redundant work when sizes are known precisely. */
24393 if (misaligned_prologue_used)
24394 {
24395 /* The misaligned move prologue handles small blocks by itself. */
24396 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24397 (dst, src, &destreg, &srcreg,
24398 move_mode, promoted_val, vec_promoted_val,
24399 &count_exp,
24400 &jump_around_label,
24401 desired_align < align
24402 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24403 desired_align, align, &min_size, dynamic_check, issetmem);
24404 if (!issetmem)
24405 src = change_address (src, BLKmode, srcreg);
24406 dst = change_address (dst, BLKmode, destreg);
24407 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24408 epilogue_size_needed = 0;
24409 if (need_zero_guard && !min_size)
24410 {
24411 /* It is possible that we copied enough so the main loop will not
24412 execute. */
24413 gcc_assert (size_needed > 1);
24414 if (jump_around_label == NULL_RTX)
24415 jump_around_label = gen_label_rtx ();
24416 emit_cmp_and_jump_insns (count_exp,
24417 GEN_INT (size_needed),
24418 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24419 if (expected_size == -1
24420 || expected_size < (desired_align - align) / 2 + size_needed)
24421 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24422 else
24423 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24424 }
24425 }
24426 /* Ensure that alignment prologue won't copy past end of block. */
24427 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24428 {
24429 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24430 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
24431 Make sure it is a power of 2. */
24432 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
24433
24434 /* To improve performance of small blocks, we jump around the VAL
24435 promoting code. This means that if the promoted VAL is not constant,
24436 we might not use it in the epilogue and have to use the byte
24437 loop variant. */
24438 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24439 force_loopy_epilogue = true;
24440 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24441 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24442 {
24443 /* If main algorithm works on QImode, no epilogue is needed.
24444 For small sizes just don't align anything. */
24445 if (size_needed == 1)
24446 desired_align = align;
24447 else
24448 goto epilogue;
24449 }
24450 else if (!count
24451 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24452 {
24453 label = gen_label_rtx ();
24454 emit_cmp_and_jump_insns (count_exp,
24455 GEN_INT (epilogue_size_needed),
24456 LTU, 0, counter_mode (count_exp), 1, label);
24457 if (expected_size == -1 || expected_size < epilogue_size_needed)
24458 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24459 else
24460 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24461 }
24462 }
24463
24464 /* Emit code to decide at runtime whether a library call or inline code
24465 should be used. */
24466 if (dynamic_check != -1)
24467 {
24468 if (!issetmem && CONST_INT_P (count_exp))
24469 {
24470 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24471 {
24472 emit_block_move_via_libcall (dst, src, count_exp, false);
24473 count_exp = const0_rtx;
24474 goto epilogue;
24475 }
24476 }
24477 else
24478 {
24479 rtx_code_label *hot_label = gen_label_rtx ();
24480 if (jump_around_label == NULL_RTX)
24481 jump_around_label = gen_label_rtx ();
24482 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24483 LEU, 0, counter_mode (count_exp),
24484 1, hot_label);
24485 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24486 if (issetmem)
24487 set_storage_via_libcall (dst, count_exp, val_exp, false);
24488 else
24489 emit_block_move_via_libcall (dst, src, count_exp, false);
24490 emit_jump (jump_around_label);
24491 emit_label (hot_label);
24492 }
24493 }
24494
24495 /* Step 2: Alignment prologue. */
24496 /* Do the expensive promotion once we branched off the small blocks. */
24497 if (issetmem && !promoted_val)
24498 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24499 desired_align, align);
24500
24501 if (desired_align > align && !misaligned_prologue_used)
24502 {
24503 if (align_bytes == 0)
24504 {
24505 /* Except for the first move in the prologue, we no longer know
24506 the constant offset in aliasing info. It doesn't seem worth
24507 the pain to maintain it for the first move, so throw away
24508 the info early. */
24509 dst = change_address (dst, BLKmode, destreg);
24510 if (!issetmem)
24511 src = change_address (src, BLKmode, srcreg);
24512 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24513 promoted_val, vec_promoted_val,
24514 count_exp, align, desired_align,
24515 issetmem);
24516 /* At most desired_align - align bytes are copied. */
24517 if (min_size < (unsigned)(desired_align - align))
24518 min_size = 0;
24519 else
24520 min_size -= desired_align - align;
24521 }
24522 else
24523 {
24524 /* If we know how many bytes need to be stored before dst is
24525 sufficiently aligned, maintain aliasing info accurately. */
24526 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24527 srcreg,
24528 promoted_val,
24529 vec_promoted_val,
24530 desired_align,
24531 align_bytes,
24532 issetmem);
24533
24534 count_exp = plus_constant (counter_mode (count_exp),
24535 count_exp, -align_bytes);
24536 count -= align_bytes;
24537 min_size -= align_bytes;
24538 max_size -= align_bytes;
24539 }
24540 if (need_zero_guard
24541 && !min_size
24542 && (count < (unsigned HOST_WIDE_INT) size_needed
24543 || (align_bytes == 0
24544 && count < ((unsigned HOST_WIDE_INT) size_needed
24545 + desired_align - align))))
24546 {
24547 /* It is possible that we copied enough so the main loop will not
24548 execute. */
24549 gcc_assert (size_needed > 1);
24550 if (label == NULL_RTX)
24551 label = gen_label_rtx ();
24552 emit_cmp_and_jump_insns (count_exp,
24553 GEN_INT (size_needed),
24554 LTU, 0, counter_mode (count_exp), 1, label);
24555 if (expected_size == -1
24556 || expected_size < (desired_align - align) / 2 + size_needed)
24557 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24558 else
24559 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24560 }
24561 }
24562 if (label && size_needed == 1)
24563 {
24564 emit_label (label);
24565 LABEL_NUSES (label) = 1;
24566 label = NULL;
24567 epilogue_size_needed = 1;
24568 if (issetmem)
24569 promoted_val = val_exp;
24570 }
24571 else if (label == NULL_RTX && !misaligned_prologue_used)
24572 epilogue_size_needed = size_needed;
24573
24574 /* Step 3: Main loop. */
24575
24576 switch (alg)
24577 {
24578 case libcall:
24579 case no_stringop:
24580 case last_alg:
24581 gcc_unreachable ();
24582 case loop_1_byte:
24583 case loop:
24584 case unrolled_loop:
24585 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24586 count_exp, move_mode, unroll_factor,
24587 expected_size, issetmem);
24588 break;
24589 case vector_loop:
24590 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24591 vec_promoted_val, count_exp, move_mode,
24592 unroll_factor, expected_size, issetmem);
24593 break;
24594 case rep_prefix_8_byte:
24595 case rep_prefix_4_byte:
24596 case rep_prefix_1_byte:
24597 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24598 val_exp, count_exp, move_mode, issetmem);
24599 break;
24600 }
24601 /* Properly adjust the offset of src and dest memory for aliasing. */
24602 if (CONST_INT_P (count_exp))
24603 {
24604 if (!issetmem)
24605 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24606 (count / size_needed) * size_needed);
24607 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24608 (count / size_needed) * size_needed);
24609 }
24610 else
24611 {
24612 if (!issetmem)
24613 src = change_address (src, BLKmode, srcreg);
24614 dst = change_address (dst, BLKmode, destreg);
24615 }
24616
24617 /* Step 4: Epilogue to copy the remaining bytes. */
24618 epilogue:
24619 if (label)
24620 {
24621 /* When the main loop is done, COUNT_EXP might hold the original count,
24622 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24623 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24624 bytes. Compensate if needed. */
24625
24626 if (size_needed < epilogue_size_needed)
24627 {
24628 tmp =
24629 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24630 GEN_INT (size_needed - 1), count_exp, 1,
24631 OPTAB_DIRECT);
24632 if (tmp != count_exp)
24633 emit_move_insn (count_exp, tmp);
24634 }
24635 emit_label (label);
24636 LABEL_NUSES (label) = 1;
24637 }
24638
24639 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24640 {
24641 if (force_loopy_epilogue)
24642 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24643 epilogue_size_needed);
24644 else
24645 {
24646 if (issetmem)
24647 expand_setmem_epilogue (dst, destreg, promoted_val,
24648 vec_promoted_val, count_exp,
24649 epilogue_size_needed);
24650 else
24651 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24652 epilogue_size_needed);
24653 }
24654 }
24655 if (jump_around_label)
24656 emit_label (jump_around_label);
24657 return true;
24658 }
24659
24660
24661 /* Expand the appropriate insns for doing strlen if not just doing
24662 repnz; scasb
24663
24664 out = result, initialized with the start address
24665 align_rtx = alignment of the address.
24666 scratch = scratch register, initialized with the start address when
24667 not aligned, otherwise undefined
24668
24669 This is just the body. It needs the initializations mentioned above and
24670 some address computing at the end. These things are done in i386.md. */
24671
24672 static void
24673 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24674 {
24675 int align;
24676 rtx tmp;
24677 rtx_code_label *align_2_label = NULL;
24678 rtx_code_label *align_3_label = NULL;
24679 rtx_code_label *align_4_label = gen_label_rtx ();
24680 rtx_code_label *end_0_label = gen_label_rtx ();
24681 rtx mem;
24682 rtx tmpreg = gen_reg_rtx (SImode);
24683 rtx scratch = gen_reg_rtx (SImode);
24684 rtx cmp;
24685
24686 align = 0;
24687 if (CONST_INT_P (align_rtx))
24688 align = INTVAL (align_rtx);
24689
24690 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24691
24692 /* Is there a known alignment and is it less than 4? */
24693 if (align < 4)
24694 {
24695 rtx scratch1 = gen_reg_rtx (Pmode);
24696 emit_move_insn (scratch1, out);
24697 /* Is there a known alignment and is it not 2? */
24698 if (align != 2)
24699 {
24700 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24701 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24702
24703 /* Leave just the 3 lower bits. */
24704 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24705 NULL_RTX, 0, OPTAB_WIDEN);
24706
24707 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24708 Pmode, 1, align_4_label);
24709 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24710 Pmode, 1, align_2_label);
24711 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24712 Pmode, 1, align_3_label);
24713 }
24714 else
24715 {
24716 /* Since the alignment is 2, we have to check 2 or 0 bytes;
24717 check if it is aligned to 4 bytes. */
24718
24719 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24720 NULL_RTX, 0, OPTAB_WIDEN);
24721
24722 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24723 Pmode, 1, align_4_label);
24724 }
24725
24726 mem = change_address (src, QImode, out);
24727
24728 /* Now compare the bytes. */
24729
24730 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
24731 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24732 QImode, 1, end_0_label);
24733
24734 /* Increment the address. */
24735 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24736
24737 /* Not needed with an alignment of 2. */
24738 if (align != 2)
24739 {
24740 emit_label (align_2_label);
24741
24742 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24743 end_0_label);
24744
24745 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24746
24747 emit_label (align_3_label);
24748 }
24749
24750 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24751 end_0_label);
24752
24753 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24754 }
24755
24756 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
24757 align this loop; it only makes the program bigger and does not help
24758 to speed it up. */
24759 emit_label (align_4_label);
24760
24761 mem = change_address (src, SImode, out);
24762 emit_move_insn (scratch, mem);
24763 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24764
24765 /* This formula yields a nonzero result iff one of the bytes is zero.
24766 This saves three branches inside the loop and many cycles. */
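/* In C terms the sequence below computes

     tmpreg = (scratch - 0x01010101) & ~scratch & 0x80808080;

   e.g. scratch == 0x61006263 (a zero byte present) gives 0x00800000,
   while scratch == 0x61626364 (no zero byte) gives 0.  */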
24767
24768 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24769 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24770 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24771 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24772 gen_int_mode (0x80808080, SImode)));
24773 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24774 align_4_label);
24775
24776 if (TARGET_CMOVE)
24777 {
24778 rtx reg = gen_reg_rtx (SImode);
24779 rtx reg2 = gen_reg_rtx (Pmode);
24780 emit_move_insn (reg, tmpreg);
24781 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24782
24783 /* If zero is not in the first two bytes, move two bytes forward. */
24784 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24785 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24786 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24787 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24788 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24789 reg,
24790 tmpreg)));
24791 /* Emit lea manually to avoid clobbering of flags. */
24792 emit_insn (gen_rtx_SET (SImode, reg2,
24793 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24794
24795 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24796 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24797 emit_insn (gen_rtx_SET (VOIDmode, out,
24798 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24799 reg2,
24800 out)));
24801 }
24802 else
24803 {
24804 rtx_code_label *end_2_label = gen_label_rtx ();
24805 /* Is zero in the first two bytes? */
24806
24807 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24808 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24809 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24810 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24811 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24812 pc_rtx);
24813 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24814 JUMP_LABEL (tmp) = end_2_label;
24815
24816 /* Not in the first two. Move two bytes forward. */
24817 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24818 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24819
24820 emit_label (end_2_label);
24821
24822 }
24823
24824 /* Avoid branch in fixing the byte. */
24825 tmpreg = gen_lowpart (QImode, tmpreg);
24826 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24827 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24828 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24829 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24830
24831 emit_label (end_0_label);
24832 }
24833
24834 /* Expand strlen. */
24835
24836 bool
24837 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24838 {
24839 rtx addr, scratch1, scratch2, scratch3, scratch4;
24840
24841 /* The generic case of the strlen expander is long. Avoid
24842 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
24843
24844 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24845 && !TARGET_INLINE_ALL_STRINGOPS
24846 && !optimize_insn_for_size_p ()
24847 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24848 return false;
24849
24850 addr = force_reg (Pmode, XEXP (src, 0));
24851 scratch1 = gen_reg_rtx (Pmode);
24852
24853 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24854 && !optimize_insn_for_size_p ())
24855 {
24856 /* Well it seems that some optimizer does not combine a call like
24857 foo(strlen(bar), strlen(bar));
24858 when the move and the subtraction are done here. It does calculate
24859 the length just once when these instructions are done inside of
24860 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
24861 often used and I use one fewer register for the lifetime of
24862 output_strlen_unroll() this is better. */
24863
24864 emit_move_insn (out, addr);
24865
24866 ix86_expand_strlensi_unroll_1 (out, src, align);
24867
24868 /* strlensi_unroll_1 returns the address of the zero at the end of
24869 the string, like memchr(), so compute the length by subtracting
24870 the start address. */
24871 emit_insn (ix86_gen_sub3 (out, out, addr));
24872 }
24873 else
24874 {
24875 rtx unspec;
24876
24877 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24878 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24879 return false;
24880
24881 scratch2 = gen_reg_rtx (Pmode);
24882 scratch3 = gen_reg_rtx (Pmode);
24883 scratch4 = force_reg (Pmode, constm1_rtx);
24884
24885 emit_move_insn (scratch3, addr);
24886 eoschar = force_reg (QImode, eoschar);
24887
24888 src = replace_equiv_address_nv (src, scratch3);
24889
24890 /* If .md starts supporting :P, this can be done in .md. */
24891 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24892 scratch4), UNSPEC_SCAS);
24893 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24894 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24895 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24896 }
24897 return true;
24898 }
24899
24900 /* For a given symbol (function) construct code to compute the address of its
24901 PLT entry in the large x86-64 PIC model. */
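/* In other words, the emitted code materializes symbol@PLTOFF in a
   register and adds the PIC register, yielding the PLT entry address.  */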
24902 static rtx
24903 construct_plt_address (rtx symbol)
24904 {
24905 rtx tmp, unspec;
24906
24907 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24908 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24909 gcc_assert (Pmode == DImode);
24910
24911 tmp = gen_reg_rtx (Pmode);
24912 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24913
24914 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24915 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24916 return tmp;
24917 }
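/* Illustrative sketch, assuming the usual large-PIC conventions: the code
   above amounts to something like
       movabs $func@PLTOFF, %tmp
       add    %pic_reg, %tmp
   i.e. the PLT entry address is formed as a 64-bit offset from the GOT
   base kept in the PIC register, since the large code model cannot assume
   32-bit pc-relative reach.  */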
24918
24919 rtx
24920 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24921 rtx callarg2,
24922 rtx pop, bool sibcall)
24923 {
24924 rtx vec[3];
24925 rtx use = NULL, call;
24926 unsigned int vec_len = 0;
24927
24928 if (pop == const0_rtx)
24929 pop = NULL;
24930 gcc_assert (!TARGET_64BIT || !pop);
24931
24932 if (TARGET_MACHO && !TARGET_64BIT)
24933 {
24934 #if TARGET_MACHO
24935 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24936 fnaddr = machopic_indirect_call_target (fnaddr);
24937 #endif
24938 }
24939 else
24940 {
24941 /* Static functions and indirect calls don't need the pic register. */
24942 if (flag_pic
24943 && (!TARGET_64BIT
24944 || (ix86_cmodel == CM_LARGE_PIC
24945 && DEFAULT_ABI != MS_ABI))
24946 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24947 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24948 {
24949 use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
24950 if (ix86_use_pseudo_pic_reg ())
24951 emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
24952 pic_offset_table_rtx);
24953 }
24954 }
24955
24956 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24957 {
24958 rtx al = gen_rtx_REG (QImode, AX_REG);
24959 emit_move_insn (al, callarg2);
24960 use_reg (&use, al);
24961 }
24962
24963 if (ix86_cmodel == CM_LARGE_PIC
24964 && !TARGET_PECOFF
24965 && MEM_P (fnaddr)
24966 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24967 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24968 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24969 else if (sibcall
24970 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24971 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24972 {
24973 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24974 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24975 }
24976
24977 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24978 if (retval)
24979 call = gen_rtx_SET (VOIDmode, retval, call);
24980 vec[vec_len++] = call;
24981
24982 if (pop)
24983 {
24984 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24985 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24986 vec[vec_len++] = pop;
24987 }
24988
24989 if (TARGET_64BIT_MS_ABI
24990 && (!callarg2 || INTVAL (callarg2) != -2))
24991 {
24992 int const cregs_size
24993 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24994 int i;
24995
24996 for (i = 0; i < cregs_size; i++)
24997 {
24998 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24999 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25000
25001 clobber_reg (&use, gen_rtx_REG (mode, regno));
25002 }
25003 }
25004
25005 if (vec_len > 1)
25006 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25007 call = emit_call_insn (call);
25008 if (use)
25009 CALL_INSN_FUNCTION_USAGE (call) = use;
25010
25011 return call;
25012 }
25013
25014 /* Output the assembly for a call instruction. */
25015
25016 const char *
25017 ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25018 {
25019 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25020 bool seh_nop_p = false;
25021 const char *xasm;
25022
25023 if (SIBLING_CALL_P (insn))
25024 {
25025 if (direct_p)
25026 xasm = "jmp\t%P0";
25027 /* SEH epilogue detection requires the indirect branch case
25028 to include REX.W. */
25029 else if (TARGET_SEH)
25030 xasm = "rex.W jmp %A0";
25031 else
25032 xasm = "jmp\t%A0";
25033
25034 output_asm_insn (xasm, &call_op);
25035 return "";
25036 }
25037
25038 /* SEH unwinding can require an extra nop to be emitted in several
25039 circumstances. Determine if we have one of those. */
25040 if (TARGET_SEH)
25041 {
25042 rtx_insn *i;
25043
25044 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25045 {
25046 /* If we get to another real insn, we don't need the nop. */
25047 if (INSN_P (i))
25048 break;
25049
25050 /* If we get to the epilogue note, prevent a catch region from
25051 being adjacent to the standard epilogue sequence. If non-
25052 call-exceptions, we'll have done this during epilogue emission. */
25053 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25054 && !flag_non_call_exceptions
25055 && !can_throw_internal (insn))
25056 {
25057 seh_nop_p = true;
25058 break;
25059 }
25060 }
25061
25062 /* If we didn't find a real insn following the call, prevent the
25063 unwinder from looking into the next function. */
25064 if (i == NULL)
25065 seh_nop_p = true;
25066 }
25067
25068 if (direct_p)
25069 xasm = "call\t%P0";
25070 else
25071 xasm = "call\t%A0";
25072
25073 output_asm_insn (xasm, &call_op);
25074
25075 if (seh_nop_p)
25076 return "nop";
25077
25078 return "";
25079 }
25080 \f
25081 /* Clear stack slot assignments remembered from previous functions.
25082 This is called from INIT_EXPANDERS once before RTL is emitted for each
25083 function. */
25084
25085 static struct machine_function *
25086 ix86_init_machine_status (void)
25087 {
25088 struct machine_function *f;
25089
25090 f = ggc_cleared_alloc<machine_function> ();
25091 f->use_fast_prologue_epilogue_nregs = -1;
25092 f->call_abi = ix86_abi;
25093
25094 return f;
25095 }
25096
25097 /* Return a MEM corresponding to a stack slot with mode MODE.
25098 Allocate a new slot if necessary.
25099
25100 The RTL for a function can have several slots available: N is
25101 which slot to use. */
25102
25103 rtx
25104 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25105 {
25106 struct stack_local_entry *s;
25107
25108 gcc_assert (n < MAX_386_STACK_LOCALS);
25109
25110 for (s = ix86_stack_locals; s; s = s->next)
25111 if (s->mode == mode && s->n == n)
25112 return validize_mem (copy_rtx (s->rtl));
25113
25114 s = ggc_alloc<stack_local_entry> ();
25115 s->n = n;
25116 s->mode = mode;
25117 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25118
25119 s->next = ix86_stack_locals;
25120 ix86_stack_locals = s;
25121 return validize_mem (copy_rtx (s->rtl));
25122 }
25123
25124 static void
25125 ix86_instantiate_decls (void)
25126 {
25127 struct stack_local_entry *s;
25128
25129 for (s = ix86_stack_locals; s; s = s->next)
25130 if (s->rtl != NULL_RTX)
25131 instantiate_decl_rtl (s->rtl);
25132 }
25133 \f
25134 /* Check whether x86 address PARTS is a pc-relative address. */
25135
25136 static bool
25137 rip_relative_addr_p (struct ix86_address *parts)
25138 {
25139 rtx base, index, disp;
25140
25141 base = parts->base;
25142 index = parts->index;
25143 disp = parts->disp;
25144
25145 if (disp && !base && !index)
25146 {
25147 if (TARGET_64BIT)
25148 {
25149 rtx symbol = disp;
25150
25151 if (GET_CODE (disp) == CONST)
25152 symbol = XEXP (disp, 0);
25153 if (GET_CODE (symbol) == PLUS
25154 && CONST_INT_P (XEXP (symbol, 1)))
25155 symbol = XEXP (symbol, 0);
25156
25157 if (GET_CODE (symbol) == LABEL_REF
25158 || (GET_CODE (symbol) == SYMBOL_REF
25159 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25160 || (GET_CODE (symbol) == UNSPEC
25161 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25162 || XINT (symbol, 1) == UNSPEC_PCREL
25163 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25164 return true;
25165 }
25166 }
25167 return false;
25168 }
25169
25170 /* Calculate the length of the memory address in the instruction encoding.
25171 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25172 or other prefixes. We never generate addr32 prefix for LEA insn. */
25173
25174 int
25175 memory_address_length (rtx addr, bool lea)
25176 {
25177 struct ix86_address parts;
25178 rtx base, index, disp;
25179 int len;
25180 int ok;
25181
25182 if (GET_CODE (addr) == PRE_DEC
25183 || GET_CODE (addr) == POST_INC
25184 || GET_CODE (addr) == PRE_MODIFY
25185 || GET_CODE (addr) == POST_MODIFY)
25186 return 0;
25187
25188 ok = ix86_decompose_address (addr, &parts);
25189 gcc_assert (ok);
25190
25191 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25192
25193 /* If this is not LEA instruction, add the length of addr32 prefix. */
25194 if (TARGET_64BIT && !lea
25195 && (SImode_address_operand (addr, VOIDmode)
25196 || (parts.base && GET_MODE (parts.base) == SImode)
25197 || (parts.index && GET_MODE (parts.index) == SImode)))
25198 len++;
25199
25200 base = parts.base;
25201 index = parts.index;
25202 disp = parts.disp;
25203
25204 if (base && GET_CODE (base) == SUBREG)
25205 base = SUBREG_REG (base);
25206 if (index && GET_CODE (index) == SUBREG)
25207 index = SUBREG_REG (index);
25208
25209 gcc_assert (base == NULL_RTX || REG_P (base));
25210 gcc_assert (index == NULL_RTX || REG_P (index));
25211
25212 /* Rule of thumb:
25213 - esp as the base always wants an index,
25214 - ebp as the base always wants a displacement,
25215 - r12 as the base always wants an index,
25216 - r13 as the base always wants a displacement. */
25217
25218 /* Register Indirect. */
25219 if (base && !index && !disp)
25220 {
25221 /* esp (for its index) and ebp (for its displacement) need
25222 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25223 code. */
25224 if (base == arg_pointer_rtx
25225 || base == frame_pointer_rtx
25226 || REGNO (base) == SP_REG
25227 || REGNO (base) == BP_REG
25228 || REGNO (base) == R12_REG
25229 || REGNO (base) == R13_REG)
25230 len++;
25231 }
25232
25233 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25234 is not disp32, but disp32(%rip), so for disp32
25235 SIB byte is needed, unless print_operand_address
25236 optimizes it into disp32(%rip) or (%rip) is implied
25237 by UNSPEC. */
25238 else if (disp && !base && !index)
25239 {
25240 len += 4;
25241 if (rip_relative_addr_p (&parts))
25242 len++;
25243 }
25244 else
25245 {
25246 /* Find the length of the displacement constant. */
25247 if (disp)
25248 {
25249 if (base && satisfies_constraint_K (disp))
25250 len += 1;
25251 else
25252 len += 4;
25253 }
25254 /* ebp always wants a displacement. Similarly r13. */
25255 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25256 len++;
25257
25258 /* An index requires the two-byte modrm form.... */
25259 if (index
25260 /* ...like esp (or r12), which always wants an index. */
25261 || base == arg_pointer_rtx
25262 || base == frame_pointer_rtx
25263 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25264 len++;
25265 }
25266
25267 return len;
25268 }
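/* Informal worked examples of the rules above (byte counts exclude the
   modrm byte and opcode, as documented in the function comment):
       (%eax)         -> 0   plain register indirect
       (%esp)         -> 1   needs a SIB byte
       8(%ebp)        -> 1   disp8 with an ebp base
       foo(,%eax,4)   -> 5   SIB byte plus a disp32
   These are a reading aid only, not an exhaustive table.  */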
25269
25270 /* Compute default value for "length_immediate" attribute. When SHORTFORM
25271 is set, expect that the insn has an 8-bit immediate alternative.  */
25272 int
25273 ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
25274 {
25275 int len = 0;
25276 int i;
25277 extract_insn_cached (insn);
25278 for (i = recog_data.n_operands - 1; i >= 0; --i)
25279 if (CONSTANT_P (recog_data.operand[i]))
25280 {
25281 enum attr_mode mode = get_attr_mode (insn);
25282
25283 gcc_assert (!len);
25284 if (shortform && CONST_INT_P (recog_data.operand[i]))
25285 {
25286 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25287 switch (mode)
25288 {
25289 case MODE_QI:
25290 len = 1;
25291 continue;
25292 case MODE_HI:
25293 ival = trunc_int_for_mode (ival, HImode);
25294 break;
25295 case MODE_SI:
25296 ival = trunc_int_for_mode (ival, SImode);
25297 break;
25298 default:
25299 break;
25300 }
25301 if (IN_RANGE (ival, -128, 127))
25302 {
25303 len = 1;
25304 continue;
25305 }
25306 }
25307 switch (mode)
25308 {
25309 case MODE_QI:
25310 len = 1;
25311 break;
25312 case MODE_HI:
25313 len = 2;
25314 break;
25315 case MODE_SI:
25316 len = 4;
25317 break;
25318 /* Immediates for DImode instructions are encoded
25319 as 32bit sign extended values. */
25320 case MODE_DI:
25321 len = 4;
25322 break;
25323 default:
25324 fatal_insn ("unknown insn mode", insn);
25325 }
25326 }
25327 return len;
25328 }
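/* For example (illustrative): with SHORTFORM set, "add $3, %eax" can use
   the sign-extended imm8 encoding, so the routine reports 1, while
   "add $300, %eax" needs a full imm32 and reports 4.  DImode immediates
   are reported as 4 because the hardware takes only 32-bit sign-extended
   immediates there; full 64-bit immediates (movabs-style) are expected to
   be handled by their own patterns rather than by this default.  */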
25329
25330 /* Compute default value for "length_address" attribute. */
25331 int
25332 ix86_attr_length_address_default (rtx_insn *insn)
25333 {
25334 int i;
25335
25336 if (get_attr_type (insn) == TYPE_LEA)
25337 {
25338 rtx set = PATTERN (insn), addr;
25339
25340 if (GET_CODE (set) == PARALLEL)
25341 set = XVECEXP (set, 0, 0);
25342
25343 gcc_assert (GET_CODE (set) == SET);
25344
25345 addr = SET_SRC (set);
25346
25347 return memory_address_length (addr, true);
25348 }
25349
25350 extract_insn_cached (insn);
25351 for (i = recog_data.n_operands - 1; i >= 0; --i)
25352 if (MEM_P (recog_data.operand[i]))
25353 {
25354 constrain_operands_cached (reload_completed);
25355 if (which_alternative != -1)
25356 {
25357 const char *constraints = recog_data.constraints[i];
25358 int alt = which_alternative;
25359
25360 while (*constraints == '=' || *constraints == '+')
25361 constraints++;
25362 while (alt-- > 0)
25363 while (*constraints++ != ',')
25364 ;
25365 /* Skip ignored operands. */
25366 if (*constraints == 'X')
25367 continue;
25368 }
25369 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25370 }
25371 return 0;
25372 }
25373
25374 /* Compute default value for "length_vex" attribute. It includes
25375 2 or 3 byte VEX prefix and 1 opcode byte. */
25376
25377 int
25378 ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
25379 bool has_vex_w)
25380 {
25381 int i;
25382
25383 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit requires
25384 the 3-byte VEX prefix.  */
25385 if (!has_0f_opcode || has_vex_w)
25386 return 3 + 1;
25387
25388 /* We can always use 2 byte VEX prefix in 32bit. */
25389 if (!TARGET_64BIT)
25390 return 2 + 1;
25391
25392 extract_insn_cached (insn);
25393
25394 for (i = recog_data.n_operands - 1; i >= 0; --i)
25395 if (REG_P (recog_data.operand[i]))
25396 {
25397 /* REX.W bit uses 3 byte VEX prefix. */
25398 if (GET_MODE (recog_data.operand[i]) == DImode
25399 && GENERAL_REG_P (recog_data.operand[i]))
25400 return 3 + 1;
25401 }
25402 else
25403 {
25404 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25405 if (MEM_P (recog_data.operand[i])
25406 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25407 return 3 + 1;
25408 }
25409
25410 return 2 + 1;
25411 }
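/* Informal examples of the rule above: "vandps %xmm1, %xmm2, %xmm3" is a
   0f-map insn with no REX.W/X/B needs, so it fits the 2-byte VEX prefix
   (2 + 1); a 0f38/0f3a-map insn such as vpshufb, an insn with VEX.W set,
   an insn with a DImode general-register operand, or one whose memory
   operand mentions %r8-%r15 all need the 3-byte VEX prefix (3 + 1).  */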
25412 \f
25413 /* Return the maximum number of instructions a cpu can issue. */
25414
25415 static int
25416 ix86_issue_rate (void)
25417 {
25418 switch (ix86_tune)
25419 {
25420 case PROCESSOR_PENTIUM:
25421 case PROCESSOR_BONNELL:
25422 case PROCESSOR_SILVERMONT:
25423 case PROCESSOR_INTEL:
25424 case PROCESSOR_K6:
25425 case PROCESSOR_BTVER2:
25426 case PROCESSOR_PENTIUM4:
25427 case PROCESSOR_NOCONA:
25428 return 2;
25429
25430 case PROCESSOR_PENTIUMPRO:
25431 case PROCESSOR_ATHLON:
25432 case PROCESSOR_K8:
25433 case PROCESSOR_AMDFAM10:
25434 case PROCESSOR_GENERIC:
25435 case PROCESSOR_BTVER1:
25436 return 3;
25437
25438 case PROCESSOR_BDVER1:
25439 case PROCESSOR_BDVER2:
25440 case PROCESSOR_BDVER3:
25441 case PROCESSOR_BDVER4:
25442 case PROCESSOR_CORE2:
25443 case PROCESSOR_NEHALEM:
25444 case PROCESSOR_SANDYBRIDGE:
25445 case PROCESSOR_HASWELL:
25446 return 4;
25447
25448 default:
25449 return 1;
25450 }
25451 }
25452
25453 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25454 by DEP_INSN and nothing else set by DEP_INSN.  */
25455
25456 static bool
25457 ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
25458 {
25459 rtx set, set2;
25460
25461 /* Simplify the test for uninteresting insns. */
25462 if (insn_type != TYPE_SETCC
25463 && insn_type != TYPE_ICMOV
25464 && insn_type != TYPE_FCMOV
25465 && insn_type != TYPE_IBR)
25466 return false;
25467
25468 if ((set = single_set (dep_insn)) != 0)
25469 {
25470 set = SET_DEST (set);
25471 set2 = NULL_RTX;
25472 }
25473 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25474 && XVECLEN (PATTERN (dep_insn), 0) == 2
25475 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25476 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25477 {
25478 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25479 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25480 }
25481 else
25482 return false;
25483
25484 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25485 return false;
25486
25487 /* This test is true if the dependent insn reads the flags but
25488 not any other potentially set register. */
25489 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25490 return false;
25491
25492 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25493 return false;
25494
25495 return true;
25496 }
25497
25498 /* Return true iff USE_INSN has a memory address with operands set by
25499 SET_INSN. */
25500
25501 bool
25502 ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
25503 {
25504 int i;
25505 extract_insn_cached (use_insn);
25506 for (i = recog_data.n_operands - 1; i >= 0; --i)
25507 if (MEM_P (recog_data.operand[i]))
25508 {
25509 rtx addr = XEXP (recog_data.operand[i], 0);
25510 return modified_in_p (addr, set_insn) != 0;
25511 }
25512 return false;
25513 }
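/* Example (illustrative) of the dependency detected here, the classic
   address-generation interlock:
       add  $4, %ebx          <- SET_INSN writes %ebx
       mov  (%ebx), %eax      <- USE_INSN's address uses %ebx
   ix86_agi_dependent returns true for this pair, and ix86_adjust_cost
   charges an extra cycle for it on PROCESSOR_PENTIUM.  */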
25514
25515 /* Helper function for exact_store_load_dependency.
25516 Return true if addr is found in insn. */
25517 static bool
25518 exact_dependency_1 (rtx addr, rtx insn)
25519 {
25520 enum rtx_code code;
25521 const char *format_ptr;
25522 int i, j;
25523
25524 code = GET_CODE (insn);
25525 switch (code)
25526 {
25527 case MEM:
25528 if (rtx_equal_p (addr, insn))
25529 return true;
25530 break;
25531 case REG:
25532 CASE_CONST_ANY:
25533 case SYMBOL_REF:
25534 case CODE_LABEL:
25535 case PC:
25536 case CC0:
25537 case EXPR_LIST:
25538 return false;
25539 default:
25540 break;
25541 }
25542
25543 format_ptr = GET_RTX_FORMAT (code);
25544 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25545 {
25546 switch (*format_ptr++)
25547 {
25548 case 'e':
25549 if (exact_dependency_1 (addr, XEXP (insn, i)))
25550 return true;
25551 break;
25552 case 'E':
25553 for (j = 0; j < XVECLEN (insn, i); j++)
25554 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25555 return true;
25556 break;
25557 }
25558 }
25559 return false;
25560 }
25561
25562 /* Return true if there exists an exact dependency between a store and a load,
25563 i.e. the same memory address is used in both.  */
25564 static bool
25565 exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
25566 {
25567 rtx set1, set2;
25568
25569 set1 = single_set (store);
25570 if (!set1)
25571 return false;
25572 if (!MEM_P (SET_DEST (set1)))
25573 return false;
25574 set2 = single_set (load);
25575 if (!set2)
25576 return false;
25577 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25578 return true;
25579 return false;
25580 }
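/* For instance (illustrative), for the pair
       movw   %ax, 6(%esp)        store
       movzwl 6(%esp), %edx       load
   the load's source contains a MEM equal to the store's destination, so
   this returns true; the Silvermont/Intel cost adjustment below uses it
   to model the store-forwarding penalty for narrow stores.  */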
25581
25582 static int
25583 ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
25584 {
25585 enum attr_type insn_type, dep_insn_type;
25586 enum attr_memory memory;
25587 rtx set, set2;
25588 int dep_insn_code_number;
25589
25590 /* Anti and output dependencies have zero cost on all CPUs. */
25591 if (REG_NOTE_KIND (link) != 0)
25592 return 0;
25593
25594 dep_insn_code_number = recog_memoized (dep_insn);
25595
25596 /* If we can't recognize the insns, we can't really do anything. */
25597 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25598 return cost;
25599
25600 insn_type = get_attr_type (insn);
25601 dep_insn_type = get_attr_type (dep_insn);
25602
25603 switch (ix86_tune)
25604 {
25605 case PROCESSOR_PENTIUM:
25606 /* Address Generation Interlock adds a cycle of latency. */
25607 if (insn_type == TYPE_LEA)
25608 {
25609 rtx addr = PATTERN (insn);
25610
25611 if (GET_CODE (addr) == PARALLEL)
25612 addr = XVECEXP (addr, 0, 0);
25613
25614 gcc_assert (GET_CODE (addr) == SET);
25615
25616 addr = SET_SRC (addr);
25617 if (modified_in_p (addr, dep_insn))
25618 cost += 1;
25619 }
25620 else if (ix86_agi_dependent (dep_insn, insn))
25621 cost += 1;
25622
25623 /* ??? Compares pair with jump/setcc. */
25624 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25625 cost = 0;
25626
25627 /* Floating point stores require value to be ready one cycle earlier. */
25628 if (insn_type == TYPE_FMOV
25629 && get_attr_memory (insn) == MEMORY_STORE
25630 && !ix86_agi_dependent (dep_insn, insn))
25631 cost += 1;
25632 break;
25633
25634 case PROCESSOR_PENTIUMPRO:
25635 /* INT->FP conversion is expensive. */
25636 if (get_attr_fp_int_src (dep_insn))
25637 cost += 5;
25638
25639 /* There is one cycle extra latency between an FP op and a store. */
25640 if (insn_type == TYPE_FMOV
25641 && (set = single_set (dep_insn)) != NULL_RTX
25642 && (set2 = single_set (insn)) != NULL_RTX
25643 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25644 && MEM_P (SET_DEST (set2)))
25645 cost += 1;
25646
25647 memory = get_attr_memory (insn);
25648
25649 /* Show the ability of the reorder buffer to hide the latency of a load by
25650 executing it in parallel with the previous instruction when the previous
25651 instruction is not needed to compute the address.  */
25652 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25653 && !ix86_agi_dependent (dep_insn, insn))
25654 {
25655 /* Claim that moves take one cycle, as the core can issue one load
25656 at a time and the next load can start a cycle later.  */
25657 if (dep_insn_type == TYPE_IMOV
25658 || dep_insn_type == TYPE_FMOV)
25659 cost = 1;
25660 else if (cost > 1)
25661 cost--;
25662 }
25663 break;
25664
25665 case PROCESSOR_K6:
25666 /* The esp dependency is resolved before
25667 the instruction is really finished. */
25668 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25669 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25670 return 1;
25671
25672 /* INT->FP conversion is expensive. */
25673 if (get_attr_fp_int_src (dep_insn))
25674 cost += 5;
25675
25676 memory = get_attr_memory (insn);
25677
25678 /* Show the ability of the reorder buffer to hide the latency of a load by
25679 executing it in parallel with the previous instruction when the previous
25680 instruction is not needed to compute the address.  */
25681 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25682 && !ix86_agi_dependent (dep_insn, insn))
25683 {
25684 /* Claim that moves take one cycle, as the core can issue one load
25685 at a time and the next load can start a cycle later.  */
25686 if (dep_insn_type == TYPE_IMOV
25687 || dep_insn_type == TYPE_FMOV)
25688 cost = 1;
25689 else if (cost > 2)
25690 cost -= 2;
25691 else
25692 cost = 1;
25693 }
25694 break;
25695
25696 case PROCESSOR_AMDFAM10:
25697 case PROCESSOR_BDVER1:
25698 case PROCESSOR_BDVER2:
25699 case PROCESSOR_BDVER3:
25700 case PROCESSOR_BDVER4:
25701 case PROCESSOR_BTVER1:
25702 case PROCESSOR_BTVER2:
25703 case PROCESSOR_GENERIC:
25704 /* Stack engine allows push&pop instructions to execute in parallel.  */
25705 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25706 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25707 return 0;
25708 /* FALLTHRU */
25709
25710 case PROCESSOR_ATHLON:
25711 case PROCESSOR_K8:
25712 memory = get_attr_memory (insn);
25713
25714 /* Show the ability of the reorder buffer to hide the latency of a load by
25715 executing it in parallel with the previous instruction when the previous
25716 instruction is not needed to compute the address.  */
25717 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25718 && !ix86_agi_dependent (dep_insn, insn))
25719 {
25720 enum attr_unit unit = get_attr_unit (insn);
25721 int loadcost = 3;
25722
25723 /* Because of the difference between the length of integer and
25724 floating unit pipeline preparation stages, the memory operands
25725 for floating point are cheaper.
25726
25727 ??? For Athlon the difference is most probably 2.  */
25728 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25729 loadcost = 3;
25730 else
25731 loadcost = TARGET_ATHLON ? 2 : 0;
25732
25733 if (cost >= loadcost)
25734 cost -= loadcost;
25735 else
25736 cost = 0;
25737 }
25738 break;
25739
25740 case PROCESSOR_CORE2:
25741 case PROCESSOR_NEHALEM:
25742 case PROCESSOR_SANDYBRIDGE:
25743 case PROCESSOR_HASWELL:
25744 /* Stack engine allows push&pop instructions to execute in parallel.  */
25745 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25746 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25747 return 0;
25748
25749 memory = get_attr_memory (insn);
25750
25751 /* Show the ability of the reorder buffer to hide the latency of a load by
25752 executing it in parallel with the previous instruction when the previous
25753 instruction is not needed to compute the address.  */
25754 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25755 && !ix86_agi_dependent (dep_insn, insn))
25756 {
25757 if (cost >= 4)
25758 cost -= 4;
25759 else
25760 cost = 0;
25761 }
25762 break;
25763
25764 case PROCESSOR_SILVERMONT:
25765 case PROCESSOR_INTEL:
25766 if (!reload_completed)
25767 return cost;
25768
25769 /* Increase cost of integer loads. */
25770 memory = get_attr_memory (dep_insn);
25771 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25772 {
25773 enum attr_unit unit = get_attr_unit (dep_insn);
25774 if (unit == UNIT_INTEGER && cost == 1)
25775 {
25776 if (memory == MEMORY_LOAD)
25777 cost = 3;
25778 else
25779 {
25780 /* Increase cost of ld/st for short int types only
25781 because of store forwarding issue. */
25782 rtx set = single_set (dep_insn);
25783 if (set && (GET_MODE (SET_DEST (set)) == QImode
25784 || GET_MODE (SET_DEST (set)) == HImode))
25785 {
25786 /* Increase cost of store/load insn if exact
25787 dependence exists and it is load insn. */
25788 enum attr_memory insn_memory = get_attr_memory (insn);
25789 if (insn_memory == MEMORY_LOAD
25790 && exact_store_load_dependency (dep_insn, insn))
25791 cost = 3;
25792 }
25793 }
25794 }
25795 }
25796
25797 default:
25798 break;
25799 }
25800
25801 return cost;
25802 }
25803
25804 /* How many alternative schedules to try. This should be as wide as the
25805 scheduling freedom in the DFA, but no wider. Making this value too
25806 large results in extra work for the scheduler.  */
25807
25808 static int
25809 ia32_multipass_dfa_lookahead (void)
25810 {
25811 switch (ix86_tune)
25812 {
25813 case PROCESSOR_PENTIUM:
25814 return 2;
25815
25816 case PROCESSOR_PENTIUMPRO:
25817 case PROCESSOR_K6:
25818 return 1;
25819
25820 case PROCESSOR_BDVER1:
25821 case PROCESSOR_BDVER2:
25822 case PROCESSOR_BDVER3:
25823 case PROCESSOR_BDVER4:
25824 /* We use lookahead value 4 for BD both before and after reload
25825 schedules. Plan is to have value 8 included for O3. */
25826 return 4;
25827
25828 case PROCESSOR_CORE2:
25829 case PROCESSOR_NEHALEM:
25830 case PROCESSOR_SANDYBRIDGE:
25831 case PROCESSOR_HASWELL:
25832 case PROCESSOR_BONNELL:
25833 case PROCESSOR_SILVERMONT:
25834 case PROCESSOR_INTEL:
25835 /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25836 the number of instructions that can be executed in a cycle, i.e.,
25837 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
25838 if (reload_completed)
25839 return ix86_issue_rate ();
25840 /* Don't use lookahead for pre-reload schedule to save compile time. */
25841 return 0;
25842
25843 default:
25844 return 0;
25845 }
25846 }
25847
25848 /* Return true if target platform supports macro-fusion. */
25849
25850 static bool
25851 ix86_macro_fusion_p ()
25852 {
25853 return TARGET_FUSE_CMP_AND_BRANCH;
25854 }
25855
25856 /* Check whether the current microarchitecture supports macro fusion
25857 for insn pair "CONDGEN + CONDJMP". Refer to
25858 "Intel Architectures Optimization Reference Manual". */
25859
25860 static bool
25861 ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
25862 {
25863 rtx src, dest;
25864 enum rtx_code ccode;
25865 rtx compare_set = NULL_RTX, test_if, cond;
25866 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25867
25868 if (!any_condjump_p (condjmp))
25869 return false;
25870
25871 if (get_attr_type (condgen) != TYPE_TEST
25872 && get_attr_type (condgen) != TYPE_ICMP
25873 && get_attr_type (condgen) != TYPE_INCDEC
25874 && get_attr_type (condgen) != TYPE_ALU)
25875 return false;
25876
25877 compare_set = single_set (condgen);
25878 if (compare_set == NULL_RTX
25879 && !TARGET_FUSE_ALU_AND_BRANCH)
25880 return false;
25881
25882 if (compare_set == NULL_RTX)
25883 {
25884 int i;
25885 rtx pat = PATTERN (condgen);
25886 for (i = 0; i < XVECLEN (pat, 0); i++)
25887 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25888 {
25889 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25890 if (GET_CODE (set_src) == COMPARE)
25891 compare_set = XVECEXP (pat, 0, i);
25892 else
25893 alu_set = XVECEXP (pat, 0, i);
25894 }
25895 }
25896 if (compare_set == NULL_RTX)
25897 return false;
25898 src = SET_SRC (compare_set);
25899 if (GET_CODE (src) != COMPARE)
25900 return false;
25901
25902 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25903 supported. */
25904 if ((MEM_P (XEXP (src, 0))
25905 && CONST_INT_P (XEXP (src, 1)))
25906 || (MEM_P (XEXP (src, 1))
25907 && CONST_INT_P (XEXP (src, 0))))
25908 return false;
25909
25910 /* No fusion for RIP-relative address. */
25911 if (MEM_P (XEXP (src, 0)))
25912 addr = XEXP (XEXP (src, 0), 0);
25913 else if (MEM_P (XEXP (src, 1)))
25914 addr = XEXP (XEXP (src, 1), 0);
25915
25916 if (addr) {
25917 ix86_address parts;
25918 int ok = ix86_decompose_address (addr, &parts);
25919 gcc_assert (ok);
25920
25921 if (rip_relative_addr_p (&parts))
25922 return false;
25923 }
25924
25925 test_if = SET_SRC (pc_set (condjmp));
25926 cond = XEXP (test_if, 0);
25927 ccode = GET_CODE (cond);
25928 /* Check whether the conditional jump uses the Sign or Overflow flags.  */
25929 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25930 && (ccode == GE
25931 || ccode == GT
25932 || ccode == LE
25933 || ccode == LT))
25934 return false;
25935
25936 /* Return true for TYPE_TEST and TYPE_ICMP. */
25937 if (get_attr_type (condgen) == TYPE_TEST
25938 || get_attr_type (condgen) == TYPE_ICMP)
25939 return true;
25940
25941 /* The following handles the case of macro-fusion for alu + jmp.  */
25942 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25943 return false;
25944
25945 /* No fusion for alu op with memory destination operand. */
25946 dest = SET_DEST (alu_set);
25947 if (MEM_P (dest))
25948 return false;
25949
25950 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25951 supported. */
25952 if (get_attr_type (condgen) == TYPE_INCDEC
25953 && (ccode == GEU
25954 || ccode == GTU
25955 || ccode == LEU
25956 || ccode == LTU))
25957 return false;
25958
25959 return true;
25960 }
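/* Typical pairs as judged by the checks above (illustrative only):
       cmp  %eax, %ebx ; jne .L1      fusible  (TYPE_ICMP + cond jump)
       cmpl $1, (%rdi) ; je  .L2      rejected (MEM-IMM compare)
       dec  %ecx       ; jae .L3      rejected (inc/dec + unsigned jcc)
   The exact set also depends on the TARGET_FUSE_* tuning flags and is
   taken from the Intel optimization manual cited above.  */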
25961
25962 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
25963 execution.  It is applied if
25964 (1) an IMUL instruction is at the top of the list;
25965 (2) there is exactly one producer of an independent IMUL instruction in
25966 the ready list.
25967 Return the index of the IMUL producer if it was found and -1 otherwise.  */
25968 static int
25969 do_reorder_for_imul (rtx_insn **ready, int n_ready)
25970 {
25971 rtx_insn *insn;
25972 rtx set, insn1, insn2;
25973 sd_iterator_def sd_it;
25974 dep_t dep;
25975 int index = -1;
25976 int i;
25977
25978 if (!TARGET_BONNELL)
25979 return index;
25980
25981 /* Check that IMUL instruction is on the top of ready list. */
25982 insn = ready[n_ready - 1];
25983 set = single_set (insn);
25984 if (!set)
25985 return index;
25986 if (!(GET_CODE (SET_SRC (set)) == MULT
25987 && GET_MODE (SET_SRC (set)) == SImode))
25988 return index;
25989
25990 /* Search for producer of independent IMUL instruction. */
25991 for (i = n_ready - 2; i >= 0; i--)
25992 {
25993 insn = ready[i];
25994 if (!NONDEBUG_INSN_P (insn))
25995 continue;
25996 /* Skip IMUL instruction. */
25997 insn2 = PATTERN (insn);
25998 if (GET_CODE (insn2) == PARALLEL)
25999 insn2 = XVECEXP (insn2, 0, 0);
26000 if (GET_CODE (insn2) == SET
26001 && GET_CODE (SET_SRC (insn2)) == MULT
26002 && GET_MODE (SET_SRC (insn2)) == SImode)
26003 continue;
26004
26005 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26006 {
26007 rtx con;
26008 con = DEP_CON (dep);
26009 if (!NONDEBUG_INSN_P (con))
26010 continue;
26011 insn1 = PATTERN (con);
26012 if (GET_CODE (insn1) == PARALLEL)
26013 insn1 = XVECEXP (insn1, 0, 0);
26014
26015 if (GET_CODE (insn1) == SET
26016 && GET_CODE (SET_SRC (insn1)) == MULT
26017 && GET_MODE (SET_SRC (insn1)) == SImode)
26018 {
26019 sd_iterator_def sd_it1;
26020 dep_t dep1;
26021 /* Check if there is no other dependee for IMUL. */
26022 index = i;
26023 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26024 {
26025 rtx pro;
26026 pro = DEP_PRO (dep1);
26027 if (!NONDEBUG_INSN_P (pro))
26028 continue;
26029 if (pro != insn)
26030 index = -1;
26031 }
26032 if (index >= 0)
26033 break;
26034 }
26035 }
26036 if (index >= 0)
26037 break;
26038 }
26039 return index;
26040 }
26041
26042 /* Try to find the best candidate at the top of the ready list if two insns
26043 have the same priority - the candidate is best if its dependees were
26044 scheduled earlier.  Applied for Silvermont only.
26045 Return true if the top 2 insns must be interchanged.  */
26046 static bool
26047 swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26048 {
26049 rtx_insn *top = ready[n_ready - 1];
26050 rtx_insn *next = ready[n_ready - 2];
26051 rtx set;
26052 sd_iterator_def sd_it;
26053 dep_t dep;
26054 int clock1 = -1;
26055 int clock2 = -1;
26056 #define INSN_TICK(INSN) (HID (INSN)->tick)
26057
26058 if (!TARGET_SILVERMONT && !TARGET_INTEL)
26059 return false;
26060
26061 if (!NONDEBUG_INSN_P (top))
26062 return false;
26063 if (!NONJUMP_INSN_P (top))
26064 return false;
26065 if (!NONDEBUG_INSN_P (next))
26066 return false;
26067 if (!NONJUMP_INSN_P (next))
26068 return false;
26069 set = single_set (top);
26070 if (!set)
26071 return false;
26072 set = single_set (next);
26073 if (!set)
26074 return false;
26075
26076 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26077 {
26078 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26079 return false;
26080 /* Determine the winner more precisely.  */
26081 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26082 {
26083 rtx pro;
26084 pro = DEP_PRO (dep);
26085 if (!NONDEBUG_INSN_P (pro))
26086 continue;
26087 if (INSN_TICK (pro) > clock1)
26088 clock1 = INSN_TICK (pro);
26089 }
26090 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26091 {
26092 rtx pro;
26093 pro = DEP_PRO (dep);
26094 if (!NONDEBUG_INSN_P (pro))
26095 continue;
26096 if (INSN_TICK (pro) > clock2)
26097 clock2 = INSN_TICK (pro);
26098 }
26099
26100 if (clock1 == clock2)
26101 {
26102 /* Determine winner - load must win. */
26103 enum attr_memory memory1, memory2;
26104 memory1 = get_attr_memory (top);
26105 memory2 = get_attr_memory (next);
26106 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26107 return true;
26108 }
26109 return (bool) (clock2 < clock1);
26110 }
26111 return false;
26112 #undef INSN_TICK
26113 }
26114
26115 /* Perform possible reordering of the ready list for Atom/Silvermont only.
26116 Return issue rate.  */
26117 static int
26118 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26119 int *pn_ready, int clock_var)
26120 {
26121 int issue_rate = -1;
26122 int n_ready = *pn_ready;
26123 int i;
26124 rtx_insn *insn;
26125 int index = -1;
26126
26127 /* Set up issue rate. */
26128 issue_rate = ix86_issue_rate ();
26129
26130 /* Do reordering for BONNELL/SILVERMONT only.  */
26131 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26132 return issue_rate;
26133
26134 /* Nothing to do if ready list contains only 1 instruction. */
26135 if (n_ready <= 1)
26136 return issue_rate;
26137
26138 /* Do reordering for the post-reload scheduler only.  */
26139 if (!reload_completed)
26140 return issue_rate;
26141
26142 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26143 {
26144 if (sched_verbose > 1)
26145 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26146 INSN_UID (ready[index]));
26147
26148 /* Put IMUL producer (ready[index]) at the top of ready list. */
26149 insn = ready[index];
26150 for (i = index; i < n_ready - 1; i++)
26151 ready[i] = ready[i + 1];
26152 ready[n_ready - 1] = insn;
26153 return issue_rate;
26154 }
26155 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26156 {
26157 if (sched_verbose > 1)
26158 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26159 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26160 /* Swap 2 top elements of ready list. */
26161 insn = ready[n_ready - 1];
26162 ready[n_ready - 1] = ready[n_ready - 2];
26163 ready[n_ready - 2] = insn;
26164 }
26165 return issue_rate;
26166 }
26167
26168 static bool
26169 ix86_class_likely_spilled_p (reg_class_t);
26170
26171 /* Return true if the lhs of INSN is a HW function argument register, and set
26172 IS_SPILLED to true if it is a likely-spilled HW register.  */
26173 static bool
26174 insn_is_function_arg (rtx insn, bool* is_spilled)
26175 {
26176 rtx dst;
26177
26178 if (!NONDEBUG_INSN_P (insn))
26179 return false;
26180 /* Call instructions are not movable; ignore them.  */
26181 if (CALL_P (insn))
26182 return false;
26183 insn = PATTERN (insn);
26184 if (GET_CODE (insn) == PARALLEL)
26185 insn = XVECEXP (insn, 0, 0);
26186 if (GET_CODE (insn) != SET)
26187 return false;
26188 dst = SET_DEST (insn);
26189 if (REG_P (dst) && HARD_REGISTER_P (dst)
26190 && ix86_function_arg_regno_p (REGNO (dst)))
26191 {
26192 /* Is it a likely-spilled HW register?  */
26193 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26194 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26195 *is_spilled = true;
26196 return true;
26197 }
26198 return false;
26199 }
26200
26201 /* Add output dependencies for a chain of adjacent function arguments, but only
26202 if there is a move to a likely-spilled HW register.  Return the first
26203 argument if at least one dependence was added, or NULL otherwise.  */
26204 static rtx_insn *
26205 add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
26206 {
26207 rtx_insn *insn;
26208 rtx_insn *last = call;
26209 rtx_insn *first_arg = NULL;
26210 bool is_spilled = false;
26211
26212 head = PREV_INSN (head);
26213
26214 /* Find the argument-passing instruction nearest to the call.  */
26215 while (true)
26216 {
26217 last = PREV_INSN (last);
26218 if (last == head)
26219 return NULL;
26220 if (!NONDEBUG_INSN_P (last))
26221 continue;
26222 if (insn_is_function_arg (last, &is_spilled))
26223 break;
26224 return NULL;
26225 }
26226
26227 first_arg = last;
26228 while (true)
26229 {
26230 insn = PREV_INSN (last);
26231 if (!INSN_P (insn))
26232 break;
26233 if (insn == head)
26234 break;
26235 if (!NONDEBUG_INSN_P (insn))
26236 {
26237 last = insn;
26238 continue;
26239 }
26240 if (insn_is_function_arg (insn, &is_spilled))
26241 {
26242 /* Add an output dependence between two function arguments if the chain
26243 of output arguments contains likely-spilled HW registers.  */
26244 if (is_spilled)
26245 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26246 first_arg = last = insn;
26247 }
26248 else
26249 break;
26250 }
26251 if (!is_spilled)
26252 return NULL;
26253 return first_arg;
26254 }
26255
26256 /* Add output or anti dependency from insn to first_arg to restrict its code
26257 motion. */
26258 static void
26259 avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
26260 {
26261 rtx set;
26262 rtx tmp;
26263
26264 set = single_set (insn);
26265 if (!set)
26266 return;
26267 tmp = SET_DEST (set);
26268 if (REG_P (tmp))
26269 {
26270 /* Add output dependency to the first function argument. */
26271 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26272 return;
26273 }
26274 /* Add anti dependency. */
26275 add_dependence (first_arg, insn, REG_DEP_ANTI);
26276 }
26277
26278 /* Avoid cross-block motion of a function argument by adding a dependency
26279 from the first non-jump instruction in bb.  */
26280 static void
26281 add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
26282 {
26283 rtx_insn *insn = BB_END (bb);
26284
26285 while (insn)
26286 {
26287 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26288 {
26289 rtx set = single_set (insn);
26290 if (set)
26291 {
26292 avoid_func_arg_motion (arg, insn);
26293 return;
26294 }
26295 }
26296 if (insn == BB_HEAD (bb))
26297 return;
26298 insn = PREV_INSN (insn);
26299 }
26300 }
26301
26302 /* Hook for pre-reload schedule - avoid motion of function arguments
26303 passed in likely spilled HW registers. */
26304 static void
26305 ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
26306 {
26307 rtx_insn *insn;
26308 rtx_insn *first_arg = NULL;
26309 if (reload_completed)
26310 return;
26311 while (head != tail && DEBUG_INSN_P (head))
26312 head = NEXT_INSN (head);
26313 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26314 if (INSN_P (insn) && CALL_P (insn))
26315 {
26316 first_arg = add_parameter_dependencies (insn, head);
26317 if (first_arg)
26318 {
26319 /* Add a dependee for the first argument to predecessors, but only if the
26320 region contains more than one block.  */
26321 basic_block bb = BLOCK_FOR_INSN (insn);
26322 int rgn = CONTAINING_RGN (bb->index);
26323 int nr_blks = RGN_NR_BLOCKS (rgn);
26324 /* Skip trivial regions and region head blocks that can have
26325 predecessors outside of region. */
26326 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26327 {
26328 edge e;
26329 edge_iterator ei;
26330
26331 /* Regions are SCCs with the exception of selective
26332 scheduling with pipelining of outer blocks enabled.
26333 So also check that immediate predecessors of a non-head
26334 block are in the same region. */
26335 FOR_EACH_EDGE (e, ei, bb->preds)
26336 {
26337 /* Avoid creating loop-carried dependencies by using the
26338 topological ordering in the region.  */
26339 if (rgn == CONTAINING_RGN (e->src->index)
26340 && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26341 add_dependee_for_func_arg (first_arg, e->src);
26342 }
26343 }
26344 insn = first_arg;
26345 if (insn == head)
26346 break;
26347 }
26348 }
26349 else if (first_arg)
26350 avoid_func_arg_motion (first_arg, insn);
26351 }
26352
26353 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26354 HW registers to maximum, to schedule them as soon as possible.  These are
26355 moves from function argument registers at the top of the function entry
26356 and moves from function return value registers after call. */
26357 static int
26358 ix86_adjust_priority (rtx_insn *insn, int priority)
26359 {
26360 rtx set;
26361
26362 if (reload_completed)
26363 return priority;
26364
26365 if (!NONDEBUG_INSN_P (insn))
26366 return priority;
26367
26368 set = single_set (insn);
26369 if (set)
26370 {
26371 rtx tmp = SET_SRC (set);
26372 if (REG_P (tmp)
26373 && HARD_REGISTER_P (tmp)
26374 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26375 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26376 return current_sched_info->sched_max_insns_priority;
26377 }
26378
26379 return priority;
26380 }
26381
26382 /* Model decoder of Core 2/i7.
26383 The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
26384 track the instruction fetch block boundaries and make sure that long
26385 (9+ bytes) instructions are assigned to D0. */
26386
26387 /* Maximum length of an insn that can be handled by
26388 a secondary decoder unit. '8' for Core 2/i7. */
26389 static int core2i7_secondary_decoder_max_insn_size;
26390
26391 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26392 '16' for Core 2/i7. */
26393 static int core2i7_ifetch_block_size;
26394
26395 /* Maximum number of instructions decoder can handle per cycle.
26396 '6' for Core 2/i7. */
26397 static int core2i7_ifetch_block_max_insns;
26398
26399 typedef struct ix86_first_cycle_multipass_data_ *
26400 ix86_first_cycle_multipass_data_t;
26401 typedef const struct ix86_first_cycle_multipass_data_ *
26402 const_ix86_first_cycle_multipass_data_t;
26403
26404 /* A variable to store target state across calls to max_issue within
26405 one cycle. */
26406 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26407 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26408
26409 /* Initialize DATA. */
26410 static void
26411 core2i7_first_cycle_multipass_init (void *_data)
26412 {
26413 ix86_first_cycle_multipass_data_t data
26414 = (ix86_first_cycle_multipass_data_t) _data;
26415
26416 data->ifetch_block_len = 0;
26417 data->ifetch_block_n_insns = 0;
26418 data->ready_try_change = NULL;
26419 data->ready_try_change_size = 0;
26420 }
26421
26422 /* Advancing the cycle; reset ifetch block counts. */
26423 static void
26424 core2i7_dfa_post_advance_cycle (void)
26425 {
26426 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26427
26428 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26429
26430 data->ifetch_block_len = 0;
26431 data->ifetch_block_n_insns = 0;
26432 }
26433
26434 static int min_insn_size (rtx_insn *);
26435
26436 /* Filter out insns from ready_try that the core will not be able to issue
26437 on current cycle due to decoder. */
26438 static void
26439 core2i7_first_cycle_multipass_filter_ready_try
26440 (const_ix86_first_cycle_multipass_data_t data,
26441 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
26442 {
26443 while (n_ready--)
26444 {
26445 rtx_insn *insn;
26446 int insn_size;
26447
26448 if (ready_try[n_ready])
26449 continue;
26450
26451 insn = get_ready_element (n_ready);
26452 insn_size = min_insn_size (insn);
26453
26454 if (/* If this is too long an insn for a secondary decoder ... */
26455 (!first_cycle_insn_p
26456 && insn_size > core2i7_secondary_decoder_max_insn_size)
26457 /* ... or it would not fit into the ifetch block ... */
26458 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26459 /* ... or the decoder is full already ... */
26460 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26461 /* ... mask the insn out. */
26462 {
26463 ready_try[n_ready] = 1;
26464
26465 if (data->ready_try_change)
26466 bitmap_set_bit (data->ready_try_change, n_ready);
26467 }
26468 }
26469 }
26470
26471 /* Prepare for a new round of multipass lookahead scheduling. */
26472 static void
26473 core2i7_first_cycle_multipass_begin (void *_data,
26474 signed char *ready_try, int n_ready,
26475 bool first_cycle_insn_p)
26476 {
26477 ix86_first_cycle_multipass_data_t data
26478 = (ix86_first_cycle_multipass_data_t) _data;
26479 const_ix86_first_cycle_multipass_data_t prev_data
26480 = ix86_first_cycle_multipass_data;
26481
26482 /* Restore the state from the end of the previous round. */
26483 data->ifetch_block_len = prev_data->ifetch_block_len;
26484 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26485
26486 /* Filter instructions that cannot be issued on current cycle due to
26487 decoder restrictions. */
26488 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26489 first_cycle_insn_p);
26490 }
26491
26492 /* INSN is being issued in current solution. Account for its impact on
26493 the decoder model. */
26494 static void
26495 core2i7_first_cycle_multipass_issue (void *_data,
26496 signed char *ready_try, int n_ready,
26497 rtx_insn *insn, const void *_prev_data)
26498 {
26499 ix86_first_cycle_multipass_data_t data
26500 = (ix86_first_cycle_multipass_data_t) _data;
26501 const_ix86_first_cycle_multipass_data_t prev_data
26502 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26503
26504 int insn_size = min_insn_size (insn);
26505
26506 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26507 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26508 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26509 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26510
26511 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26512 if (!data->ready_try_change)
26513 {
26514 data->ready_try_change = sbitmap_alloc (n_ready);
26515 data->ready_try_change_size = n_ready;
26516 }
26517 else if (data->ready_try_change_size < n_ready)
26518 {
26519 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26520 n_ready, 0);
26521 data->ready_try_change_size = n_ready;
26522 }
26523 bitmap_clear (data->ready_try_change);
26524
26525 /* Filter out insns from ready_try that the core will not be able to issue
26526 on current cycle due to decoder. */
26527 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26528 false);
26529 }
26530
26531 /* Revert the effect on ready_try. */
26532 static void
26533 core2i7_first_cycle_multipass_backtrack (const void *_data,
26534 signed char *ready_try,
26535 int n_ready ATTRIBUTE_UNUSED)
26536 {
26537 const_ix86_first_cycle_multipass_data_t data
26538 = (const_ix86_first_cycle_multipass_data_t) _data;
26539 unsigned int i = 0;
26540 sbitmap_iterator sbi;
26541
26542 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26543 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26544 {
26545 ready_try[i] = 0;
26546 }
26547 }
26548
26549 /* Save the result of multipass lookahead scheduling for the next round. */
26550 static void
26551 core2i7_first_cycle_multipass_end (const void *_data)
26552 {
26553 const_ix86_first_cycle_multipass_data_t data
26554 = (const_ix86_first_cycle_multipass_data_t) _data;
26555 ix86_first_cycle_multipass_data_t next_data
26556 = ix86_first_cycle_multipass_data;
26557
26558 if (data != NULL)
26559 {
26560 next_data->ifetch_block_len = data->ifetch_block_len;
26561 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26562 }
26563 }
26564
26565 /* Deallocate target data. */
26566 static void
26567 core2i7_first_cycle_multipass_fini (void *_data)
26568 {
26569 ix86_first_cycle_multipass_data_t data
26570 = (ix86_first_cycle_multipass_data_t) _data;
26571
26572 if (data->ready_try_change)
26573 {
26574 sbitmap_free (data->ready_try_change);
26575 data->ready_try_change = NULL;
26576 data->ready_try_change_size = 0;
26577 }
26578 }
26579
26580 /* Prepare for scheduling pass. */
26581 static void
26582 ix86_sched_init_global (FILE *, int, int)
26583 {
26584 /* Install scheduling hooks for current CPU. Some of these hooks are used
26585 in time-critical parts of the scheduler, so we only set them up when
26586 they are actually used. */
26587 switch (ix86_tune)
26588 {
26589 case PROCESSOR_CORE2:
26590 case PROCESSOR_NEHALEM:
26591 case PROCESSOR_SANDYBRIDGE:
26592 case PROCESSOR_HASWELL:
26593 /* Do not perform multipass scheduling for pre-reload schedule
26594 to save compile time. */
26595 if (reload_completed)
26596 {
26597 targetm.sched.dfa_post_advance_cycle
26598 = core2i7_dfa_post_advance_cycle;
26599 targetm.sched.first_cycle_multipass_init
26600 = core2i7_first_cycle_multipass_init;
26601 targetm.sched.first_cycle_multipass_begin
26602 = core2i7_first_cycle_multipass_begin;
26603 targetm.sched.first_cycle_multipass_issue
26604 = core2i7_first_cycle_multipass_issue;
26605 targetm.sched.first_cycle_multipass_backtrack
26606 = core2i7_first_cycle_multipass_backtrack;
26607 targetm.sched.first_cycle_multipass_end
26608 = core2i7_first_cycle_multipass_end;
26609 targetm.sched.first_cycle_multipass_fini
26610 = core2i7_first_cycle_multipass_fini;
26611
26612 /* Set decoder parameters. */
26613 core2i7_secondary_decoder_max_insn_size = 8;
26614 core2i7_ifetch_block_size = 16;
26615 core2i7_ifetch_block_max_insns = 6;
26616 break;
26617 }
26618 /* ... Fall through ... */
26619 default:
26620 targetm.sched.dfa_post_advance_cycle = NULL;
26621 targetm.sched.first_cycle_multipass_init = NULL;
26622 targetm.sched.first_cycle_multipass_begin = NULL;
26623 targetm.sched.first_cycle_multipass_issue = NULL;
26624 targetm.sched.first_cycle_multipass_backtrack = NULL;
26625 targetm.sched.first_cycle_multipass_end = NULL;
26626 targetm.sched.first_cycle_multipass_fini = NULL;
26627 break;
26628 }
26629 }
26630
26631 \f
26632 /* Compute the alignment given to a constant that is being placed in memory.
26633 EXP is the constant and ALIGN is the alignment that the object would
26634 ordinarily have.
26635 The value of this function is used instead of that alignment to align
26636 the object. */
26637
26638 int
26639 ix86_constant_alignment (tree exp, int align)
26640 {
26641 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26642 || TREE_CODE (exp) == INTEGER_CST)
26643 {
26644 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26645 return 64;
26646 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26647 return 128;
26648 }
26649 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26650 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26651 return BITS_PER_WORD;
26652
26653 return align;
26654 }
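/* For example (illustrative), a "double" (DFmode) constant that would
   otherwise get only 32-bit alignment is bumped to 64 bits here, and a
   long string constant is word-aligned when not optimizing for size, so
   that code reading it can use aligned word accesses.  */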
26655
26656 /* Compute the alignment for a static variable.
26657 TYPE is the data type, and ALIGN is the alignment that
26658 the object would ordinarily have. The value of this function is used
26659 instead of that alignment to align the object. */
26660
26661 int
26662 ix86_data_alignment (tree type, int align, bool opt)
26663 {
26664 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26665 for symbols from other compilation units or symbols that don't need
26666 to bind locally. In order to preserve some ABI compatibility with
26667 those compilers, ensure we don't decrease alignment from what we
26668 used to assume. */
26669
26670 int max_align_compat
26671 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26672
26673 /* A data structure whose size is equal to or greater than the size of a
26674 cache line (64 bytes in the Pentium 4 and other recent Intel processors,
26675 including processors based on the Intel Core microarchitecture) should be
26676 aligned so that its base address is a multiple of the cache line size.  */
26677
26678 int max_align
26679 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26680
26681 if (max_align < BITS_PER_WORD)
26682 max_align = BITS_PER_WORD;
26683
26684 if (opt
26685 && AGGREGATE_TYPE_P (type)
26686 && TYPE_SIZE (type)
26687 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26688 {
26689 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26690 && align < max_align_compat)
26691 align = max_align_compat;
26692 if (wi::geu_p (TYPE_SIZE (type), max_align)
26693 && align < max_align)
26694 align = max_align;
26695 }
26696
26697 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
26698 to a 16-byte boundary.  */
26699 if (TARGET_64BIT)
26700 {
26701 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26702 && TYPE_SIZE (type)
26703 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26704 && wi::geu_p (TYPE_SIZE (type), 128)
26705 && align < 128)
26706 return 128;
26707 }
26708
26709 if (!opt)
26710 return align;
26711
26712 if (TREE_CODE (type) == ARRAY_TYPE)
26713 {
26714 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26715 return 64;
26716 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26717 return 128;
26718 }
26719 else if (TREE_CODE (type) == COMPLEX_TYPE)
26720 {
26721
26722 if (TYPE_MODE (type) == DCmode && align < 64)
26723 return 64;
26724 if ((TYPE_MODE (type) == XCmode
26725 || TYPE_MODE (type) == TCmode) && align < 128)
26726 return 128;
26727 }
26728 else if ((TREE_CODE (type) == RECORD_TYPE
26729 || TREE_CODE (type) == UNION_TYPE
26730 || TREE_CODE (type) == QUAL_UNION_TYPE)
26731 && TYPE_FIELDS (type))
26732 {
26733 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26734 return 64;
26735 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26736 return 128;
26737 }
26738 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26739 || TREE_CODE (type) == INTEGER_TYPE)
26740 {
26741 if (TYPE_MODE (type) == DFmode && align < 64)
26742 return 64;
26743 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26744 return 128;
26745 }
26746
26747 return align;
26748 }
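/* Rough examples of the effect, assuming the usual 64-byte prefetch_block:
   when optimizing, a 100-byte static array ends up cache-line aligned
   (512 bits), while a 20-byte array on x86-64 still gets the ABI-required
   128-bit alignment; the function never lowers the alignment the object
   would otherwise have.  */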
26749
26750 /* Compute the alignment for a local variable or a stack slot. EXP is
26751 the data type or decl itself, MODE is the widest mode available and
26752 ALIGN is the alignment that the object would ordinarily have. The
26753 value of this macro is used instead of that alignment to align the
26754 object. */
26755
26756 unsigned int
26757 ix86_local_alignment (tree exp, enum machine_mode mode,
26758 unsigned int align)
26759 {
26760 tree type, decl;
26761
26762 if (exp && DECL_P (exp))
26763 {
26764 type = TREE_TYPE (exp);
26765 decl = exp;
26766 }
26767 else
26768 {
26769 type = exp;
26770 decl = NULL;
26771 }
26772
26773 /* Don't do dynamic stack realignment for long long objects with
26774 -mpreferred-stack-boundary=2. */
26775 if (!TARGET_64BIT
26776 && align == 64
26777 && ix86_preferred_stack_boundary < 64
26778 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26779 && (!type || !TYPE_USER_ALIGN (type))
26780 && (!decl || !DECL_USER_ALIGN (decl)))
26781 align = 32;
26782
26783 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
26784 register in MODE. Return the larger of the XFmode and DFmode
26785 alignments. */
26786 if (!type)
26787 {
26788 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26789 align = GET_MODE_ALIGNMENT (DFmode);
26790 return align;
26791 }
26792
26793 /* The x86-64 ABI requires arrays of at least 16 bytes to be aligned
26794 to a 16-byte boundary. The exact wording is:
26795
26796 An array uses the same alignment as its elements, except that a local or
26797 global array variable of length at least 16 bytes or
26798 a C99 variable-length array variable always has alignment of at least 16 bytes.
26799
26800 This was added to allow the use of aligned SSE instructions on arrays. The
26801 rule is meant for static storage (where the compiler cannot do the analysis
26802 by itself). We follow it for automatic variables only when convenient:
26803 we fully control everything in the function being compiled, and functions
26804 from other units cannot rely on the alignment.
26805
26806 Exclude the va_list type. It is the common case of a local array where
26807 we cannot benefit from the alignment.
26808
26809 TODO: Probably one should optimize for size only when the variable does not escape. */
26810 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26811 && TARGET_SSE)
26812 {
26813 if (AGGREGATE_TYPE_P (type)
26814 && (va_list_type_node == NULL_TREE
26815 || (TYPE_MAIN_VARIANT (type)
26816 != TYPE_MAIN_VARIANT (va_list_type_node)))
26817 && TYPE_SIZE (type)
26818 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26819 && wi::geu_p (TYPE_SIZE (type), 16)
26820 && align < 128)
26821 return 128;
26822 }
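/* E.g. (illustrative): with -m64 and SSE enabled, a local "int v[8]"
   (32 bytes) is given 128-bit alignment here so vectorized accesses can
   use aligned SSE loads and stores; an 8-byte local array is left alone.  */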
26823 if (TREE_CODE (type) == ARRAY_TYPE)
26824 {
26825 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26826 return 64;
26827 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26828 return 128;
26829 }
26830 else if (TREE_CODE (type) == COMPLEX_TYPE)
26831 {
26832 if (TYPE_MODE (type) == DCmode && align < 64)
26833 return 64;
26834 if ((TYPE_MODE (type) == XCmode
26835 || TYPE_MODE (type) == TCmode) && align < 128)
26836 return 128;
26837 }
26838 else if ((TREE_CODE (type) == RECORD_TYPE
26839 || TREE_CODE (type) == UNION_TYPE
26840 || TREE_CODE (type) == QUAL_UNION_TYPE)
26841 && TYPE_FIELDS (type))
26842 {
26843 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26844 return 64;
26845 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26846 return 128;
26847 }
26848 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26849 || TREE_CODE (type) == INTEGER_TYPE)
26850 {
26851
26852 if (TYPE_MODE (type) == DFmode && align < 64)
26853 return 64;
26854 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26855 return 128;
26856 }
26857 return align;
26858 }
26859
26860 /* Compute the minimum required alignment for dynamic stack realignment
26861 purposes for a local variable, parameter or a stack slot. EXP is
26862 the data type or decl itself, MODE is its mode and ALIGN is the
26863 alignment that the object would ordinarily have. */
26864
26865 unsigned int
26866 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26867 unsigned int align)
26868 {
26869 tree type, decl;
26870
26871 if (exp && DECL_P (exp))
26872 {
26873 type = TREE_TYPE (exp);
26874 decl = exp;
26875 }
26876 else
26877 {
26878 type = exp;
26879 decl = NULL;
26880 }
26881
26882 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26883 return align;
26884
26885 /* Don't do dynamic stack realignment for long long objects with
26886 -mpreferred-stack-boundary=2. */
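/* E.g. a "long long" local compiled with -m32 -mpreferred-stack-boundary=2
   only reports a 32-bit minimum here, so such an object by itself does not
   force dynamic realignment of the stack.  */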
26887 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26888 && (!type || !TYPE_USER_ALIGN (type))
26889 && (!decl || !DECL_USER_ALIGN (decl)))
26890 return 32;
26891
26892 return align;
26893 }
26894 \f
26895 /* Find a location for the static chain incoming to a nested function.
26896 This is a register, unless all free registers are used by arguments. */
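/* Roughly (see the body below): 64-bit targets always use R10; 32-bit
   targets use ECX by default, EAX for fastcall and thiscall, and for
   regparm(3) functions the chain is passed on the stack via an alternate
   entry point that pushes ESI.  */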
26897
26898 static rtx
26899 ix86_static_chain (const_tree fndecl, bool incoming_p)
26900 {
26901 unsigned regno;
26902
26903 if (!DECL_STATIC_CHAIN (fndecl))
26904 return NULL;
26905
26906 if (TARGET_64BIT)
26907 {
26908 /* We always use R10 in 64-bit mode. */
26909 regno = R10_REG;
26910 }
26911 else
26912 {
26913 tree fntype;
26914 unsigned int ccvt;
26915
26916 /* By default in 32-bit mode we use ECX to pass the static chain. */
26917 regno = CX_REG;
26918
26919 fntype = TREE_TYPE (fndecl);
26920 ccvt = ix86_get_callcvt (fntype);
26921 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26922 {
26923 /* Fastcall functions use ecx/edx for arguments, which leaves
26924 us with EAX for the static chain.
26925 Thiscall functions use ecx for arguments, which also
26926 leaves us with EAX for the static chain. */
26927 regno = AX_REG;
26928 }
26929 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26930 {
26931 /* Thiscall functions use ecx for arguments, which leaves
26932 us with EAX and EDX for the static chain.
26933 We use EAX for ABI compatibility. */
26934 regno = AX_REG;
26935 }
26936 else if (ix86_function_regparm (fntype, fndecl) == 3)
26937 {
26938 /* For regparm 3, we have no free call-clobbered registers in
26939 which to store the static chain. In order to implement this,
26940 we have the trampoline push the static chain to the stack.
26941 However, we can't push a value below the return address when
26942 we call the nested function directly, so we have to use an
26943 alternate entry point. For this we use ESI, and have the
26944 alternate entry point push ESI, so that things appear the
26945 same once we're executing the nested function. */
26946 if (incoming_p)
26947 {
26948 if (fndecl == current_function_decl)
26949 ix86_static_chain_on_stack = true;
26950 return gen_frame_mem (SImode,
26951 plus_constant (Pmode,
26952 arg_pointer_rtx, -8));
26953 }
26954 regno = SI_REG;
26955 }
26956 }
26957
26958 return gen_rtx_REG (Pmode, regno);
26959 }
26960
26961 /* Emit RTL insns to initialize the variable parts of a trampoline.
26962 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26963 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26964 to be passed to the target function. */
26965
26966 static void
26967 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26968 {
26969 rtx mem, fnaddr;
26970 int opcode;
26971 int offset = 0;
26972
26973 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26974
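/* Roughly, the 64-bit trampoline emitted below looks like this (bytes shown
   in memory order; the movabs forms are used when a 64-bit immediate is
   needed, the movl forms otherwise):
     49 bb <imm64>   movabs $FNADDR, %r11   (or 41 bb <imm32>   movl)
     49 ba <imm64>   movabs $CHAIN,  %r10   (or 41 ba <imm32>   movl)
     49 ff e3        jmp    *%r11
     90              nop (padding so the jmp is written as one 32-bit store)  */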
26975 if (TARGET_64BIT)
26976 {
26977 int size;
26978
26979 /* Load the function address into r11. Try to load the address using
26980 the shorter movl instead of movabs. We may want to support
26981 movq for kernel mode, but the kernel does not use trampolines at
26982 the moment. FNADDR is a 32-bit address and may not be in
26983 DImode when ptr_mode == SImode. Always use movl in this
26984 case. */
26985 if (ptr_mode == SImode
26986 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26987 {
26988 fnaddr = copy_addr_to_reg (fnaddr);
26989
26990 mem = adjust_address (m_tramp, HImode, offset);
26991 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26992
26993 mem = adjust_address (m_tramp, SImode, offset + 2);
26994 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26995 offset += 6;
26996 }
26997 else
26998 {
26999 mem = adjust_address (m_tramp, HImode, offset);
27000 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27001
27002 mem = adjust_address (m_tramp, DImode, offset + 2);
27003 emit_move_insn (mem, fnaddr);
27004 offset += 10;
27005 }
27006
27007 /* Load the static chain into r10 using movabs. Use the shorter movl
27008 instead of movabs when ptr_mode == SImode. */
27009 if (ptr_mode == SImode)
27010 {
27011 opcode = 0xba41;
27012 size = 6;
27013 }
27014 else
27015 {
27016 opcode = 0xba49;
27017 size = 10;
27018 }
27019
27020 mem = adjust_address (m_tramp, HImode, offset);
27021 emit_move_insn (mem, gen_int_mode (opcode, HImode));
27022
27023 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27024 emit_move_insn (mem, chain_value);
27025 offset += size;
27026
27027 /* Jump to r11; the last (unused) byte is a nop, only there to
27028 pad the write out to a single 32-bit store. */
27029 mem = adjust_address (m_tramp, SImode, offset);
27030 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27031 offset += 4;
27032 }
27033 else
27034 {
27035 rtx disp, chain;
27036
27037 /* Depending on the static chain location, either load a register
27038 with a constant, or push the constant to the stack. All of the
27039 instructions are the same size. */
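/* Roughly, the 32-bit trampoline emitted below is either
     b8/b9 <imm32>   movl $CHAIN, %eax/%ecx    (register static chain)
   or
     68 <imm32>      pushl $CHAIN              (static chain on the stack)
   followed by
     e9 <rel32>      jmp  FNADDR  */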
27040 chain = ix86_static_chain (fndecl, true);
27041 if (REG_P (chain))
27042 {
27043 switch (REGNO (chain))
27044 {
27045 case AX_REG:
27046 opcode = 0xb8; break;
27047 case CX_REG:
27048 opcode = 0xb9; break;
27049 default:
27050 gcc_unreachable ();
27051 }
27052 }
27053 else
27054 opcode = 0x68;
27055
27056 mem = adjust_address (m_tramp, QImode, offset);
27057 emit_move_insn (mem, gen_int_mode (opcode, QImode));
27058
27059 mem = adjust_address (m_tramp, SImode, offset + 1);
27060 emit_move_insn (mem, chain_value);
27061 offset += 5;
27062
27063 mem = adjust_address (m_tramp, QImode, offset);
27064 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27065
27066 mem = adjust_address (m_tramp, SImode, offset + 1);
27067
27068 /* Compute the offset from the end of the jmp to the target function.
27069 When the trampoline stores the static chain on the stack, we need
27070 to skip the first insn of the target, which pushes the (call-saved)
27071 static chain register; this push is 1 byte. */
27072 offset += 5;
27073 disp = expand_binop (SImode, sub_optab, fnaddr,
27074 plus_constant (Pmode, XEXP (m_tramp, 0),
27075 offset - (MEM_P (chain) ? 1 : 0)),
27076 NULL_RTX, 1, OPTAB_DIRECT);
27077 emit_move_insn (mem, disp);
27078 }
27079
27080 gcc_assert (offset <= TRAMPOLINE_SIZE);
27081
27082 #ifdef HAVE_ENABLE_EXECUTE_STACK
27083 #ifdef CHECK_EXECUTE_STACK_ENABLED
27084 if (CHECK_EXECUTE_STACK_ENABLED)
27085 #endif
27086 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27087 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27088 #endif
27089 }
27090 \f
27091 /* The following file contains several enumerations and data structures
27092 built from the definitions in i386-builtin-types.def. */
27093
27094 #include "i386-builtin-types.inc"
27095
27096 /* Table for the ix86 builtin non-function types. */
27097 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27098
27099 /* Retrieve an element from the above table, building some of
27100 the types lazily. */
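/* For example, a vector code is built as a vector type over its scalar base
   (using ix86_builtin_type_vect_base and ix86_builtin_type_vect_mode), while
   a pointer code wraps its pointee type from ix86_builtin_type_ptr_base,
   const-qualifying it for the codes past IX86_BT_LAST_PTR.  */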
27101
27102 static tree
27103 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27104 {
27105 unsigned int index;
27106 tree type, itype;
27107
27108 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27109
27110 type = ix86_builtin_type_tab[(int) tcode];
27111 if (type != NULL)
27112 return type;
27113
27114 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27115 if (tcode <= IX86_BT_LAST_VECT)
27116 {
27117 enum machine_mode mode;
27118
27119 index = tcode - IX86_BT_LAST_PRIM - 1;
27120 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27121 mode = ix86_builtin_type_vect_mode[index];
27122
27123 type = build_vector_type_for_mode (itype, mode);
27124 }
27125 else
27126 {
27127 int quals;
27128
27129 index = tcode - IX86_BT_LAST_VECT - 1;
27130 if (tcode <= IX86_BT_LAST_PTR)
27131 quals = TYPE_UNQUALIFIED;
27132 else
27133 quals = TYPE_QUAL_CONST;
27134
27135 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27136 if (quals != TYPE_UNQUALIFIED)
27137 itype = build_qualified_type (itype, quals);
27138
27139 type = build_pointer_type (itype);
27140 }
27141
27142 ix86_builtin_type_tab[(int) tcode] = type;
27143 return type;
27144 }
27145
27146 /* Table for the ix86 builtin function types. */
27147 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27148
27149 /* Retrieve an element from the above table, building some of
27150 the types lazily. */
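/* A primary function code is expanded by walking ix86_builtin_func_args
   backwards (hence the reverse loop below) so the argument TREE_LIST ends in
   void_list_node; an alias code simply reuses the type built for its base
   code.  */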
27151
27152 static tree
27153 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27154 {
27155 tree type;
27156
27157 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27158
27159 type = ix86_builtin_func_type_tab[(int) tcode];
27160 if (type != NULL)
27161 return type;
27162
27163 if (tcode <= IX86_BT_LAST_FUNC)
27164 {
27165 unsigned start = ix86_builtin_func_start[(int) tcode];
27166 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27167 tree rtype, atype, args = void_list_node;
27168 unsigned i;
27169
27170 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27171 for (i = after - 1; i > start; --i)
27172 {
27173 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27174 args = tree_cons (NULL, atype, args);
27175 }
27176
27177 type = build_function_type (rtype, args);
27178 }
27179 else
27180 {
27181 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27182 enum ix86_builtin_func_type icode;
27183
27184 icode = ix86_builtin_func_alias_base[index];
27185 type = ix86_get_builtin_func_type (icode);
27186 }
27187
27188 ix86_builtin_func_type_tab[(int) tcode] = type;
27189 return type;
27190 }
27191
27192
27193 /* Codes for all the SSE/MMX builtins. */
27194 enum ix86_builtins
27195 {
27196 IX86_BUILTIN_ADDPS,
27197 IX86_BUILTIN_ADDSS,
27198 IX86_BUILTIN_DIVPS,
27199 IX86_BUILTIN_DIVSS,
27200 IX86_BUILTIN_MULPS,
27201 IX86_BUILTIN_MULSS,
27202 IX86_BUILTIN_SUBPS,
27203 IX86_BUILTIN_SUBSS,
27204
27205 IX86_BUILTIN_CMPEQPS,
27206 IX86_BUILTIN_CMPLTPS,
27207 IX86_BUILTIN_CMPLEPS,
27208 IX86_BUILTIN_CMPGTPS,
27209 IX86_BUILTIN_CMPGEPS,
27210 IX86_BUILTIN_CMPNEQPS,
27211 IX86_BUILTIN_CMPNLTPS,
27212 IX86_BUILTIN_CMPNLEPS,
27213 IX86_BUILTIN_CMPNGTPS,
27214 IX86_BUILTIN_CMPNGEPS,
27215 IX86_BUILTIN_CMPORDPS,
27216 IX86_BUILTIN_CMPUNORDPS,
27217 IX86_BUILTIN_CMPEQSS,
27218 IX86_BUILTIN_CMPLTSS,
27219 IX86_BUILTIN_CMPLESS,
27220 IX86_BUILTIN_CMPNEQSS,
27221 IX86_BUILTIN_CMPNLTSS,
27222 IX86_BUILTIN_CMPNLESS,
27223 IX86_BUILTIN_CMPORDSS,
27224 IX86_BUILTIN_CMPUNORDSS,
27225
27226 IX86_BUILTIN_COMIEQSS,
27227 IX86_BUILTIN_COMILTSS,
27228 IX86_BUILTIN_COMILESS,
27229 IX86_BUILTIN_COMIGTSS,
27230 IX86_BUILTIN_COMIGESS,
27231 IX86_BUILTIN_COMINEQSS,
27232 IX86_BUILTIN_UCOMIEQSS,
27233 IX86_BUILTIN_UCOMILTSS,
27234 IX86_BUILTIN_UCOMILESS,
27235 IX86_BUILTIN_UCOMIGTSS,
27236 IX86_BUILTIN_UCOMIGESS,
27237 IX86_BUILTIN_UCOMINEQSS,
27238
27239 IX86_BUILTIN_CVTPI2PS,
27240 IX86_BUILTIN_CVTPS2PI,
27241 IX86_BUILTIN_CVTSI2SS,
27242 IX86_BUILTIN_CVTSI642SS,
27243 IX86_BUILTIN_CVTSS2SI,
27244 IX86_BUILTIN_CVTSS2SI64,
27245 IX86_BUILTIN_CVTTPS2PI,
27246 IX86_BUILTIN_CVTTSS2SI,
27247 IX86_BUILTIN_CVTTSS2SI64,
27248
27249 IX86_BUILTIN_MAXPS,
27250 IX86_BUILTIN_MAXSS,
27251 IX86_BUILTIN_MINPS,
27252 IX86_BUILTIN_MINSS,
27253
27254 IX86_BUILTIN_LOADUPS,
27255 IX86_BUILTIN_STOREUPS,
27256 IX86_BUILTIN_MOVSS,
27257
27258 IX86_BUILTIN_MOVHLPS,
27259 IX86_BUILTIN_MOVLHPS,
27260 IX86_BUILTIN_LOADHPS,
27261 IX86_BUILTIN_LOADLPS,
27262 IX86_BUILTIN_STOREHPS,
27263 IX86_BUILTIN_STORELPS,
27264
27265 IX86_BUILTIN_MASKMOVQ,
27266 IX86_BUILTIN_MOVMSKPS,
27267 IX86_BUILTIN_PMOVMSKB,
27268
27269 IX86_BUILTIN_MOVNTPS,
27270 IX86_BUILTIN_MOVNTQ,
27271
27272 IX86_BUILTIN_LOADDQU,
27273 IX86_BUILTIN_STOREDQU,
27274
27275 IX86_BUILTIN_PACKSSWB,
27276 IX86_BUILTIN_PACKSSDW,
27277 IX86_BUILTIN_PACKUSWB,
27278
27279 IX86_BUILTIN_PADDB,
27280 IX86_BUILTIN_PADDW,
27281 IX86_BUILTIN_PADDD,
27282 IX86_BUILTIN_PADDQ,
27283 IX86_BUILTIN_PADDSB,
27284 IX86_BUILTIN_PADDSW,
27285 IX86_BUILTIN_PADDUSB,
27286 IX86_BUILTIN_PADDUSW,
27287 IX86_BUILTIN_PSUBB,
27288 IX86_BUILTIN_PSUBW,
27289 IX86_BUILTIN_PSUBD,
27290 IX86_BUILTIN_PSUBQ,
27291 IX86_BUILTIN_PSUBSB,
27292 IX86_BUILTIN_PSUBSW,
27293 IX86_BUILTIN_PSUBUSB,
27294 IX86_BUILTIN_PSUBUSW,
27295
27296 IX86_BUILTIN_PAND,
27297 IX86_BUILTIN_PANDN,
27298 IX86_BUILTIN_POR,
27299 IX86_BUILTIN_PXOR,
27300
27301 IX86_BUILTIN_PAVGB,
27302 IX86_BUILTIN_PAVGW,
27303
27304 IX86_BUILTIN_PCMPEQB,
27305 IX86_BUILTIN_PCMPEQW,
27306 IX86_BUILTIN_PCMPEQD,
27307 IX86_BUILTIN_PCMPGTB,
27308 IX86_BUILTIN_PCMPGTW,
27309 IX86_BUILTIN_PCMPGTD,
27310
27311 IX86_BUILTIN_PMADDWD,
27312
27313 IX86_BUILTIN_PMAXSW,
27314 IX86_BUILTIN_PMAXUB,
27315 IX86_BUILTIN_PMINSW,
27316 IX86_BUILTIN_PMINUB,
27317
27318 IX86_BUILTIN_PMULHUW,
27319 IX86_BUILTIN_PMULHW,
27320 IX86_BUILTIN_PMULLW,
27321
27322 IX86_BUILTIN_PSADBW,
27323 IX86_BUILTIN_PSHUFW,
27324
27325 IX86_BUILTIN_PSLLW,
27326 IX86_BUILTIN_PSLLD,
27327 IX86_BUILTIN_PSLLQ,
27328 IX86_BUILTIN_PSRAW,
27329 IX86_BUILTIN_PSRAD,
27330 IX86_BUILTIN_PSRLW,
27331 IX86_BUILTIN_PSRLD,
27332 IX86_BUILTIN_PSRLQ,
27333 IX86_BUILTIN_PSLLWI,
27334 IX86_BUILTIN_PSLLDI,
27335 IX86_BUILTIN_PSLLQI,
27336 IX86_BUILTIN_PSRAWI,
27337 IX86_BUILTIN_PSRADI,
27338 IX86_BUILTIN_PSRLWI,
27339 IX86_BUILTIN_PSRLDI,
27340 IX86_BUILTIN_PSRLQI,
27341
27342 IX86_BUILTIN_PUNPCKHBW,
27343 IX86_BUILTIN_PUNPCKHWD,
27344 IX86_BUILTIN_PUNPCKHDQ,
27345 IX86_BUILTIN_PUNPCKLBW,
27346 IX86_BUILTIN_PUNPCKLWD,
27347 IX86_BUILTIN_PUNPCKLDQ,
27348
27349 IX86_BUILTIN_SHUFPS,
27350
27351 IX86_BUILTIN_RCPPS,
27352 IX86_BUILTIN_RCPSS,
27353 IX86_BUILTIN_RSQRTPS,
27354 IX86_BUILTIN_RSQRTPS_NR,
27355 IX86_BUILTIN_RSQRTSS,
27356 IX86_BUILTIN_RSQRTF,
27357 IX86_BUILTIN_SQRTPS,
27358 IX86_BUILTIN_SQRTPS_NR,
27359 IX86_BUILTIN_SQRTSS,
27360
27361 IX86_BUILTIN_UNPCKHPS,
27362 IX86_BUILTIN_UNPCKLPS,
27363
27364 IX86_BUILTIN_ANDPS,
27365 IX86_BUILTIN_ANDNPS,
27366 IX86_BUILTIN_ORPS,
27367 IX86_BUILTIN_XORPS,
27368
27369 IX86_BUILTIN_EMMS,
27370 IX86_BUILTIN_LDMXCSR,
27371 IX86_BUILTIN_STMXCSR,
27372 IX86_BUILTIN_SFENCE,
27373
27374 IX86_BUILTIN_FXSAVE,
27375 IX86_BUILTIN_FXRSTOR,
27376 IX86_BUILTIN_FXSAVE64,
27377 IX86_BUILTIN_FXRSTOR64,
27378
27379 IX86_BUILTIN_XSAVE,
27380 IX86_BUILTIN_XRSTOR,
27381 IX86_BUILTIN_XSAVE64,
27382 IX86_BUILTIN_XRSTOR64,
27383
27384 IX86_BUILTIN_XSAVEOPT,
27385 IX86_BUILTIN_XSAVEOPT64,
27386
27387 IX86_BUILTIN_XSAVEC,
27388 IX86_BUILTIN_XSAVEC64,
27389
27390 IX86_BUILTIN_XSAVES,
27391 IX86_BUILTIN_XRSTORS,
27392 IX86_BUILTIN_XSAVES64,
27393 IX86_BUILTIN_XRSTORS64,
27394
27395 /* 3DNow! Original */
27396 IX86_BUILTIN_FEMMS,
27397 IX86_BUILTIN_PAVGUSB,
27398 IX86_BUILTIN_PF2ID,
27399 IX86_BUILTIN_PFACC,
27400 IX86_BUILTIN_PFADD,
27401 IX86_BUILTIN_PFCMPEQ,
27402 IX86_BUILTIN_PFCMPGE,
27403 IX86_BUILTIN_PFCMPGT,
27404 IX86_BUILTIN_PFMAX,
27405 IX86_BUILTIN_PFMIN,
27406 IX86_BUILTIN_PFMUL,
27407 IX86_BUILTIN_PFRCP,
27408 IX86_BUILTIN_PFRCPIT1,
27409 IX86_BUILTIN_PFRCPIT2,
27410 IX86_BUILTIN_PFRSQIT1,
27411 IX86_BUILTIN_PFRSQRT,
27412 IX86_BUILTIN_PFSUB,
27413 IX86_BUILTIN_PFSUBR,
27414 IX86_BUILTIN_PI2FD,
27415 IX86_BUILTIN_PMULHRW,
27416
27417 /* 3DNow! Athlon Extensions */
27418 IX86_BUILTIN_PF2IW,
27419 IX86_BUILTIN_PFNACC,
27420 IX86_BUILTIN_PFPNACC,
27421 IX86_BUILTIN_PI2FW,
27422 IX86_BUILTIN_PSWAPDSI,
27423 IX86_BUILTIN_PSWAPDSF,
27424
27425 /* SSE2 */
27426 IX86_BUILTIN_ADDPD,
27427 IX86_BUILTIN_ADDSD,
27428 IX86_BUILTIN_DIVPD,
27429 IX86_BUILTIN_DIVSD,
27430 IX86_BUILTIN_MULPD,
27431 IX86_BUILTIN_MULSD,
27432 IX86_BUILTIN_SUBPD,
27433 IX86_BUILTIN_SUBSD,
27434
27435 IX86_BUILTIN_CMPEQPD,
27436 IX86_BUILTIN_CMPLTPD,
27437 IX86_BUILTIN_CMPLEPD,
27438 IX86_BUILTIN_CMPGTPD,
27439 IX86_BUILTIN_CMPGEPD,
27440 IX86_BUILTIN_CMPNEQPD,
27441 IX86_BUILTIN_CMPNLTPD,
27442 IX86_BUILTIN_CMPNLEPD,
27443 IX86_BUILTIN_CMPNGTPD,
27444 IX86_BUILTIN_CMPNGEPD,
27445 IX86_BUILTIN_CMPORDPD,
27446 IX86_BUILTIN_CMPUNORDPD,
27447 IX86_BUILTIN_CMPEQSD,
27448 IX86_BUILTIN_CMPLTSD,
27449 IX86_BUILTIN_CMPLESD,
27450 IX86_BUILTIN_CMPNEQSD,
27451 IX86_BUILTIN_CMPNLTSD,
27452 IX86_BUILTIN_CMPNLESD,
27453 IX86_BUILTIN_CMPORDSD,
27454 IX86_BUILTIN_CMPUNORDSD,
27455
27456 IX86_BUILTIN_COMIEQSD,
27457 IX86_BUILTIN_COMILTSD,
27458 IX86_BUILTIN_COMILESD,
27459 IX86_BUILTIN_COMIGTSD,
27460 IX86_BUILTIN_COMIGESD,
27461 IX86_BUILTIN_COMINEQSD,
27462 IX86_BUILTIN_UCOMIEQSD,
27463 IX86_BUILTIN_UCOMILTSD,
27464 IX86_BUILTIN_UCOMILESD,
27465 IX86_BUILTIN_UCOMIGTSD,
27466 IX86_BUILTIN_UCOMIGESD,
27467 IX86_BUILTIN_UCOMINEQSD,
27468
27469 IX86_BUILTIN_MAXPD,
27470 IX86_BUILTIN_MAXSD,
27471 IX86_BUILTIN_MINPD,
27472 IX86_BUILTIN_MINSD,
27473
27474 IX86_BUILTIN_ANDPD,
27475 IX86_BUILTIN_ANDNPD,
27476 IX86_BUILTIN_ORPD,
27477 IX86_BUILTIN_XORPD,
27478
27479 IX86_BUILTIN_SQRTPD,
27480 IX86_BUILTIN_SQRTSD,
27481
27482 IX86_BUILTIN_UNPCKHPD,
27483 IX86_BUILTIN_UNPCKLPD,
27484
27485 IX86_BUILTIN_SHUFPD,
27486
27487 IX86_BUILTIN_LOADUPD,
27488 IX86_BUILTIN_STOREUPD,
27489 IX86_BUILTIN_MOVSD,
27490
27491 IX86_BUILTIN_LOADHPD,
27492 IX86_BUILTIN_LOADLPD,
27493
27494 IX86_BUILTIN_CVTDQ2PD,
27495 IX86_BUILTIN_CVTDQ2PS,
27496
27497 IX86_BUILTIN_CVTPD2DQ,
27498 IX86_BUILTIN_CVTPD2PI,
27499 IX86_BUILTIN_CVTPD2PS,
27500 IX86_BUILTIN_CVTTPD2DQ,
27501 IX86_BUILTIN_CVTTPD2PI,
27502
27503 IX86_BUILTIN_CVTPI2PD,
27504 IX86_BUILTIN_CVTSI2SD,
27505 IX86_BUILTIN_CVTSI642SD,
27506
27507 IX86_BUILTIN_CVTSD2SI,
27508 IX86_BUILTIN_CVTSD2SI64,
27509 IX86_BUILTIN_CVTSD2SS,
27510 IX86_BUILTIN_CVTSS2SD,
27511 IX86_BUILTIN_CVTTSD2SI,
27512 IX86_BUILTIN_CVTTSD2SI64,
27513
27514 IX86_BUILTIN_CVTPS2DQ,
27515 IX86_BUILTIN_CVTPS2PD,
27516 IX86_BUILTIN_CVTTPS2DQ,
27517
27518 IX86_BUILTIN_MOVNTI,
27519 IX86_BUILTIN_MOVNTI64,
27520 IX86_BUILTIN_MOVNTPD,
27521 IX86_BUILTIN_MOVNTDQ,
27522
27523 IX86_BUILTIN_MOVQ128,
27524
27525 /* SSE2 MMX */
27526 IX86_BUILTIN_MASKMOVDQU,
27527 IX86_BUILTIN_MOVMSKPD,
27528 IX86_BUILTIN_PMOVMSKB128,
27529
27530 IX86_BUILTIN_PACKSSWB128,
27531 IX86_BUILTIN_PACKSSDW128,
27532 IX86_BUILTIN_PACKUSWB128,
27533
27534 IX86_BUILTIN_PADDB128,
27535 IX86_BUILTIN_PADDW128,
27536 IX86_BUILTIN_PADDD128,
27537 IX86_BUILTIN_PADDQ128,
27538 IX86_BUILTIN_PADDSB128,
27539 IX86_BUILTIN_PADDSW128,
27540 IX86_BUILTIN_PADDUSB128,
27541 IX86_BUILTIN_PADDUSW128,
27542 IX86_BUILTIN_PSUBB128,
27543 IX86_BUILTIN_PSUBW128,
27544 IX86_BUILTIN_PSUBD128,
27545 IX86_BUILTIN_PSUBQ128,
27546 IX86_BUILTIN_PSUBSB128,
27547 IX86_BUILTIN_PSUBSW128,
27548 IX86_BUILTIN_PSUBUSB128,
27549 IX86_BUILTIN_PSUBUSW128,
27550
27551 IX86_BUILTIN_PAND128,
27552 IX86_BUILTIN_PANDN128,
27553 IX86_BUILTIN_POR128,
27554 IX86_BUILTIN_PXOR128,
27555
27556 IX86_BUILTIN_PAVGB128,
27557 IX86_BUILTIN_PAVGW128,
27558
27559 IX86_BUILTIN_PCMPEQB128,
27560 IX86_BUILTIN_PCMPEQW128,
27561 IX86_BUILTIN_PCMPEQD128,
27562 IX86_BUILTIN_PCMPGTB128,
27563 IX86_BUILTIN_PCMPGTW128,
27564 IX86_BUILTIN_PCMPGTD128,
27565
27566 IX86_BUILTIN_PMADDWD128,
27567
27568 IX86_BUILTIN_PMAXSW128,
27569 IX86_BUILTIN_PMAXUB128,
27570 IX86_BUILTIN_PMINSW128,
27571 IX86_BUILTIN_PMINUB128,
27572
27573 IX86_BUILTIN_PMULUDQ,
27574 IX86_BUILTIN_PMULUDQ128,
27575 IX86_BUILTIN_PMULHUW128,
27576 IX86_BUILTIN_PMULHW128,
27577 IX86_BUILTIN_PMULLW128,
27578
27579 IX86_BUILTIN_PSADBW128,
27580 IX86_BUILTIN_PSHUFHW,
27581 IX86_BUILTIN_PSHUFLW,
27582 IX86_BUILTIN_PSHUFD,
27583
27584 IX86_BUILTIN_PSLLDQI128,
27585 IX86_BUILTIN_PSLLWI128,
27586 IX86_BUILTIN_PSLLDI128,
27587 IX86_BUILTIN_PSLLQI128,
27588 IX86_BUILTIN_PSRAWI128,
27589 IX86_BUILTIN_PSRADI128,
27590 IX86_BUILTIN_PSRLDQI128,
27591 IX86_BUILTIN_PSRLWI128,
27592 IX86_BUILTIN_PSRLDI128,
27593 IX86_BUILTIN_PSRLQI128,
27594
27595 IX86_BUILTIN_PSLLDQ128,
27596 IX86_BUILTIN_PSLLW128,
27597 IX86_BUILTIN_PSLLD128,
27598 IX86_BUILTIN_PSLLQ128,
27599 IX86_BUILTIN_PSRAW128,
27600 IX86_BUILTIN_PSRAD128,
27601 IX86_BUILTIN_PSRLW128,
27602 IX86_BUILTIN_PSRLD128,
27603 IX86_BUILTIN_PSRLQ128,
27604
27605 IX86_BUILTIN_PUNPCKHBW128,
27606 IX86_BUILTIN_PUNPCKHWD128,
27607 IX86_BUILTIN_PUNPCKHDQ128,
27608 IX86_BUILTIN_PUNPCKHQDQ128,
27609 IX86_BUILTIN_PUNPCKLBW128,
27610 IX86_BUILTIN_PUNPCKLWD128,
27611 IX86_BUILTIN_PUNPCKLDQ128,
27612 IX86_BUILTIN_PUNPCKLQDQ128,
27613
27614 IX86_BUILTIN_CLFLUSH,
27615 IX86_BUILTIN_MFENCE,
27616 IX86_BUILTIN_LFENCE,
27617 IX86_BUILTIN_PAUSE,
27618
27619 IX86_BUILTIN_FNSTENV,
27620 IX86_BUILTIN_FLDENV,
27621 IX86_BUILTIN_FNSTSW,
27622 IX86_BUILTIN_FNCLEX,
27623
27624 IX86_BUILTIN_BSRSI,
27625 IX86_BUILTIN_BSRDI,
27626 IX86_BUILTIN_RDPMC,
27627 IX86_BUILTIN_RDTSC,
27628 IX86_BUILTIN_RDTSCP,
27629 IX86_BUILTIN_ROLQI,
27630 IX86_BUILTIN_ROLHI,
27631 IX86_BUILTIN_RORQI,
27632 IX86_BUILTIN_RORHI,
27633
27634 /* SSE3. */
27635 IX86_BUILTIN_ADDSUBPS,
27636 IX86_BUILTIN_HADDPS,
27637 IX86_BUILTIN_HSUBPS,
27638 IX86_BUILTIN_MOVSHDUP,
27639 IX86_BUILTIN_MOVSLDUP,
27640 IX86_BUILTIN_ADDSUBPD,
27641 IX86_BUILTIN_HADDPD,
27642 IX86_BUILTIN_HSUBPD,
27643 IX86_BUILTIN_LDDQU,
27644
27645 IX86_BUILTIN_MONITOR,
27646 IX86_BUILTIN_MWAIT,
27647
27648 /* SSSE3. */
27649 IX86_BUILTIN_PHADDW,
27650 IX86_BUILTIN_PHADDD,
27651 IX86_BUILTIN_PHADDSW,
27652 IX86_BUILTIN_PHSUBW,
27653 IX86_BUILTIN_PHSUBD,
27654 IX86_BUILTIN_PHSUBSW,
27655 IX86_BUILTIN_PMADDUBSW,
27656 IX86_BUILTIN_PMULHRSW,
27657 IX86_BUILTIN_PSHUFB,
27658 IX86_BUILTIN_PSIGNB,
27659 IX86_BUILTIN_PSIGNW,
27660 IX86_BUILTIN_PSIGND,
27661 IX86_BUILTIN_PALIGNR,
27662 IX86_BUILTIN_PABSB,
27663 IX86_BUILTIN_PABSW,
27664 IX86_BUILTIN_PABSD,
27665
27666 IX86_BUILTIN_PHADDW128,
27667 IX86_BUILTIN_PHADDD128,
27668 IX86_BUILTIN_PHADDSW128,
27669 IX86_BUILTIN_PHSUBW128,
27670 IX86_BUILTIN_PHSUBD128,
27671 IX86_BUILTIN_PHSUBSW128,
27672 IX86_BUILTIN_PMADDUBSW128,
27673 IX86_BUILTIN_PMULHRSW128,
27674 IX86_BUILTIN_PSHUFB128,
27675 IX86_BUILTIN_PSIGNB128,
27676 IX86_BUILTIN_PSIGNW128,
27677 IX86_BUILTIN_PSIGND128,
27678 IX86_BUILTIN_PALIGNR128,
27679 IX86_BUILTIN_PABSB128,
27680 IX86_BUILTIN_PABSW128,
27681 IX86_BUILTIN_PABSD128,
27682
27683 /* AMDFAM10 - SSE4A New Instructions. */
27684 IX86_BUILTIN_MOVNTSD,
27685 IX86_BUILTIN_MOVNTSS,
27686 IX86_BUILTIN_EXTRQI,
27687 IX86_BUILTIN_EXTRQ,
27688 IX86_BUILTIN_INSERTQI,
27689 IX86_BUILTIN_INSERTQ,
27690
27691 /* SSE4.1. */
27692 IX86_BUILTIN_BLENDPD,
27693 IX86_BUILTIN_BLENDPS,
27694 IX86_BUILTIN_BLENDVPD,
27695 IX86_BUILTIN_BLENDVPS,
27696 IX86_BUILTIN_PBLENDVB128,
27697 IX86_BUILTIN_PBLENDW128,
27698
27699 IX86_BUILTIN_DPPD,
27700 IX86_BUILTIN_DPPS,
27701
27702 IX86_BUILTIN_INSERTPS128,
27703
27704 IX86_BUILTIN_MOVNTDQA,
27705 IX86_BUILTIN_MPSADBW128,
27706 IX86_BUILTIN_PACKUSDW128,
27707 IX86_BUILTIN_PCMPEQQ,
27708 IX86_BUILTIN_PHMINPOSUW128,
27709
27710 IX86_BUILTIN_PMAXSB128,
27711 IX86_BUILTIN_PMAXSD128,
27712 IX86_BUILTIN_PMAXUD128,
27713 IX86_BUILTIN_PMAXUW128,
27714
27715 IX86_BUILTIN_PMINSB128,
27716 IX86_BUILTIN_PMINSD128,
27717 IX86_BUILTIN_PMINUD128,
27718 IX86_BUILTIN_PMINUW128,
27719
27720 IX86_BUILTIN_PMOVSXBW128,
27721 IX86_BUILTIN_PMOVSXBD128,
27722 IX86_BUILTIN_PMOVSXBQ128,
27723 IX86_BUILTIN_PMOVSXWD128,
27724 IX86_BUILTIN_PMOVSXWQ128,
27725 IX86_BUILTIN_PMOVSXDQ128,
27726
27727 IX86_BUILTIN_PMOVZXBW128,
27728 IX86_BUILTIN_PMOVZXBD128,
27729 IX86_BUILTIN_PMOVZXBQ128,
27730 IX86_BUILTIN_PMOVZXWD128,
27731 IX86_BUILTIN_PMOVZXWQ128,
27732 IX86_BUILTIN_PMOVZXDQ128,
27733
27734 IX86_BUILTIN_PMULDQ128,
27735 IX86_BUILTIN_PMULLD128,
27736
27737 IX86_BUILTIN_ROUNDSD,
27738 IX86_BUILTIN_ROUNDSS,
27739
27740 IX86_BUILTIN_ROUNDPD,
27741 IX86_BUILTIN_ROUNDPS,
27742
27743 IX86_BUILTIN_FLOORPD,
27744 IX86_BUILTIN_CEILPD,
27745 IX86_BUILTIN_TRUNCPD,
27746 IX86_BUILTIN_RINTPD,
27747 IX86_BUILTIN_ROUNDPD_AZ,
27748
27749 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27750 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27751 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27752
27753 IX86_BUILTIN_FLOORPS,
27754 IX86_BUILTIN_CEILPS,
27755 IX86_BUILTIN_TRUNCPS,
27756 IX86_BUILTIN_RINTPS,
27757 IX86_BUILTIN_ROUNDPS_AZ,
27758
27759 IX86_BUILTIN_FLOORPS_SFIX,
27760 IX86_BUILTIN_CEILPS_SFIX,
27761 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27762
27763 IX86_BUILTIN_PTESTZ,
27764 IX86_BUILTIN_PTESTC,
27765 IX86_BUILTIN_PTESTNZC,
27766
27767 IX86_BUILTIN_VEC_INIT_V2SI,
27768 IX86_BUILTIN_VEC_INIT_V4HI,
27769 IX86_BUILTIN_VEC_INIT_V8QI,
27770 IX86_BUILTIN_VEC_EXT_V2DF,
27771 IX86_BUILTIN_VEC_EXT_V2DI,
27772 IX86_BUILTIN_VEC_EXT_V4SF,
27773 IX86_BUILTIN_VEC_EXT_V4SI,
27774 IX86_BUILTIN_VEC_EXT_V8HI,
27775 IX86_BUILTIN_VEC_EXT_V2SI,
27776 IX86_BUILTIN_VEC_EXT_V4HI,
27777 IX86_BUILTIN_VEC_EXT_V16QI,
27778 IX86_BUILTIN_VEC_SET_V2DI,
27779 IX86_BUILTIN_VEC_SET_V4SF,
27780 IX86_BUILTIN_VEC_SET_V4SI,
27781 IX86_BUILTIN_VEC_SET_V8HI,
27782 IX86_BUILTIN_VEC_SET_V4HI,
27783 IX86_BUILTIN_VEC_SET_V16QI,
27784
27785 IX86_BUILTIN_VEC_PACK_SFIX,
27786 IX86_BUILTIN_VEC_PACK_SFIX256,
27787
27788 /* SSE4.2. */
27789 IX86_BUILTIN_CRC32QI,
27790 IX86_BUILTIN_CRC32HI,
27791 IX86_BUILTIN_CRC32SI,
27792 IX86_BUILTIN_CRC32DI,
27793
27794 IX86_BUILTIN_PCMPESTRI128,
27795 IX86_BUILTIN_PCMPESTRM128,
27796 IX86_BUILTIN_PCMPESTRA128,
27797 IX86_BUILTIN_PCMPESTRC128,
27798 IX86_BUILTIN_PCMPESTRO128,
27799 IX86_BUILTIN_PCMPESTRS128,
27800 IX86_BUILTIN_PCMPESTRZ128,
27801 IX86_BUILTIN_PCMPISTRI128,
27802 IX86_BUILTIN_PCMPISTRM128,
27803 IX86_BUILTIN_PCMPISTRA128,
27804 IX86_BUILTIN_PCMPISTRC128,
27805 IX86_BUILTIN_PCMPISTRO128,
27806 IX86_BUILTIN_PCMPISTRS128,
27807 IX86_BUILTIN_PCMPISTRZ128,
27808
27809 IX86_BUILTIN_PCMPGTQ,
27810
27811 /* AES instructions */
27812 IX86_BUILTIN_AESENC128,
27813 IX86_BUILTIN_AESENCLAST128,
27814 IX86_BUILTIN_AESDEC128,
27815 IX86_BUILTIN_AESDECLAST128,
27816 IX86_BUILTIN_AESIMC128,
27817 IX86_BUILTIN_AESKEYGENASSIST128,
27818
27819 /* PCLMUL instruction */
27820 IX86_BUILTIN_PCLMULQDQ128,
27821
27822 /* AVX */
27823 IX86_BUILTIN_ADDPD256,
27824 IX86_BUILTIN_ADDPS256,
27825 IX86_BUILTIN_ADDSUBPD256,
27826 IX86_BUILTIN_ADDSUBPS256,
27827 IX86_BUILTIN_ANDPD256,
27828 IX86_BUILTIN_ANDPS256,
27829 IX86_BUILTIN_ANDNPD256,
27830 IX86_BUILTIN_ANDNPS256,
27831 IX86_BUILTIN_BLENDPD256,
27832 IX86_BUILTIN_BLENDPS256,
27833 IX86_BUILTIN_BLENDVPD256,
27834 IX86_BUILTIN_BLENDVPS256,
27835 IX86_BUILTIN_DIVPD256,
27836 IX86_BUILTIN_DIVPS256,
27837 IX86_BUILTIN_DPPS256,
27838 IX86_BUILTIN_HADDPD256,
27839 IX86_BUILTIN_HADDPS256,
27840 IX86_BUILTIN_HSUBPD256,
27841 IX86_BUILTIN_HSUBPS256,
27842 IX86_BUILTIN_MAXPD256,
27843 IX86_BUILTIN_MAXPS256,
27844 IX86_BUILTIN_MINPD256,
27845 IX86_BUILTIN_MINPS256,
27846 IX86_BUILTIN_MULPD256,
27847 IX86_BUILTIN_MULPS256,
27848 IX86_BUILTIN_ORPD256,
27849 IX86_BUILTIN_ORPS256,
27850 IX86_BUILTIN_SHUFPD256,
27851 IX86_BUILTIN_SHUFPS256,
27852 IX86_BUILTIN_SUBPD256,
27853 IX86_BUILTIN_SUBPS256,
27854 IX86_BUILTIN_XORPD256,
27855 IX86_BUILTIN_XORPS256,
27856 IX86_BUILTIN_CMPSD,
27857 IX86_BUILTIN_CMPSS,
27858 IX86_BUILTIN_CMPPD,
27859 IX86_BUILTIN_CMPPS,
27860 IX86_BUILTIN_CMPPD256,
27861 IX86_BUILTIN_CMPPS256,
27862 IX86_BUILTIN_CVTDQ2PD256,
27863 IX86_BUILTIN_CVTDQ2PS256,
27864 IX86_BUILTIN_CVTPD2PS256,
27865 IX86_BUILTIN_CVTPS2DQ256,
27866 IX86_BUILTIN_CVTPS2PD256,
27867 IX86_BUILTIN_CVTTPD2DQ256,
27868 IX86_BUILTIN_CVTPD2DQ256,
27869 IX86_BUILTIN_CVTTPS2DQ256,
27870 IX86_BUILTIN_EXTRACTF128PD256,
27871 IX86_BUILTIN_EXTRACTF128PS256,
27872 IX86_BUILTIN_EXTRACTF128SI256,
27873 IX86_BUILTIN_VZEROALL,
27874 IX86_BUILTIN_VZEROUPPER,
27875 IX86_BUILTIN_VPERMILVARPD,
27876 IX86_BUILTIN_VPERMILVARPS,
27877 IX86_BUILTIN_VPERMILVARPD256,
27878 IX86_BUILTIN_VPERMILVARPS256,
27879 IX86_BUILTIN_VPERMILPD,
27880 IX86_BUILTIN_VPERMILPS,
27881 IX86_BUILTIN_VPERMILPD256,
27882 IX86_BUILTIN_VPERMILPS256,
27883 IX86_BUILTIN_VPERMIL2PD,
27884 IX86_BUILTIN_VPERMIL2PS,
27885 IX86_BUILTIN_VPERMIL2PD256,
27886 IX86_BUILTIN_VPERMIL2PS256,
27887 IX86_BUILTIN_VPERM2F128PD256,
27888 IX86_BUILTIN_VPERM2F128PS256,
27889 IX86_BUILTIN_VPERM2F128SI256,
27890 IX86_BUILTIN_VBROADCASTSS,
27891 IX86_BUILTIN_VBROADCASTSD256,
27892 IX86_BUILTIN_VBROADCASTSS256,
27893 IX86_BUILTIN_VBROADCASTPD256,
27894 IX86_BUILTIN_VBROADCASTPS256,
27895 IX86_BUILTIN_VINSERTF128PD256,
27896 IX86_BUILTIN_VINSERTF128PS256,
27897 IX86_BUILTIN_VINSERTF128SI256,
27898 IX86_BUILTIN_LOADUPD256,
27899 IX86_BUILTIN_LOADUPS256,
27900 IX86_BUILTIN_STOREUPD256,
27901 IX86_BUILTIN_STOREUPS256,
27902 IX86_BUILTIN_LDDQU256,
27903 IX86_BUILTIN_MOVNTDQ256,
27904 IX86_BUILTIN_MOVNTPD256,
27905 IX86_BUILTIN_MOVNTPS256,
27906 IX86_BUILTIN_LOADDQU256,
27907 IX86_BUILTIN_STOREDQU256,
27908 IX86_BUILTIN_MASKLOADPD,
27909 IX86_BUILTIN_MASKLOADPS,
27910 IX86_BUILTIN_MASKSTOREPD,
27911 IX86_BUILTIN_MASKSTOREPS,
27912 IX86_BUILTIN_MASKLOADPD256,
27913 IX86_BUILTIN_MASKLOADPS256,
27914 IX86_BUILTIN_MASKSTOREPD256,
27915 IX86_BUILTIN_MASKSTOREPS256,
27916 IX86_BUILTIN_MOVSHDUP256,
27917 IX86_BUILTIN_MOVSLDUP256,
27918 IX86_BUILTIN_MOVDDUP256,
27919
27920 IX86_BUILTIN_SQRTPD256,
27921 IX86_BUILTIN_SQRTPS256,
27922 IX86_BUILTIN_SQRTPS_NR256,
27923 IX86_BUILTIN_RSQRTPS256,
27924 IX86_BUILTIN_RSQRTPS_NR256,
27925
27926 IX86_BUILTIN_RCPPS256,
27927
27928 IX86_BUILTIN_ROUNDPD256,
27929 IX86_BUILTIN_ROUNDPS256,
27930
27931 IX86_BUILTIN_FLOORPD256,
27932 IX86_BUILTIN_CEILPD256,
27933 IX86_BUILTIN_TRUNCPD256,
27934 IX86_BUILTIN_RINTPD256,
27935 IX86_BUILTIN_ROUNDPD_AZ256,
27936
27937 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27938 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27939 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27940
27941 IX86_BUILTIN_FLOORPS256,
27942 IX86_BUILTIN_CEILPS256,
27943 IX86_BUILTIN_TRUNCPS256,
27944 IX86_BUILTIN_RINTPS256,
27945 IX86_BUILTIN_ROUNDPS_AZ256,
27946
27947 IX86_BUILTIN_FLOORPS_SFIX256,
27948 IX86_BUILTIN_CEILPS_SFIX256,
27949 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27950
27951 IX86_BUILTIN_UNPCKHPD256,
27952 IX86_BUILTIN_UNPCKLPD256,
27953 IX86_BUILTIN_UNPCKHPS256,
27954 IX86_BUILTIN_UNPCKLPS256,
27955
27956 IX86_BUILTIN_SI256_SI,
27957 IX86_BUILTIN_PS256_PS,
27958 IX86_BUILTIN_PD256_PD,
27959 IX86_BUILTIN_SI_SI256,
27960 IX86_BUILTIN_PS_PS256,
27961 IX86_BUILTIN_PD_PD256,
27962
27963 IX86_BUILTIN_VTESTZPD,
27964 IX86_BUILTIN_VTESTCPD,
27965 IX86_BUILTIN_VTESTNZCPD,
27966 IX86_BUILTIN_VTESTZPS,
27967 IX86_BUILTIN_VTESTCPS,
27968 IX86_BUILTIN_VTESTNZCPS,
27969 IX86_BUILTIN_VTESTZPD256,
27970 IX86_BUILTIN_VTESTCPD256,
27971 IX86_BUILTIN_VTESTNZCPD256,
27972 IX86_BUILTIN_VTESTZPS256,
27973 IX86_BUILTIN_VTESTCPS256,
27974 IX86_BUILTIN_VTESTNZCPS256,
27975 IX86_BUILTIN_PTESTZ256,
27976 IX86_BUILTIN_PTESTC256,
27977 IX86_BUILTIN_PTESTNZC256,
27978
27979 IX86_BUILTIN_MOVMSKPD256,
27980 IX86_BUILTIN_MOVMSKPS256,
27981
27982 /* AVX2 */
27983 IX86_BUILTIN_MPSADBW256,
27984 IX86_BUILTIN_PABSB256,
27985 IX86_BUILTIN_PABSW256,
27986 IX86_BUILTIN_PABSD256,
27987 IX86_BUILTIN_PACKSSDW256,
27988 IX86_BUILTIN_PACKSSWB256,
27989 IX86_BUILTIN_PACKUSDW256,
27990 IX86_BUILTIN_PACKUSWB256,
27991 IX86_BUILTIN_PADDB256,
27992 IX86_BUILTIN_PADDW256,
27993 IX86_BUILTIN_PADDD256,
27994 IX86_BUILTIN_PADDQ256,
27995 IX86_BUILTIN_PADDSB256,
27996 IX86_BUILTIN_PADDSW256,
27997 IX86_BUILTIN_PADDUSB256,
27998 IX86_BUILTIN_PADDUSW256,
27999 IX86_BUILTIN_PALIGNR256,
28000 IX86_BUILTIN_AND256I,
28001 IX86_BUILTIN_ANDNOT256I,
28002 IX86_BUILTIN_PAVGB256,
28003 IX86_BUILTIN_PAVGW256,
28004 IX86_BUILTIN_PBLENDVB256,
28005 IX86_BUILTIN_PBLENDVW256,
28006 IX86_BUILTIN_PCMPEQB256,
28007 IX86_BUILTIN_PCMPEQW256,
28008 IX86_BUILTIN_PCMPEQD256,
28009 IX86_BUILTIN_PCMPEQQ256,
28010 IX86_BUILTIN_PCMPGTB256,
28011 IX86_BUILTIN_PCMPGTW256,
28012 IX86_BUILTIN_PCMPGTD256,
28013 IX86_BUILTIN_PCMPGTQ256,
28014 IX86_BUILTIN_PHADDW256,
28015 IX86_BUILTIN_PHADDD256,
28016 IX86_BUILTIN_PHADDSW256,
28017 IX86_BUILTIN_PHSUBW256,
28018 IX86_BUILTIN_PHSUBD256,
28019 IX86_BUILTIN_PHSUBSW256,
28020 IX86_BUILTIN_PMADDUBSW256,
28021 IX86_BUILTIN_PMADDWD256,
28022 IX86_BUILTIN_PMAXSB256,
28023 IX86_BUILTIN_PMAXSW256,
28024 IX86_BUILTIN_PMAXSD256,
28025 IX86_BUILTIN_PMAXUB256,
28026 IX86_BUILTIN_PMAXUW256,
28027 IX86_BUILTIN_PMAXUD256,
28028 IX86_BUILTIN_PMINSB256,
28029 IX86_BUILTIN_PMINSW256,
28030 IX86_BUILTIN_PMINSD256,
28031 IX86_BUILTIN_PMINUB256,
28032 IX86_BUILTIN_PMINUW256,
28033 IX86_BUILTIN_PMINUD256,
28034 IX86_BUILTIN_PMOVMSKB256,
28035 IX86_BUILTIN_PMOVSXBW256,
28036 IX86_BUILTIN_PMOVSXBD256,
28037 IX86_BUILTIN_PMOVSXBQ256,
28038 IX86_BUILTIN_PMOVSXWD256,
28039 IX86_BUILTIN_PMOVSXWQ256,
28040 IX86_BUILTIN_PMOVSXDQ256,
28041 IX86_BUILTIN_PMOVZXBW256,
28042 IX86_BUILTIN_PMOVZXBD256,
28043 IX86_BUILTIN_PMOVZXBQ256,
28044 IX86_BUILTIN_PMOVZXWD256,
28045 IX86_BUILTIN_PMOVZXWQ256,
28046 IX86_BUILTIN_PMOVZXDQ256,
28047 IX86_BUILTIN_PMULDQ256,
28048 IX86_BUILTIN_PMULHRSW256,
28049 IX86_BUILTIN_PMULHUW256,
28050 IX86_BUILTIN_PMULHW256,
28051 IX86_BUILTIN_PMULLW256,
28052 IX86_BUILTIN_PMULLD256,
28053 IX86_BUILTIN_PMULUDQ256,
28054 IX86_BUILTIN_POR256,
28055 IX86_BUILTIN_PSADBW256,
28056 IX86_BUILTIN_PSHUFB256,
28057 IX86_BUILTIN_PSHUFD256,
28058 IX86_BUILTIN_PSHUFHW256,
28059 IX86_BUILTIN_PSHUFLW256,
28060 IX86_BUILTIN_PSIGNB256,
28061 IX86_BUILTIN_PSIGNW256,
28062 IX86_BUILTIN_PSIGND256,
28063 IX86_BUILTIN_PSLLDQI256,
28064 IX86_BUILTIN_PSLLWI256,
28065 IX86_BUILTIN_PSLLW256,
28066 IX86_BUILTIN_PSLLDI256,
28067 IX86_BUILTIN_PSLLD256,
28068 IX86_BUILTIN_PSLLQI256,
28069 IX86_BUILTIN_PSLLQ256,
28070 IX86_BUILTIN_PSRAWI256,
28071 IX86_BUILTIN_PSRAW256,
28072 IX86_BUILTIN_PSRADI256,
28073 IX86_BUILTIN_PSRAD256,
28074 IX86_BUILTIN_PSRLDQI256,
28075 IX86_BUILTIN_PSRLWI256,
28076 IX86_BUILTIN_PSRLW256,
28077 IX86_BUILTIN_PSRLDI256,
28078 IX86_BUILTIN_PSRLD256,
28079 IX86_BUILTIN_PSRLQI256,
28080 IX86_BUILTIN_PSRLQ256,
28081 IX86_BUILTIN_PSUBB256,
28082 IX86_BUILTIN_PSUBW256,
28083 IX86_BUILTIN_PSUBD256,
28084 IX86_BUILTIN_PSUBQ256,
28085 IX86_BUILTIN_PSUBSB256,
28086 IX86_BUILTIN_PSUBSW256,
28087 IX86_BUILTIN_PSUBUSB256,
28088 IX86_BUILTIN_PSUBUSW256,
28089 IX86_BUILTIN_PUNPCKHBW256,
28090 IX86_BUILTIN_PUNPCKHWD256,
28091 IX86_BUILTIN_PUNPCKHDQ256,
28092 IX86_BUILTIN_PUNPCKHQDQ256,
28093 IX86_BUILTIN_PUNPCKLBW256,
28094 IX86_BUILTIN_PUNPCKLWD256,
28095 IX86_BUILTIN_PUNPCKLDQ256,
28096 IX86_BUILTIN_PUNPCKLQDQ256,
28097 IX86_BUILTIN_PXOR256,
28098 IX86_BUILTIN_MOVNTDQA256,
28099 IX86_BUILTIN_VBROADCASTSS_PS,
28100 IX86_BUILTIN_VBROADCASTSS_PS256,
28101 IX86_BUILTIN_VBROADCASTSD_PD256,
28102 IX86_BUILTIN_VBROADCASTSI256,
28103 IX86_BUILTIN_PBLENDD256,
28104 IX86_BUILTIN_PBLENDD128,
28105 IX86_BUILTIN_PBROADCASTB256,
28106 IX86_BUILTIN_PBROADCASTW256,
28107 IX86_BUILTIN_PBROADCASTD256,
28108 IX86_BUILTIN_PBROADCASTQ256,
28109 IX86_BUILTIN_PBROADCASTB128,
28110 IX86_BUILTIN_PBROADCASTW128,
28111 IX86_BUILTIN_PBROADCASTD128,
28112 IX86_BUILTIN_PBROADCASTQ128,
28113 IX86_BUILTIN_VPERMVARSI256,
28114 IX86_BUILTIN_VPERMDF256,
28115 IX86_BUILTIN_VPERMVARSF256,
28116 IX86_BUILTIN_VPERMDI256,
28117 IX86_BUILTIN_VPERMTI256,
28118 IX86_BUILTIN_VEXTRACT128I256,
28119 IX86_BUILTIN_VINSERT128I256,
28120 IX86_BUILTIN_MASKLOADD,
28121 IX86_BUILTIN_MASKLOADQ,
28122 IX86_BUILTIN_MASKLOADD256,
28123 IX86_BUILTIN_MASKLOADQ256,
28124 IX86_BUILTIN_MASKSTORED,
28125 IX86_BUILTIN_MASKSTOREQ,
28126 IX86_BUILTIN_MASKSTORED256,
28127 IX86_BUILTIN_MASKSTOREQ256,
28128 IX86_BUILTIN_PSLLVV4DI,
28129 IX86_BUILTIN_PSLLVV2DI,
28130 IX86_BUILTIN_PSLLVV8SI,
28131 IX86_BUILTIN_PSLLVV4SI,
28132 IX86_BUILTIN_PSRAVV8SI,
28133 IX86_BUILTIN_PSRAVV4SI,
28134 IX86_BUILTIN_PSRLVV4DI,
28135 IX86_BUILTIN_PSRLVV2DI,
28136 IX86_BUILTIN_PSRLVV8SI,
28137 IX86_BUILTIN_PSRLVV4SI,
28138
28139 IX86_BUILTIN_GATHERSIV2DF,
28140 IX86_BUILTIN_GATHERSIV4DF,
28141 IX86_BUILTIN_GATHERDIV2DF,
28142 IX86_BUILTIN_GATHERDIV4DF,
28143 IX86_BUILTIN_GATHERSIV4SF,
28144 IX86_BUILTIN_GATHERSIV8SF,
28145 IX86_BUILTIN_GATHERDIV4SF,
28146 IX86_BUILTIN_GATHERDIV8SF,
28147 IX86_BUILTIN_GATHERSIV2DI,
28148 IX86_BUILTIN_GATHERSIV4DI,
28149 IX86_BUILTIN_GATHERDIV2DI,
28150 IX86_BUILTIN_GATHERDIV4DI,
28151 IX86_BUILTIN_GATHERSIV4SI,
28152 IX86_BUILTIN_GATHERSIV8SI,
28153 IX86_BUILTIN_GATHERDIV4SI,
28154 IX86_BUILTIN_GATHERDIV8SI,
28155
28156 /* AVX512F */
28157 IX86_BUILTIN_SI512_SI256,
28158 IX86_BUILTIN_PD512_PD256,
28159 IX86_BUILTIN_PS512_PS256,
28160 IX86_BUILTIN_SI512_SI,
28161 IX86_BUILTIN_PD512_PD,
28162 IX86_BUILTIN_PS512_PS,
28163 IX86_BUILTIN_ADDPD512,
28164 IX86_BUILTIN_ADDPS512,
28165 IX86_BUILTIN_ADDSD_ROUND,
28166 IX86_BUILTIN_ADDSS_ROUND,
28167 IX86_BUILTIN_ALIGND512,
28168 IX86_BUILTIN_ALIGNQ512,
28169 IX86_BUILTIN_BLENDMD512,
28170 IX86_BUILTIN_BLENDMPD512,
28171 IX86_BUILTIN_BLENDMPS512,
28172 IX86_BUILTIN_BLENDMQ512,
28173 IX86_BUILTIN_BROADCASTF32X4_512,
28174 IX86_BUILTIN_BROADCASTF64X4_512,
28175 IX86_BUILTIN_BROADCASTI32X4_512,
28176 IX86_BUILTIN_BROADCASTI64X4_512,
28177 IX86_BUILTIN_BROADCASTSD512,
28178 IX86_BUILTIN_BROADCASTSS512,
28179 IX86_BUILTIN_CMPD512,
28180 IX86_BUILTIN_CMPPD512,
28181 IX86_BUILTIN_CMPPS512,
28182 IX86_BUILTIN_CMPQ512,
28183 IX86_BUILTIN_CMPSD_MASK,
28184 IX86_BUILTIN_CMPSS_MASK,
28185 IX86_BUILTIN_COMIDF,
28186 IX86_BUILTIN_COMISF,
28187 IX86_BUILTIN_COMPRESSPD512,
28188 IX86_BUILTIN_COMPRESSPDSTORE512,
28189 IX86_BUILTIN_COMPRESSPS512,
28190 IX86_BUILTIN_COMPRESSPSSTORE512,
28191 IX86_BUILTIN_CVTDQ2PD512,
28192 IX86_BUILTIN_CVTDQ2PS512,
28193 IX86_BUILTIN_CVTPD2DQ512,
28194 IX86_BUILTIN_CVTPD2PS512,
28195 IX86_BUILTIN_CVTPD2UDQ512,
28196 IX86_BUILTIN_CVTPH2PS512,
28197 IX86_BUILTIN_CVTPS2DQ512,
28198 IX86_BUILTIN_CVTPS2PD512,
28199 IX86_BUILTIN_CVTPS2PH512,
28200 IX86_BUILTIN_CVTPS2UDQ512,
28201 IX86_BUILTIN_CVTSD2SS_ROUND,
28202 IX86_BUILTIN_CVTSI2SD64,
28203 IX86_BUILTIN_CVTSI2SS32,
28204 IX86_BUILTIN_CVTSI2SS64,
28205 IX86_BUILTIN_CVTSS2SD_ROUND,
28206 IX86_BUILTIN_CVTTPD2DQ512,
28207 IX86_BUILTIN_CVTTPD2UDQ512,
28208 IX86_BUILTIN_CVTTPS2DQ512,
28209 IX86_BUILTIN_CVTTPS2UDQ512,
28210 IX86_BUILTIN_CVTUDQ2PD512,
28211 IX86_BUILTIN_CVTUDQ2PS512,
28212 IX86_BUILTIN_CVTUSI2SD32,
28213 IX86_BUILTIN_CVTUSI2SD64,
28214 IX86_BUILTIN_CVTUSI2SS32,
28215 IX86_BUILTIN_CVTUSI2SS64,
28216 IX86_BUILTIN_DIVPD512,
28217 IX86_BUILTIN_DIVPS512,
28218 IX86_BUILTIN_DIVSD_ROUND,
28219 IX86_BUILTIN_DIVSS_ROUND,
28220 IX86_BUILTIN_EXPANDPD512,
28221 IX86_BUILTIN_EXPANDPD512Z,
28222 IX86_BUILTIN_EXPANDPDLOAD512,
28223 IX86_BUILTIN_EXPANDPDLOAD512Z,
28224 IX86_BUILTIN_EXPANDPS512,
28225 IX86_BUILTIN_EXPANDPS512Z,
28226 IX86_BUILTIN_EXPANDPSLOAD512,
28227 IX86_BUILTIN_EXPANDPSLOAD512Z,
28228 IX86_BUILTIN_EXTRACTF32X4,
28229 IX86_BUILTIN_EXTRACTF64X4,
28230 IX86_BUILTIN_EXTRACTI32X4,
28231 IX86_BUILTIN_EXTRACTI64X4,
28232 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28233 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28234 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28235 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28236 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28237 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28238 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28239 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28240 IX86_BUILTIN_GETEXPPD512,
28241 IX86_BUILTIN_GETEXPPS512,
28242 IX86_BUILTIN_GETEXPSD128,
28243 IX86_BUILTIN_GETEXPSS128,
28244 IX86_BUILTIN_GETMANTPD512,
28245 IX86_BUILTIN_GETMANTPS512,
28246 IX86_BUILTIN_GETMANTSD128,
28247 IX86_BUILTIN_GETMANTSS128,
28248 IX86_BUILTIN_INSERTF32X4,
28249 IX86_BUILTIN_INSERTF64X4,
28250 IX86_BUILTIN_INSERTI32X4,
28251 IX86_BUILTIN_INSERTI64X4,
28252 IX86_BUILTIN_LOADAPD512,
28253 IX86_BUILTIN_LOADAPS512,
28254 IX86_BUILTIN_LOADDQUDI512,
28255 IX86_BUILTIN_LOADDQUSI512,
28256 IX86_BUILTIN_LOADUPD512,
28257 IX86_BUILTIN_LOADUPS512,
28258 IX86_BUILTIN_MAXPD512,
28259 IX86_BUILTIN_MAXPS512,
28260 IX86_BUILTIN_MAXSD_ROUND,
28261 IX86_BUILTIN_MAXSS_ROUND,
28262 IX86_BUILTIN_MINPD512,
28263 IX86_BUILTIN_MINPS512,
28264 IX86_BUILTIN_MINSD_ROUND,
28265 IX86_BUILTIN_MINSS_ROUND,
28266 IX86_BUILTIN_MOVAPD512,
28267 IX86_BUILTIN_MOVAPS512,
28268 IX86_BUILTIN_MOVDDUP512,
28269 IX86_BUILTIN_MOVDQA32LOAD512,
28270 IX86_BUILTIN_MOVDQA32STORE512,
28271 IX86_BUILTIN_MOVDQA32_512,
28272 IX86_BUILTIN_MOVDQA64LOAD512,
28273 IX86_BUILTIN_MOVDQA64STORE512,
28274 IX86_BUILTIN_MOVDQA64_512,
28275 IX86_BUILTIN_MOVNTDQ512,
28276 IX86_BUILTIN_MOVNTDQA512,
28277 IX86_BUILTIN_MOVNTPD512,
28278 IX86_BUILTIN_MOVNTPS512,
28279 IX86_BUILTIN_MOVSHDUP512,
28280 IX86_BUILTIN_MOVSLDUP512,
28281 IX86_BUILTIN_MULPD512,
28282 IX86_BUILTIN_MULPS512,
28283 IX86_BUILTIN_MULSD_ROUND,
28284 IX86_BUILTIN_MULSS_ROUND,
28285 IX86_BUILTIN_PABSD512,
28286 IX86_BUILTIN_PABSQ512,
28287 IX86_BUILTIN_PADDD512,
28288 IX86_BUILTIN_PADDQ512,
28289 IX86_BUILTIN_PANDD512,
28290 IX86_BUILTIN_PANDND512,
28291 IX86_BUILTIN_PANDNQ512,
28292 IX86_BUILTIN_PANDQ512,
28293 IX86_BUILTIN_PBROADCASTD512,
28294 IX86_BUILTIN_PBROADCASTD512_GPR,
28295 IX86_BUILTIN_PBROADCASTMB512,
28296 IX86_BUILTIN_PBROADCASTMW512,
28297 IX86_BUILTIN_PBROADCASTQ512,
28298 IX86_BUILTIN_PBROADCASTQ512_GPR,
28299 IX86_BUILTIN_PBROADCASTQ512_MEM,
28300 IX86_BUILTIN_PCMPEQD512_MASK,
28301 IX86_BUILTIN_PCMPEQQ512_MASK,
28302 IX86_BUILTIN_PCMPGTD512_MASK,
28303 IX86_BUILTIN_PCMPGTQ512_MASK,
28304 IX86_BUILTIN_PCOMPRESSD512,
28305 IX86_BUILTIN_PCOMPRESSDSTORE512,
28306 IX86_BUILTIN_PCOMPRESSQ512,
28307 IX86_BUILTIN_PCOMPRESSQSTORE512,
28308 IX86_BUILTIN_PEXPANDD512,
28309 IX86_BUILTIN_PEXPANDD512Z,
28310 IX86_BUILTIN_PEXPANDDLOAD512,
28311 IX86_BUILTIN_PEXPANDDLOAD512Z,
28312 IX86_BUILTIN_PEXPANDQ512,
28313 IX86_BUILTIN_PEXPANDQ512Z,
28314 IX86_BUILTIN_PEXPANDQLOAD512,
28315 IX86_BUILTIN_PEXPANDQLOAD512Z,
28316 IX86_BUILTIN_PMAXSD512,
28317 IX86_BUILTIN_PMAXSQ512,
28318 IX86_BUILTIN_PMAXUD512,
28319 IX86_BUILTIN_PMAXUQ512,
28320 IX86_BUILTIN_PMINSD512,
28321 IX86_BUILTIN_PMINSQ512,
28322 IX86_BUILTIN_PMINUD512,
28323 IX86_BUILTIN_PMINUQ512,
28324 IX86_BUILTIN_PMOVDB512,
28325 IX86_BUILTIN_PMOVDB512_MEM,
28326 IX86_BUILTIN_PMOVDW512,
28327 IX86_BUILTIN_PMOVDW512_MEM,
28328 IX86_BUILTIN_PMOVQB512,
28329 IX86_BUILTIN_PMOVQB512_MEM,
28330 IX86_BUILTIN_PMOVQD512,
28331 IX86_BUILTIN_PMOVQD512_MEM,
28332 IX86_BUILTIN_PMOVQW512,
28333 IX86_BUILTIN_PMOVQW512_MEM,
28334 IX86_BUILTIN_PMOVSDB512,
28335 IX86_BUILTIN_PMOVSDB512_MEM,
28336 IX86_BUILTIN_PMOVSDW512,
28337 IX86_BUILTIN_PMOVSDW512_MEM,
28338 IX86_BUILTIN_PMOVSQB512,
28339 IX86_BUILTIN_PMOVSQB512_MEM,
28340 IX86_BUILTIN_PMOVSQD512,
28341 IX86_BUILTIN_PMOVSQD512_MEM,
28342 IX86_BUILTIN_PMOVSQW512,
28343 IX86_BUILTIN_PMOVSQW512_MEM,
28344 IX86_BUILTIN_PMOVSXBD512,
28345 IX86_BUILTIN_PMOVSXBQ512,
28346 IX86_BUILTIN_PMOVSXDQ512,
28347 IX86_BUILTIN_PMOVSXWD512,
28348 IX86_BUILTIN_PMOVSXWQ512,
28349 IX86_BUILTIN_PMOVUSDB512,
28350 IX86_BUILTIN_PMOVUSDB512_MEM,
28351 IX86_BUILTIN_PMOVUSDW512,
28352 IX86_BUILTIN_PMOVUSDW512_MEM,
28353 IX86_BUILTIN_PMOVUSQB512,
28354 IX86_BUILTIN_PMOVUSQB512_MEM,
28355 IX86_BUILTIN_PMOVUSQD512,
28356 IX86_BUILTIN_PMOVUSQD512_MEM,
28357 IX86_BUILTIN_PMOVUSQW512,
28358 IX86_BUILTIN_PMOVUSQW512_MEM,
28359 IX86_BUILTIN_PMOVZXBD512,
28360 IX86_BUILTIN_PMOVZXBQ512,
28361 IX86_BUILTIN_PMOVZXDQ512,
28362 IX86_BUILTIN_PMOVZXWD512,
28363 IX86_BUILTIN_PMOVZXWQ512,
28364 IX86_BUILTIN_PMULDQ512,
28365 IX86_BUILTIN_PMULLD512,
28366 IX86_BUILTIN_PMULUDQ512,
28367 IX86_BUILTIN_PORD512,
28368 IX86_BUILTIN_PORQ512,
28369 IX86_BUILTIN_PROLD512,
28370 IX86_BUILTIN_PROLQ512,
28371 IX86_BUILTIN_PROLVD512,
28372 IX86_BUILTIN_PROLVQ512,
28373 IX86_BUILTIN_PRORD512,
28374 IX86_BUILTIN_PRORQ512,
28375 IX86_BUILTIN_PRORVD512,
28376 IX86_BUILTIN_PRORVQ512,
28377 IX86_BUILTIN_PSHUFD512,
28378 IX86_BUILTIN_PSLLD512,
28379 IX86_BUILTIN_PSLLDI512,
28380 IX86_BUILTIN_PSLLQ512,
28381 IX86_BUILTIN_PSLLQI512,
28382 IX86_BUILTIN_PSLLVV16SI,
28383 IX86_BUILTIN_PSLLVV8DI,
28384 IX86_BUILTIN_PSRAD512,
28385 IX86_BUILTIN_PSRADI512,
28386 IX86_BUILTIN_PSRAQ512,
28387 IX86_BUILTIN_PSRAQI512,
28388 IX86_BUILTIN_PSRAVV16SI,
28389 IX86_BUILTIN_PSRAVV8DI,
28390 IX86_BUILTIN_PSRLD512,
28391 IX86_BUILTIN_PSRLDI512,
28392 IX86_BUILTIN_PSRLQ512,
28393 IX86_BUILTIN_PSRLQI512,
28394 IX86_BUILTIN_PSRLVV16SI,
28395 IX86_BUILTIN_PSRLVV8DI,
28396 IX86_BUILTIN_PSUBD512,
28397 IX86_BUILTIN_PSUBQ512,
28398 IX86_BUILTIN_PTESTMD512,
28399 IX86_BUILTIN_PTESTMQ512,
28400 IX86_BUILTIN_PTESTNMD512,
28401 IX86_BUILTIN_PTESTNMQ512,
28402 IX86_BUILTIN_PUNPCKHDQ512,
28403 IX86_BUILTIN_PUNPCKHQDQ512,
28404 IX86_BUILTIN_PUNPCKLDQ512,
28405 IX86_BUILTIN_PUNPCKLQDQ512,
28406 IX86_BUILTIN_PXORD512,
28407 IX86_BUILTIN_PXORQ512,
28408 IX86_BUILTIN_RCP14PD512,
28409 IX86_BUILTIN_RCP14PS512,
28410 IX86_BUILTIN_RCP14SD,
28411 IX86_BUILTIN_RCP14SS,
28412 IX86_BUILTIN_RNDSCALEPD,
28413 IX86_BUILTIN_RNDSCALEPS,
28414 IX86_BUILTIN_RNDSCALESD,
28415 IX86_BUILTIN_RNDSCALESS,
28416 IX86_BUILTIN_RSQRT14PD512,
28417 IX86_BUILTIN_RSQRT14PS512,
28418 IX86_BUILTIN_RSQRT14SD,
28419 IX86_BUILTIN_RSQRT14SS,
28420 IX86_BUILTIN_SCALEFPD512,
28421 IX86_BUILTIN_SCALEFPS512,
28422 IX86_BUILTIN_SCALEFSD,
28423 IX86_BUILTIN_SCALEFSS,
28424 IX86_BUILTIN_SHUFPD512,
28425 IX86_BUILTIN_SHUFPS512,
28426 IX86_BUILTIN_SHUF_F32x4,
28427 IX86_BUILTIN_SHUF_F64x2,
28428 IX86_BUILTIN_SHUF_I32x4,
28429 IX86_BUILTIN_SHUF_I64x2,
28430 IX86_BUILTIN_SQRTPD512,
28431 IX86_BUILTIN_SQRTPD512_MASK,
28432 IX86_BUILTIN_SQRTPS512_MASK,
28433 IX86_BUILTIN_SQRTPS_NR512,
28434 IX86_BUILTIN_SQRTSD_ROUND,
28435 IX86_BUILTIN_SQRTSS_ROUND,
28436 IX86_BUILTIN_STOREAPD512,
28437 IX86_BUILTIN_STOREAPS512,
28438 IX86_BUILTIN_STOREDQUDI512,
28439 IX86_BUILTIN_STOREDQUSI512,
28440 IX86_BUILTIN_STOREUPD512,
28441 IX86_BUILTIN_STOREUPS512,
28442 IX86_BUILTIN_SUBPD512,
28443 IX86_BUILTIN_SUBPS512,
28444 IX86_BUILTIN_SUBSD_ROUND,
28445 IX86_BUILTIN_SUBSS_ROUND,
28446 IX86_BUILTIN_UCMPD512,
28447 IX86_BUILTIN_UCMPQ512,
28448 IX86_BUILTIN_UNPCKHPD512,
28449 IX86_BUILTIN_UNPCKHPS512,
28450 IX86_BUILTIN_UNPCKLPD512,
28451 IX86_BUILTIN_UNPCKLPS512,
28452 IX86_BUILTIN_VCVTSD2SI32,
28453 IX86_BUILTIN_VCVTSD2SI64,
28454 IX86_BUILTIN_VCVTSD2USI32,
28455 IX86_BUILTIN_VCVTSD2USI64,
28456 IX86_BUILTIN_VCVTSS2SI32,
28457 IX86_BUILTIN_VCVTSS2SI64,
28458 IX86_BUILTIN_VCVTSS2USI32,
28459 IX86_BUILTIN_VCVTSS2USI64,
28460 IX86_BUILTIN_VCVTTSD2SI32,
28461 IX86_BUILTIN_VCVTTSD2SI64,
28462 IX86_BUILTIN_VCVTTSD2USI32,
28463 IX86_BUILTIN_VCVTTSD2USI64,
28464 IX86_BUILTIN_VCVTTSS2SI32,
28465 IX86_BUILTIN_VCVTTSS2SI64,
28466 IX86_BUILTIN_VCVTTSS2USI32,
28467 IX86_BUILTIN_VCVTTSS2USI64,
28468 IX86_BUILTIN_VFMADDPD512_MASK,
28469 IX86_BUILTIN_VFMADDPD512_MASK3,
28470 IX86_BUILTIN_VFMADDPD512_MASKZ,
28471 IX86_BUILTIN_VFMADDPS512_MASK,
28472 IX86_BUILTIN_VFMADDPS512_MASK3,
28473 IX86_BUILTIN_VFMADDPS512_MASKZ,
28474 IX86_BUILTIN_VFMADDSD3_ROUND,
28475 IX86_BUILTIN_VFMADDSS3_ROUND,
28476 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28477 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28478 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28479 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28480 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28481 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28482 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28483 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28484 IX86_BUILTIN_VFMSUBPD512_MASK3,
28485 IX86_BUILTIN_VFMSUBPS512_MASK3,
28486 IX86_BUILTIN_VFMSUBSD3_MASK3,
28487 IX86_BUILTIN_VFMSUBSS3_MASK3,
28488 IX86_BUILTIN_VFNMADDPD512_MASK,
28489 IX86_BUILTIN_VFNMADDPS512_MASK,
28490 IX86_BUILTIN_VFNMSUBPD512_MASK,
28491 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28492 IX86_BUILTIN_VFNMSUBPS512_MASK,
28493 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28494 IX86_BUILTIN_VPCLZCNTD512,
28495 IX86_BUILTIN_VPCLZCNTQ512,
28496 IX86_BUILTIN_VPCONFLICTD512,
28497 IX86_BUILTIN_VPCONFLICTQ512,
28498 IX86_BUILTIN_VPERMDF512,
28499 IX86_BUILTIN_VPERMDI512,
28500 IX86_BUILTIN_VPERMI2VARD512,
28501 IX86_BUILTIN_VPERMI2VARPD512,
28502 IX86_BUILTIN_VPERMI2VARPS512,
28503 IX86_BUILTIN_VPERMI2VARQ512,
28504 IX86_BUILTIN_VPERMILPD512,
28505 IX86_BUILTIN_VPERMILPS512,
28506 IX86_BUILTIN_VPERMILVARPD512,
28507 IX86_BUILTIN_VPERMILVARPS512,
28508 IX86_BUILTIN_VPERMT2VARD512,
28509 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28510 IX86_BUILTIN_VPERMT2VARPD512,
28511 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28512 IX86_BUILTIN_VPERMT2VARPS512,
28513 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28514 IX86_BUILTIN_VPERMT2VARQ512,
28515 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28516 IX86_BUILTIN_VPERMVARDF512,
28517 IX86_BUILTIN_VPERMVARDI512,
28518 IX86_BUILTIN_VPERMVARSF512,
28519 IX86_BUILTIN_VPERMVARSI512,
28520 IX86_BUILTIN_VTERNLOGD512_MASK,
28521 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28522 IX86_BUILTIN_VTERNLOGQ512_MASK,
28523 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28524
28525 /* Mask arithmetic operations */
28526 IX86_BUILTIN_KAND16,
28527 IX86_BUILTIN_KANDN16,
28528 IX86_BUILTIN_KNOT16,
28529 IX86_BUILTIN_KOR16,
28530 IX86_BUILTIN_KORTESTC16,
28531 IX86_BUILTIN_KORTESTZ16,
28532 IX86_BUILTIN_KUNPCKBW,
28533 IX86_BUILTIN_KXNOR16,
28534 IX86_BUILTIN_KXOR16,
28535 IX86_BUILTIN_KMOV16,
28536
28537 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28538 where all operands are 32 or 64 bytes wide respectively. */
28539 IX86_BUILTIN_GATHERALTSIV4DF,
28540 IX86_BUILTIN_GATHERALTDIV8SF,
28541 IX86_BUILTIN_GATHERALTSIV4DI,
28542 IX86_BUILTIN_GATHERALTDIV8SI,
28543 IX86_BUILTIN_GATHER3ALTDIV16SF,
28544 IX86_BUILTIN_GATHER3ALTDIV16SI,
28545 IX86_BUILTIN_GATHER3ALTSIV8DF,
28546 IX86_BUILTIN_GATHER3ALTSIV8DI,
28547 IX86_BUILTIN_GATHER3DIV16SF,
28548 IX86_BUILTIN_GATHER3DIV16SI,
28549 IX86_BUILTIN_GATHER3DIV8DF,
28550 IX86_BUILTIN_GATHER3DIV8DI,
28551 IX86_BUILTIN_GATHER3SIV16SF,
28552 IX86_BUILTIN_GATHER3SIV16SI,
28553 IX86_BUILTIN_GATHER3SIV8DF,
28554 IX86_BUILTIN_GATHER3SIV8DI,
28555 IX86_BUILTIN_SCATTERDIV16SF,
28556 IX86_BUILTIN_SCATTERDIV16SI,
28557 IX86_BUILTIN_SCATTERDIV8DF,
28558 IX86_BUILTIN_SCATTERDIV8DI,
28559 IX86_BUILTIN_SCATTERSIV16SF,
28560 IX86_BUILTIN_SCATTERSIV16SI,
28561 IX86_BUILTIN_SCATTERSIV8DF,
28562 IX86_BUILTIN_SCATTERSIV8DI,
28563
28564 /* AVX512PF */
28565 IX86_BUILTIN_GATHERPFQPD,
28566 IX86_BUILTIN_GATHERPFDPS,
28567 IX86_BUILTIN_GATHERPFDPD,
28568 IX86_BUILTIN_GATHERPFQPS,
28569 IX86_BUILTIN_SCATTERPFDPD,
28570 IX86_BUILTIN_SCATTERPFDPS,
28571 IX86_BUILTIN_SCATTERPFQPD,
28572 IX86_BUILTIN_SCATTERPFQPS,
28573
28574 /* AVX-512ER */
28575 IX86_BUILTIN_EXP2PD_MASK,
28576 IX86_BUILTIN_EXP2PS_MASK,
28577 IX86_BUILTIN_EXP2PS,
28578 IX86_BUILTIN_RCP28PD,
28579 IX86_BUILTIN_RCP28PS,
28580 IX86_BUILTIN_RCP28SD,
28581 IX86_BUILTIN_RCP28SS,
28582 IX86_BUILTIN_RSQRT28PD,
28583 IX86_BUILTIN_RSQRT28PS,
28584 IX86_BUILTIN_RSQRT28SD,
28585 IX86_BUILTIN_RSQRT28SS,
28586
28587 /* SHA builtins. */
28588 IX86_BUILTIN_SHA1MSG1,
28589 IX86_BUILTIN_SHA1MSG2,
28590 IX86_BUILTIN_SHA1NEXTE,
28591 IX86_BUILTIN_SHA1RNDS4,
28592 IX86_BUILTIN_SHA256MSG1,
28593 IX86_BUILTIN_SHA256MSG2,
28594 IX86_BUILTIN_SHA256RNDS2,
28595
28596 /* CLFLUSHOPT instructions. */
28597 IX86_BUILTIN_CLFLUSHOPT,
28598
28599 /* TFmode support builtins. */
28600 IX86_BUILTIN_INFQ,
28601 IX86_BUILTIN_HUGE_VALQ,
28602 IX86_BUILTIN_FABSQ,
28603 IX86_BUILTIN_COPYSIGNQ,
28604
28605 /* Vectorizer support builtins. */
28606 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28607 IX86_BUILTIN_CPYSGNPS,
28608 IX86_BUILTIN_CPYSGNPD,
28609 IX86_BUILTIN_CPYSGNPS256,
28610 IX86_BUILTIN_CPYSGNPS512,
28611 IX86_BUILTIN_CPYSGNPD256,
28612 IX86_BUILTIN_CPYSGNPD512,
28613 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28614 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28615
28616
28617 /* FMA4 instructions. */
28618 IX86_BUILTIN_VFMADDSS,
28619 IX86_BUILTIN_VFMADDSD,
28620 IX86_BUILTIN_VFMADDPS,
28621 IX86_BUILTIN_VFMADDPD,
28622 IX86_BUILTIN_VFMADDPS256,
28623 IX86_BUILTIN_VFMADDPD256,
28624 IX86_BUILTIN_VFMADDSUBPS,
28625 IX86_BUILTIN_VFMADDSUBPD,
28626 IX86_BUILTIN_VFMADDSUBPS256,
28627 IX86_BUILTIN_VFMADDSUBPD256,
28628
28629 /* FMA3 instructions. */
28630 IX86_BUILTIN_VFMADDSS3,
28631 IX86_BUILTIN_VFMADDSD3,
28632
28633 /* XOP instructions. */
28634 IX86_BUILTIN_VPCMOV,
28635 IX86_BUILTIN_VPCMOV_V2DI,
28636 IX86_BUILTIN_VPCMOV_V4SI,
28637 IX86_BUILTIN_VPCMOV_V8HI,
28638 IX86_BUILTIN_VPCMOV_V16QI,
28639 IX86_BUILTIN_VPCMOV_V4SF,
28640 IX86_BUILTIN_VPCMOV_V2DF,
28641 IX86_BUILTIN_VPCMOV256,
28642 IX86_BUILTIN_VPCMOV_V4DI256,
28643 IX86_BUILTIN_VPCMOV_V8SI256,
28644 IX86_BUILTIN_VPCMOV_V16HI256,
28645 IX86_BUILTIN_VPCMOV_V32QI256,
28646 IX86_BUILTIN_VPCMOV_V8SF256,
28647 IX86_BUILTIN_VPCMOV_V4DF256,
28648
28649 IX86_BUILTIN_VPPERM,
28650
28651 IX86_BUILTIN_VPMACSSWW,
28652 IX86_BUILTIN_VPMACSWW,
28653 IX86_BUILTIN_VPMACSSWD,
28654 IX86_BUILTIN_VPMACSWD,
28655 IX86_BUILTIN_VPMACSSDD,
28656 IX86_BUILTIN_VPMACSDD,
28657 IX86_BUILTIN_VPMACSSDQL,
28658 IX86_BUILTIN_VPMACSSDQH,
28659 IX86_BUILTIN_VPMACSDQL,
28660 IX86_BUILTIN_VPMACSDQH,
28661 IX86_BUILTIN_VPMADCSSWD,
28662 IX86_BUILTIN_VPMADCSWD,
28663
28664 IX86_BUILTIN_VPHADDBW,
28665 IX86_BUILTIN_VPHADDBD,
28666 IX86_BUILTIN_VPHADDBQ,
28667 IX86_BUILTIN_VPHADDWD,
28668 IX86_BUILTIN_VPHADDWQ,
28669 IX86_BUILTIN_VPHADDDQ,
28670 IX86_BUILTIN_VPHADDUBW,
28671 IX86_BUILTIN_VPHADDUBD,
28672 IX86_BUILTIN_VPHADDUBQ,
28673 IX86_BUILTIN_VPHADDUWD,
28674 IX86_BUILTIN_VPHADDUWQ,
28675 IX86_BUILTIN_VPHADDUDQ,
28676 IX86_BUILTIN_VPHSUBBW,
28677 IX86_BUILTIN_VPHSUBWD,
28678 IX86_BUILTIN_VPHSUBDQ,
28679
28680 IX86_BUILTIN_VPROTB,
28681 IX86_BUILTIN_VPROTW,
28682 IX86_BUILTIN_VPROTD,
28683 IX86_BUILTIN_VPROTQ,
28684 IX86_BUILTIN_VPROTB_IMM,
28685 IX86_BUILTIN_VPROTW_IMM,
28686 IX86_BUILTIN_VPROTD_IMM,
28687 IX86_BUILTIN_VPROTQ_IMM,
28688
28689 IX86_BUILTIN_VPSHLB,
28690 IX86_BUILTIN_VPSHLW,
28691 IX86_BUILTIN_VPSHLD,
28692 IX86_BUILTIN_VPSHLQ,
28693 IX86_BUILTIN_VPSHAB,
28694 IX86_BUILTIN_VPSHAW,
28695 IX86_BUILTIN_VPSHAD,
28696 IX86_BUILTIN_VPSHAQ,
28697
28698 IX86_BUILTIN_VFRCZSS,
28699 IX86_BUILTIN_VFRCZSD,
28700 IX86_BUILTIN_VFRCZPS,
28701 IX86_BUILTIN_VFRCZPD,
28702 IX86_BUILTIN_VFRCZPS256,
28703 IX86_BUILTIN_VFRCZPD256,
28704
28705 IX86_BUILTIN_VPCOMEQUB,
28706 IX86_BUILTIN_VPCOMNEUB,
28707 IX86_BUILTIN_VPCOMLTUB,
28708 IX86_BUILTIN_VPCOMLEUB,
28709 IX86_BUILTIN_VPCOMGTUB,
28710 IX86_BUILTIN_VPCOMGEUB,
28711 IX86_BUILTIN_VPCOMFALSEUB,
28712 IX86_BUILTIN_VPCOMTRUEUB,
28713
28714 IX86_BUILTIN_VPCOMEQUW,
28715 IX86_BUILTIN_VPCOMNEUW,
28716 IX86_BUILTIN_VPCOMLTUW,
28717 IX86_BUILTIN_VPCOMLEUW,
28718 IX86_BUILTIN_VPCOMGTUW,
28719 IX86_BUILTIN_VPCOMGEUW,
28720 IX86_BUILTIN_VPCOMFALSEUW,
28721 IX86_BUILTIN_VPCOMTRUEUW,
28722
28723 IX86_BUILTIN_VPCOMEQUD,
28724 IX86_BUILTIN_VPCOMNEUD,
28725 IX86_BUILTIN_VPCOMLTUD,
28726 IX86_BUILTIN_VPCOMLEUD,
28727 IX86_BUILTIN_VPCOMGTUD,
28728 IX86_BUILTIN_VPCOMGEUD,
28729 IX86_BUILTIN_VPCOMFALSEUD,
28730 IX86_BUILTIN_VPCOMTRUEUD,
28731
28732 IX86_BUILTIN_VPCOMEQUQ,
28733 IX86_BUILTIN_VPCOMNEUQ,
28734 IX86_BUILTIN_VPCOMLTUQ,
28735 IX86_BUILTIN_VPCOMLEUQ,
28736 IX86_BUILTIN_VPCOMGTUQ,
28737 IX86_BUILTIN_VPCOMGEUQ,
28738 IX86_BUILTIN_VPCOMFALSEUQ,
28739 IX86_BUILTIN_VPCOMTRUEUQ,
28740
28741 IX86_BUILTIN_VPCOMEQB,
28742 IX86_BUILTIN_VPCOMNEB,
28743 IX86_BUILTIN_VPCOMLTB,
28744 IX86_BUILTIN_VPCOMLEB,
28745 IX86_BUILTIN_VPCOMGTB,
28746 IX86_BUILTIN_VPCOMGEB,
28747 IX86_BUILTIN_VPCOMFALSEB,
28748 IX86_BUILTIN_VPCOMTRUEB,
28749
28750 IX86_BUILTIN_VPCOMEQW,
28751 IX86_BUILTIN_VPCOMNEW,
28752 IX86_BUILTIN_VPCOMLTW,
28753 IX86_BUILTIN_VPCOMLEW,
28754 IX86_BUILTIN_VPCOMGTW,
28755 IX86_BUILTIN_VPCOMGEW,
28756 IX86_BUILTIN_VPCOMFALSEW,
28757 IX86_BUILTIN_VPCOMTRUEW,
28758
28759 IX86_BUILTIN_VPCOMEQD,
28760 IX86_BUILTIN_VPCOMNED,
28761 IX86_BUILTIN_VPCOMLTD,
28762 IX86_BUILTIN_VPCOMLED,
28763 IX86_BUILTIN_VPCOMGTD,
28764 IX86_BUILTIN_VPCOMGED,
28765 IX86_BUILTIN_VPCOMFALSED,
28766 IX86_BUILTIN_VPCOMTRUED,
28767
28768 IX86_BUILTIN_VPCOMEQQ,
28769 IX86_BUILTIN_VPCOMNEQ,
28770 IX86_BUILTIN_VPCOMLTQ,
28771 IX86_BUILTIN_VPCOMLEQ,
28772 IX86_BUILTIN_VPCOMGTQ,
28773 IX86_BUILTIN_VPCOMGEQ,
28774 IX86_BUILTIN_VPCOMFALSEQ,
28775 IX86_BUILTIN_VPCOMTRUEQ,
28776
28777 /* LWP instructions. */
28778 IX86_BUILTIN_LLWPCB,
28779 IX86_BUILTIN_SLWPCB,
28780 IX86_BUILTIN_LWPVAL32,
28781 IX86_BUILTIN_LWPVAL64,
28782 IX86_BUILTIN_LWPINS32,
28783 IX86_BUILTIN_LWPINS64,
28784
28785 IX86_BUILTIN_CLZS,
28786
28787 /* RTM */
28788 IX86_BUILTIN_XBEGIN,
28789 IX86_BUILTIN_XEND,
28790 IX86_BUILTIN_XABORT,
28791 IX86_BUILTIN_XTEST,
28792
28793 /* BMI instructions. */
28794 IX86_BUILTIN_BEXTR32,
28795 IX86_BUILTIN_BEXTR64,
28796 IX86_BUILTIN_CTZS,
28797
28798 /* TBM instructions. */
28799 IX86_BUILTIN_BEXTRI32,
28800 IX86_BUILTIN_BEXTRI64,
28801
28802 /* BMI2 instructions. */
28803 IX86_BUILTIN_BZHI32,
28804 IX86_BUILTIN_BZHI64,
28805 IX86_BUILTIN_PDEP32,
28806 IX86_BUILTIN_PDEP64,
28807 IX86_BUILTIN_PEXT32,
28808 IX86_BUILTIN_PEXT64,
28809
28810 /* ADX instructions. */
28811 IX86_BUILTIN_ADDCARRYX32,
28812 IX86_BUILTIN_ADDCARRYX64,
28813
28814 /* SBB instructions. */
28815 IX86_BUILTIN_SBB32,
28816 IX86_BUILTIN_SBB64,
28817
28818 /* FSGSBASE instructions. */
28819 IX86_BUILTIN_RDFSBASE32,
28820 IX86_BUILTIN_RDFSBASE64,
28821 IX86_BUILTIN_RDGSBASE32,
28822 IX86_BUILTIN_RDGSBASE64,
28823 IX86_BUILTIN_WRFSBASE32,
28824 IX86_BUILTIN_WRFSBASE64,
28825 IX86_BUILTIN_WRGSBASE32,
28826 IX86_BUILTIN_WRGSBASE64,
28827
28828 /* RDRND instructions. */
28829 IX86_BUILTIN_RDRAND16_STEP,
28830 IX86_BUILTIN_RDRAND32_STEP,
28831 IX86_BUILTIN_RDRAND64_STEP,
28832
28833 /* RDSEED instructions. */
28834 IX86_BUILTIN_RDSEED16_STEP,
28835 IX86_BUILTIN_RDSEED32_STEP,
28836 IX86_BUILTIN_RDSEED64_STEP,
28837
28838 /* F16C instructions. */
28839 IX86_BUILTIN_CVTPH2PS,
28840 IX86_BUILTIN_CVTPH2PS256,
28841 IX86_BUILTIN_CVTPS2PH,
28842 IX86_BUILTIN_CVTPS2PH256,
28843
28844 /* CFString built-in for Darwin. */
28845 IX86_BUILTIN_CFSTRING,
28846
28847 /* Builtins to get CPU type and supported features. */
28848 IX86_BUILTIN_CPU_INIT,
28849 IX86_BUILTIN_CPU_IS,
28850 IX86_BUILTIN_CPU_SUPPORTS,
28851
28852 /* Read/write FLAGS register built-ins. */
28853 IX86_BUILTIN_READ_FLAGS,
28854 IX86_BUILTIN_WRITE_FLAGS,
28855
28856 IX86_BUILTIN_MAX
28857 };
28858
28859 /* Table for the ix86 builtin decls. */
28860 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28861
28862 /* Table of all the builtin functions that are possible with different ISAs
28863 but are waiting to be built until a function is declared to use that
28864 ISA. */
28865 struct builtin_isa {
28866 const char *name; /* function name */
28867 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28868 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28869 bool const_p; /* true if the declaration is constant */
28870 bool set_and_not_built_p; /* true if recorded but the decl has not been built yet */
28871 };
28872
28873 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
28874
28875
28876 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save MASK,
28877 the set of isa_flags the builtin requires, in the ix86_builtins_isa array.
28878 Store the function decl in the ix86_builtins array.  Return the function
28879 decl, or NULL_TREE if the builtin was not added.
28880
28881 If the front end has a special hook for builtin functions, delay adding
28882 builtin functions that aren't in the current ISA until the ISA is changed
28883 with function specific optimization.  Doing so can save about 300K for the
28884 default compiler.  When the builtin is expanded, check at that time whether
28885 it is valid.
28886
28887 If the front end doesn't have a special hook, record all builtins, even
28888 those whose instruction set isn't in the current ISA, in case the user uses
28889 function specific options for a different ISA; that way we don't get scope
28890 errors if a builtin is added in the middle of a function scope. */
28891
28892 static inline tree
28893 def_builtin (HOST_WIDE_INT mask, const char *name,
28894 enum ix86_builtin_func_type tcode,
28895 enum ix86_builtins code)
28896 {
28897 tree decl = NULL_TREE;
28898
28899 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28900 {
28901 ix86_builtins_isa[(int) code].isa = mask;
28902
28903 mask &= ~OPTION_MASK_ISA_64BIT;
28904 if (mask == 0
28905 || (mask & ix86_isa_flags) != 0
28906 || (lang_hooks.builtin_function
28907 == lang_hooks.builtin_function_ext_scope))
28908
28909 {
28910 tree type = ix86_get_builtin_func_type (tcode);
28911 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28912 NULL, NULL_TREE);
28913 ix86_builtins[(int) code] = decl;
28914 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28915 }
28916 else
28917 {
28918 ix86_builtins[(int) code] = NULL_TREE;
28919 ix86_builtins_isa[(int) code].tcode = tcode;
28920 ix86_builtins_isa[(int) code].name = name;
28921 ix86_builtins_isa[(int) code].const_p = false;
28922 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28923 }
28924 }
28925
28926 return decl;
28927 }
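/* Illustrative sketch, not part of the original source: how a builtin is
   registered through def_builtin.  The mask, name, function type and builtin
   code used below all exist elsewhere in this file; only the wrapper function
   is hypothetical, and the fragment is kept under "#if 0" so it is never
   compiled.  */
#if 0
static void
example_register_lfence_builtin (void)
{
  /* If SSE2 is already enabled (or the front end supports ext-scope
     builtins), the decl is built immediately and stored in ix86_builtins.
     Otherwise the name, type and ISA mask are only recorded in
     ix86_builtins_isa and the decl is built later by
     ix86_add_new_builtins.  */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence",
	       VOID_FTYPE_VOID, IX86_BUILTIN_LFENCE);
}
#endif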
28928
28929 /* Like def_builtin, but also marks the function decl "const". */
28930
28931 static inline tree
28932 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28933 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28934 {
28935 tree decl = def_builtin (mask, name, tcode, code);
28936 if (decl)
28937 TREE_READONLY (decl) = 1;
28938 else
28939 ix86_builtins_isa[(int) code].const_p = true;
28940
28941 return decl;
28942 }
28943
28944 /* Add any new builtin functions for a given ISA that may not have been
28945 declared yet. This saves a bit of space compared to adding all of the
28946 declarations to the tree up front, whether or not they are used. */
28947
28948 static void
28949 ix86_add_new_builtins (HOST_WIDE_INT isa)
28950 {
28951 int i;
28952
28953 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28954 {
28955 if ((ix86_builtins_isa[i].isa & isa) != 0
28956 && ix86_builtins_isa[i].set_and_not_built_p)
28957 {
28958 tree decl, type;
28959
28960 /* Don't define the builtin again. */
28961 ix86_builtins_isa[i].set_and_not_built_p = false;
28962
28963 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28964 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28965 type, i, BUILT_IN_MD, NULL,
28966 NULL_TREE);
28967
28968 ix86_builtins[i] = decl;
28969 if (ix86_builtins_isa[i].const_p)
28970 TREE_READONLY (decl) = 1;
28971 }
28972 }
28973 }
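/* Illustrative sketch, not part of the original source: when the effective
   ISA is later extended (for example by a function-specific target option),
   the deferred entries recorded above can be turned into real decls.  The
   call site shown here is hypothetical and kept under "#if 0".  */
#if 0
static void
example_enable_avx2_builtins (void)
{
  /* Every entry whose isa mask overlaps OPTION_MASK_ISA_AVX2 and whose
     set_and_not_built_p flag is still set now gets an ext-scope decl.  */
  ix86_add_new_builtins (OPTION_MASK_ISA_AVX2);
}
#endif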
28974
28975 /* Bits for builtin_description.flag. */
28976
28977 /* Set when we don't support the comparison natively, and should
28978 swap the comparison operands in order to support it. */
28979 #define BUILTIN_DESC_SWAP_OPERANDS 1
28980
28981 struct builtin_description
28982 {
28983 const HOST_WIDE_INT mask;
28984 const enum insn_code icode;
28985 const char *const name;
28986 const enum ix86_builtins code;
28987 const enum rtx_code comparison;
28988 const int flag;
28989 };
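/* Illustrative sketch, not part of the original source: how an expander
   could honour BUILTIN_DESC_SWAP_OPERANDS for a table entry.  The helper is
   hypothetical (the real expanders appear much later in this file) and is
   kept under "#if 0".  */
#if 0
static void
example_swap_if_needed (const struct builtin_description *d,
			rtx *pop0, rtx *pop1)
{
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      /* The comparison is only supported with its operands reversed, so
	 exchange them before emitting the insn.  */
      rtx tmp = *pop0;
      *pop0 = *pop1;
      *pop1 = tmp;
    }
}
#endif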
28990
28991 static const struct builtin_description bdesc_comi[] =
28992 {
28993 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28994 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28995 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28996 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28997 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
29000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
29001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
29002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
29003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
29004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
29005 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
29006 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
29007 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
29008 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
29009 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
29010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
29011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
29012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
29013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
29014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
29015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
29016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
29017 };
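/* Illustrative sketch, not part of the original source: user-level view of
   one entry in bdesc_comi above.  The vector typedef and wrapper are
   hypothetical (the supported spellings live in the intrinsic headers) and
   the fragment is kept under "#if 0".  */
#if 0
typedef float example_v4sf __attribute__ ((__vector_size__ (16)));

static int
example_scalar_compare_eq (example_v4sf a, example_v4sf b)
{
  /* Reaches the CODE_FOR_sse_comi pattern through the UNEQ entry for
     __builtin_ia32_comieq in the table above.  */
  return __builtin_ia32_comieq (a, b);
}
#endif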
29018
29019 static const struct builtin_description bdesc_pcmpestr[] =
29020 {
29021 /* SSE4.2 */
29022 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
29023 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
29024 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
29025 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
29026 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
29027 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
29028 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
29029 };
29030
29031 static const struct builtin_description bdesc_pcmpistr[] =
29032 {
29033 /* SSE4.2 */
29034 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
29035 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
29036 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
29037 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
29038 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
29039 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
29040 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
29041 };
29042
29043 /* Special builtins with variable number of arguments. */
29044 static const struct builtin_description bdesc_special_args[] =
29045 {
29046 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
29047 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
29048 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
29049
29050 /* 80387 (used internally for atomic compound assignment). */
29051 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
29052 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
29053 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
29054 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
29055
29056 /* MMX */
29057 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29058
29059 /* 3DNow! */
29060 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
29061
29062 /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES. */
29063 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
29064 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
29065 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29066 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29067 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29068 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29069 { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29070 { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29071
29072 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29073 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
29074 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29075 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29076 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29077 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29078 { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29079 { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
29080
29081 /* SSE */
29082 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29083 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29084 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29085
29086 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29087 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
29088 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29089 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
29090
29091 /* SSE or 3DNow!A */
29092 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29093 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
29094
29095 /* SSE2 */
29096 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29097 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
29098 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29099 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
29100 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29101 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
29102 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
29103 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29104 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29105 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29106
29107 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29108 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29109
29110 /* SSE3 */
29111 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29112
29113 /* SSE4.1 */
29114 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29115
29116 /* SSE4A */
29117 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29118 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29119
29120 /* AVX */
29121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29123
29124 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29125 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29126 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29129
29130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29136 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29137
29138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29140 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29141
29142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29143 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29145 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29146 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29148 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29150
29151 /* AVX2 */
29152 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29153 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29154 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29155 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29156 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29157 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29158 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29159 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29160 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29161
29162 /* AVX512F */
29163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29210
29211 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29212 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29213 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29214 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29215 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29216 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29217
29218 /* FSGSBASE */
29219 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29220 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29221 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29222 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29223 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29224 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29225 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29226 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29227
29228 /* RTM */
29229 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29230 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29231 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29232 };
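/* Illustrative sketch, not part of the original source: user-level view of
   two of the special builtins declared through bdesc_special_args above.
   The wrapper is hypothetical (the intrinsic headers provide the supported
   spellings) and the fragment is kept under "#if 0".  */
#if 0
static unsigned long long
example_fenced_timestamp (void)
{
  __builtin_ia32_lfence ();	   /* VOID_FTYPE_VOID entry above.  */
  return __builtin_ia32_rdtsc ();  /* UINT64_FTYPE_VOID entry above.  */
}
#endif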
29233
29234 /* Builtins with variable number of arguments. */
29235 static const struct builtin_description bdesc_args[] =
29236 {
29237 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29238 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29239 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29240 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29241 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29242 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29243 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29244
29245 /* MMX */
29246 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29247 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29248 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29249 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29250 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29251 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29252
29253 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29254 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29255 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29256 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29257 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29258 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29259 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29260 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29261
29262 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29263 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29264
29265 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29266 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29267 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29268 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29269
29270 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29271 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29272 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29273 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29274 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29275 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29276
29277 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29278 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29279 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29280 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29281 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29282 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29283
29284 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29285 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29286 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29287
29288 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29289
29290 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29291 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29292 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29293 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29295 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29296
29297 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29298 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29299 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29300 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29301 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29302 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29303
29304 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29305 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29306 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29307 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29308
29309 /* 3DNow! */
29310 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29311 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29312 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29313 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29314
29315 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29316 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29317 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29318 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29319 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29320 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29321 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29322 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29323 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29324 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29325 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29326 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29327 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29328 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29329 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29330
29331 /* 3DNow!A */
29332 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29333 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29334 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29335 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29336 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29337 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29338
29339 /* SSE */
29340 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29341 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29342 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29343 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29344 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29345 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29346 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29347 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29348 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29349 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29350 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29351 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29352
29353 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29354
29355 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29356 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29357 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29358 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29359 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29360 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29361 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29362 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29363
29364 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29365 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29366 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29367 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29368 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29369 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29370 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29371 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29372 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29373 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29374 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29375 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29376 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29377 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29378 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29379 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29380 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29384
29385 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29386 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29389
29390 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29391 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29392 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29393 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29394
29395 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29396
29397 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29398 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29400 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29401 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29402
29403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29405 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29406
29407 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29408
29409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29411 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29412
29413 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29414 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29415
29416 /* SSE MMX or 3Dnow!A */
29417 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29418 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29419 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29420
29421 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29422 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29423 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29424 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29425
29426 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29427 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29428
29429 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29430
29431 /* SSE2 */
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29433
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29439
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29445
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29447
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29450 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29451 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29452
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29456
29457 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29460 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29462 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29465
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29467 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29475 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29483 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29486
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29488 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29491
29492 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29494 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29495 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29496
29497 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29498
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29500 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29501 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29502
29503 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29504
29505 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29506 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29507 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29508 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29509 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29510 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29511 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29512 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29513
29514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29522
29523 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29524 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29525
29526 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29528 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29529 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29530
29531 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29532 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29533
29534 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29535 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29536 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29537 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29538 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29539 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29540
29541 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29542 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29543 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29544 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29545
29546 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29547 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29548 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29549 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29550 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29551 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29552 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29553 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29554
29555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29558
29559 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29561
29562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29563 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29564
29565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29566
29567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29568 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29571
29572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29573 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29574 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29575 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29576 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29577 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29578 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29579
29580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29581 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29582 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29583 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29584 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29585 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29586 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29587
29588 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29589 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29590 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29591 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
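/* A note on the shift rows above (illustrative reading, hedged): the
   *_SI_COUNT and *_V?I_COUNT ftypes mark the last argument as a shift
   count, either a plain int or a vector whose low element holds the
   count, matching the two hardware forms.  The *_INT_CONVERT rows
   (pslldqi128 / psrldqi128) are typed V2DI but expand through the
   whole-register V1TI shift patterns, so the operands are converted
   between modes.  The byte-granular intrinsics scale the count before
   calling the builtin, e.g. in emmintrin.h (exact macro shape varies
   between releases; the times-8 scaling is the point):

     #define _mm_slli_si128(A, N) \
       ((__m128i) __builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8))
   */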
29592
29593 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29594 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29595 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29596
29597 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29598
29599 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29600
29601 /* SSE2 MMX */
29602 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29603 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29604
29605 /* SSE3 */
29606 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
29607 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29608
29609 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29610 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29611 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29612 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29613 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29614 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29615
29616 /* SSSE3 */
29617 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29618 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29619 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29620 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29621 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29622 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29623
29624 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29625 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29626 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29627 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29628 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29629 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29630 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29631 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29632 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29633 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29634 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29635 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29636 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29637 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29638 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29639 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29640 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29641 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29642 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29643 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29644 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29645 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29646 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29647 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29648
29649 /* SSSE3. */
29650 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29651 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
29652
29653 /* SSE4.1 */
29654 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29655 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29656 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29657 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29658 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29659 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29660 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29661 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29662 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29663 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29664
29665 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29666 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29667 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29668 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29669 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29670 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29671 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29672 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29673 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29674 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29675 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29676 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29677 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29678
29679 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29680 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29681 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29682 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29683 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29684 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29685 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29686 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29687 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29688 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29689 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29690 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29691
29692 /* SSE4.1 */
29693 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29694 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29695 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29696 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29697
29698 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29699 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29700 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29701 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
29702
29703 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29704 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29705
29706 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29707 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29708
29709 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29710 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29711 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29712 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29713
29714 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29715 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29716
29717 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29718 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29719
29720 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29721 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29722 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
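/* The PTEST rows keep the condition to test in the rtx_code column
   instead of a comparison between the arguments (hedged reading): EQ
   selects "ZF set" (testz), LTU "CF set" (testc) and GTU "both clear"
   (testnzc), mirroring how PTEST derives ZF from the AND and CF from
   the ANDN of its operands.  These back the SSE4.1 test intrinsics,
   e.g. in smmintrin.h _mm_testz_si128 (__M, __V) simply returns
   __builtin_ia32_ptestz128 ((__v2di) __M, (__v2di) __V).  */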
29723
29724 /* SSE4.2 */
29725 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29726 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29727 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29728 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29729 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
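/* The CRC32 rows differ only in the width of the data operand; the
   accumulator stays 32-bit except in the 64-bit-only DI form.  They
   back the _mm_crc32_u{8,16,32,64} intrinsics, e.g. in smmintrin.h
   _mm_crc32_u8 (__C, __V) simply returns
   __builtin_ia32_crc32qi (__C, __V).  */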
29730
29731 /* SSE4A */
29732 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29733 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29734 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29735 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29736
29737 /* AES */
29738 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29739 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29740
29741 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29742 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29743 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29744 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29745
29746 /* PCLMUL */
29747 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
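/* Rows whose name field is 0, such as the AES and PCLMUL ones above,
   appear to supply only the insn code and ftype used at expansion
   time; the user-visible __builtin_ia32_aes* and
   __builtin_ia32_pclmulqdq128 names are registered separately with
   their own AES/PCLMUL ISA masks, which is presumably why plain
   OPTION_MASK_ISA_SSE2 suffices here.  */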
29748
29749 /* AVX */
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29754 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29761 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29776
29777 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29780 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29781
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29783 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29784 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29785 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29786 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29787 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29788 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29789 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29790 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29791 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29792 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29793 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29794 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29795 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29796 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29797 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29798 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29799 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29800 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29801 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29802 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29803 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29804 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29805 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29806 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29807 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29808 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29809 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29810 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29811 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29812 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29813 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29814 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29815 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29816
29817 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29818 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29819 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29820
29821 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29822 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29823 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29824 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29825 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29826
29827 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29828
29829 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29830 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29831
29832 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29833 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29834 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29835 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29836
29837 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29838 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29839
29840 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29841 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29842
29843 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29844 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29845 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29846 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29847
29848 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29849 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29850
29851 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29852 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
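/* The FLOOR/CEIL/TRUNC/RINT rows above reuse the generic round
   patterns and carry the rounding selector (ROUND_FLOOR, ROUND_CEIL,
   ...) in the rtx_code slot, with the *_ROUND ftypes presumably
   telling the expander to emit that value as the immediate operand.
   A plausible use (assumption): the vectorizer can map scalar
   floor/ceil/trunc/rint calls straight to these builtins, so

     for (i = 0; i < n; i++) out[i] = floor (in[i]);

   can become vroundpd at -O3 -mavx with -ffast-math style flags,
   without going through the intrinsics headers.  */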
29853
29854 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29855 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29856 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29857 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29858
29859 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29860 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29861 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29862 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29863 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29864 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29865
29866 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29867 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29868 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29869 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29870 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29871 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29872 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29873 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29874 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29875 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29876 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29877 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29878 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29879 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29880 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
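  /* For the vtest/ptest rows, the comparison code selects which flag of the
     test result is materialized: EQ gives the testz forms (ZF), LTU the
     testc forms (CF), and GTU the testnzc forms (neither ZF nor CF).  As an
     illustration, the avxintrin.h wrapper is roughly
       _mm256_testz_si256 (a, b) -> __builtin_ia32_ptestz256 ((__v4di) a, (__v4di) b).  */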
29881
29882 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29883 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29884
29885 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29886 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29887
29888   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29889
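  /* Each initializer above and below is a struct builtin_description:
     { ISA option mask, insn code of the machine-description pattern,
       builtin name, IX86_BUILTIN_* enum, an optional comparison/rounding
       sub-code (UNKNOWN when unused), and the ix86_builtin_func_type
       prototype cast to int }.
     ix86_expand_args_builtin dispatches on that last field when expanding
     calls to these builtins.  */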
29890 /* AVX2 */
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29933 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29934 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29935 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29936 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29937 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29938 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29939 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29940 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29941 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29942 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29943 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29944 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29945 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29946 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29947 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29948 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29949 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29950 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29951 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29952 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29953 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29954 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29955 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29956 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29957 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29958 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29959 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29960 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29961 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29962 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29963 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29964 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29965 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29966 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29967 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29968 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29969 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29970 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29971 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29972 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29973 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29974 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29975 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29976 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29977 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29978 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29979 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29980 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29981 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29982 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29983 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29984 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29985 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29986 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29987 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29988 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29989 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29990 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29991 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29992 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29993 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29994 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29995 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29996 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29997 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29998 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29999 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30000 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30001 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
30002 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
30003 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30004 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30005 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30006 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
30007 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
30008 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
30009 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30010 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30011 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
30012 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
30013 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
30014 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
30015 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
30016 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
30017 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
30018 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
30019 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
30020 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30021 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
30022 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
30023 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
30024 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
30025 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
30026 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
30027 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30028 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30029 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30030 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30031 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30032 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30033 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
30034 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
30035 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
30036 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
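  /* Notes on the AVX2 block above: prototypes ending in _COUNT mark the
     shift builtins whose last operand is a count (an immediate for the *i
     forms, the low element of an XMM register otherwise), and _CONVERT marks
     rows such as palignr256/pslldqi256/psrldqi256 whose operands are handled
     in V2TImode even though the builtin prototype is written in terms of
     V4DI.  For illustration, the avx2intrin.h wrappers look roughly like
       _mm256_slli_epi32 (a, n) -> (__m256i) __builtin_ia32_pslldi256 ((__v8si) a, n).  */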
30037
30038 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
30039
30040 /* BMI */
30041 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30042 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30043 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
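  /* __builtin_clzs and __builtin_ctzs are the 16-bit leading/trailing
     zero-count builtins.  The bextr_u32/u64 builtins take a control word
     with the start bit in bits 7:0 and the field length in bits 15:8; for
     example,
       __builtin_ia32_bextr_u32 (x, 4 | (8 << 8))
     extracts 8 bits starting at bit 4.  The TBM bextri rows below are the
     immediate-control variants of the same operation.  */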
30044
30045 /* TBM */
30046 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30047 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30048
30049 /* F16C */
30050 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
30051 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
30052 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
30053 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
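  /* The vcvtps2ph rows take an extra immediate operand (hence the _INT
     prototypes) selecting the rounding mode of the float-to-half conversion;
     the 128-bit form packs its four converted values into the low half of
     the V8HI result.  */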
30054
30055 /* BMI2 */
30056 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30057 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30058 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30059 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
30060 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
30061 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
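  /* bzhi zeroes all bits at and above the position given by its second
     operand.  pdep scatters the low-order bits of its first operand into the
     bit positions set in the mask, and pext gathers the masked bits down to
     the low end; as a worked example,
       __builtin_ia32_pext_si (0xabcd, 0x0f0f)
     packs the two selected nibbles together and yields 0xbd.  */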
30062
30063 /* AVX512F */
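  /* In this block most rows use the write-masked patterns: prototypes ending
     in _HI take a 16-lane mask, those ending in _QI an 8-lane mask, and the
     vector operand just before the mask is the merge source for inactive
     lanes; the _maskz pattern variants zero those lanes instead.  The
     pbroadcastq512 GPR/memory rows further down are split on
     OPTION_MASK_ISA_64BIT because broadcasting a 64-bit general register is
     only available in 64-bit mode.  */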
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30119 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30120 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30121 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30122 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30124 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30151 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30156 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30160 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30161 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30172 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30173 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30174 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30175 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30176 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30177 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30178 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30179 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30180 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30181 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30182 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30183 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30184 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30185 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30186 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30206 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30208 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30215 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30217 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30231 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30232 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30233 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30234 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30261
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30266 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30267 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30269 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30270
30271 	  /* Mask arithmetic operations (see the illustrative sketch after this table).  */
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30273 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30275 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30277 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30279 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30281 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30282
30283 /* SHA */
30284 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30286 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30287 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30288 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30291 };
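/* Illustrative sketch, not taken from this file: entries in the table above
   are normally registered as GCC builtins under the name in their third
   field and reached through the intrinsics headers.  Assuming -mavx512f,
   the kandhi entry from the mask-arithmetic group could be exercised with a
   hypothetical wrapper (the names my_kand16, a and b are made up here):

     unsigned short
     my_kand16 (unsigned short a, unsigned short b)
     {
       return (unsigned short) __builtin_ia32_kandhi (a, b);
     }

   The HI_FTYPE_HI_HI field is what gives this builtin its
   (unsigned short, unsigned short) -> unsigned short prototype.  */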
30292
30293 /* Builtins with rounding support. */
30294 static const struct builtin_description bdesc_round_args[] =
30295 {
30296 /* AVX512F */
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30306 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30307 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30308 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30309 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30310 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30311 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30312 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30313 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30314 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30315 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30316 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30317 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30318 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30319 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30320 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30321 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30322 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30323 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30324 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30325 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30326 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30327 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30328 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30329 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30330 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30331 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30332 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30333 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30334 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30335 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30336 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30337 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30338 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30339 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30340 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30341 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30342 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30343 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30344 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30345 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30346 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30347 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30348 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30349 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30350 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30351 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30352 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30353 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30354 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30355 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30356 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30357 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30358 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30359 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30360 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30361 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30362 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30363 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30364 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30365 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30366 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30367 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30368 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30369 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30370 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30371 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30372 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30373 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30374 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30375 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30376 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30377 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30378 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30379 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30380 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30381 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30382 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30383 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30384 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30385 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30386 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30387 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30388 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30389 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30390 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30391 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30392 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30393 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30394 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30395 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30396 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30397 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30398 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30399 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30400 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30401 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30402 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30403 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30404 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30405 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30406 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30407 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30408 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30409 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30410 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30411 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30412 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30413 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30414 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30415 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30416
30417 /* AVX512ER */
30418 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30419 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30420 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30421 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30422 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30423 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30424 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30425 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30426 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30427 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30428 };
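/* Illustrative sketch, not taken from this file: every entry above carries
   a trailing INT operand selecting the rounding mode.  Assuming -mavx512f,
   the addpd512 entry could be reached through a hypothetical wrapper (the
   typedef and function name are made up; the literal 8 stands for
   _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, and the all-ones QI
   operand is the write mask):

     typedef double v8df_t __attribute__ ((vector_size (64)));

     v8df_t
     my_add_round_pd (v8df_t a, v8df_t b)
     {
       return (v8df_t) __builtin_ia32_addpd512_mask (a, b, a,
						     (unsigned char) -1, 8);
     }

   The V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT field gives the builtin its two
   sources, a merge source, the write mask and the rounding immediate.  */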
30429
30430 /* FMA4 and XOP. */
30431 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30432 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30433 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30434 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30435 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30436 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30437 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30438 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30439 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30440 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30441 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30442 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30443 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30444 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30445 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30446 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30447 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30448 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30449 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30450 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30451 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30452 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30453 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30454 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30455 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30456 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30457 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30458 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30459 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30460 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30461 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30462 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30463 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30464 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30465 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30466 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30467 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30468 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30469 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30470 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30471 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30472 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30473 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30474 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30475 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30476 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30477 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30478 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30479 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30480 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30481 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30482 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30483
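/* The MULTI_ARG_* names above are shorthand for the function-type codes
   they expand to: the digit is the operand count, the element tag
   (SF, DF, DI, SI, HI, QI) names the vector element, and a trailing 2
   (or 1 on the four-operand forms) marks the 256-bit variant.  _IMM marks
   a user-supplied immediate operand, while _CMP and _TF mark entries whose
   comparison code comes from the rtx_code field of the table below rather
   than from the user.  */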
30484 static const struct builtin_description bdesc_multi_arg[] =
30485 {
30486 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30487 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30488 UNKNOWN, (int)MULTI_ARG_3_SF },
30489 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30490 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30491 UNKNOWN, (int)MULTI_ARG_3_DF },
30492
30493 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30494 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30495 UNKNOWN, (int)MULTI_ARG_3_SF },
30496 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30497 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30498 UNKNOWN, (int)MULTI_ARG_3_DF },
30499
30500 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30501 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30502 UNKNOWN, (int)MULTI_ARG_3_SF },
30503 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30504 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30505 UNKNOWN, (int)MULTI_ARG_3_DF },
30506 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30507 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30508 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30509 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30510 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30511 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30512
30513 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30514 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30515 UNKNOWN, (int)MULTI_ARG_3_SF },
30516 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30517 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30518 UNKNOWN, (int)MULTI_ARG_3_DF },
30519 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30520 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30521 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30522 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30523 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30524 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30525
30526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30530 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi,      "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30533
30534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30541
30542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30543
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30556
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30559 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30568 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30573
30574 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30575 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30576 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30577 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30578 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30579 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30580
30581 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30582 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30583 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30584 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30585 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30586 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30587 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30588 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30589 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30590 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30591 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30592 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30593 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30594 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30595 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30596
30597 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30598 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30599 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30600 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30601 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30602 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30603 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30604
30605 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30606 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30607 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30608 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30609 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30610 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30611 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30612
30613 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30614 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30615 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30616 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30617 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30618 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30619 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30620
30621 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30622 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30623 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30624 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30625 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30626 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30627 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30628
30629 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30630 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30631 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30632 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30633 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30634 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30635 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30636
30637 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30638 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30639 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30640 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30641 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30642 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30643 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30644
30645 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30646 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30647 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30648 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30649 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30650 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30651 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30652
30653 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30654 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30655 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30656 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30657 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30658 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30659 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30660
30661 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30662 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30663 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30664 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30665 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30666 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30667 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30668 	  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30669
30670 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30671 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30672 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30673 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30674 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30675 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30676 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30677 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30678
30679 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30680 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30681 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30682 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30683
30684 };
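/* Illustrative sketch, not taken from this file: the third field of each
   entry above is the user-visible builtin name, so the XOP comparison
   entries can be called directly once -mxop is enabled.  A hypothetical
   use of the vpcomeqb entry (the typedef and function name are made up):

     typedef char v16qi_t __attribute__ ((vector_size (16)));

     v16qi_t
     my_comeq_epi8 (v16qi_t a, v16qi_t b)
     {
       return (v16qi_t) __builtin_ia32_vpcomeqb (a, b);
     }

   The rtx code field (EQ, LT, GEU, PCOM_FALSE, ...) tells the expander
   which comparison to generate for the shared insn patterns.  */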
30685 \f
30686 /* TM vector builtins. */
30687
30688 /* Reuse the existing x86-specific `struct builtin_description' because
30689 we're lazy. Add casts to make them fit. */
30690 static const struct builtin_description bdesc_tm[] =
30691 {
30692 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30693 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30694 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30695 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30696 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30697 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30698 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30699
30700 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30701 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30702 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30703 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30704 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30705 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30706 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30707
30708 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30709 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30710 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30711 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30712 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30713 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30714 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30715
30716 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30717 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30718 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30719 };
30720
30721 /* TM callbacks. */
30722
30723 /* Return the builtin decl needed to load a vector of TYPE. */
30724
30725 static tree
30726 ix86_builtin_tm_load (tree type)
30727 {
30728 if (TREE_CODE (type) == VECTOR_TYPE)
30729 {
30730 switch (tree_to_uhwi (TYPE_SIZE (type)))
30731 {
30732 case 64:
30733 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30734 case 128:
30735 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30736 case 256:
30737 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30738 }
30739 }
30740 return NULL_TREE;
30741 }
30742
30743 /* Return the builtin decl needed to store a vector of TYPE. */
30744
30745 static tree
30746 ix86_builtin_tm_store (tree type)
30747 {
30748 if (TREE_CODE (type) == VECTOR_TYPE)
30749 {
30750 switch (tree_to_uhwi (TYPE_SIZE (type)))
30751 {
30752 case 64:
30753 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30754 case 128:
30755 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30756 case 256:
30757 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30758 }
30759 }
30760 return NULL_TREE;
30761 }
30762 \f
30763 /* Initialize the transactional memory vector load/store builtins. */
30764
30765 static void
30766 ix86_init_tm_builtins (void)
30767 {
30768 enum ix86_builtin_func_type ftype;
30769 const struct builtin_description *d;
30770 size_t i;
30771 tree decl;
30772 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30773 tree attrs_log, attrs_type_log;
30774
30775 if (!flag_tm)
30776 return;
30777
30778 /* If there are no builtins defined, we must be compiling in a
30779 language without trans-mem support. */
30780 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30781 return;
30782
30783 /* Use whatever attributes a normal TM load has. */
30784 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30785 attrs_load = DECL_ATTRIBUTES (decl);
30786 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30787 /* Use whatever attributes a normal TM store has. */
30788 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30789 attrs_store = DECL_ATTRIBUTES (decl);
30790 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30791 /* Use whatever attributes a normal TM log has. */
30792 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30793 attrs_log = DECL_ATTRIBUTES (decl);
30794 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30795
30796 for (i = 0, d = bdesc_tm;
30797 i < ARRAY_SIZE (bdesc_tm);
30798 i++, d++)
30799 {
30800 if ((d->mask & ix86_isa_flags) != 0
30801 || (lang_hooks.builtin_function
30802 == lang_hooks.builtin_function_ext_scope))
30803 {
30804 tree type, attrs, attrs_type;
30805 enum built_in_function code = (enum built_in_function) d->code;
30806
30807 ftype = (enum ix86_builtin_func_type) d->flag;
30808 type = ix86_get_builtin_func_type (ftype);
30809
30810 if (BUILTIN_TM_LOAD_P (code))
30811 {
30812 attrs = attrs_load;
30813 attrs_type = attrs_type_load;
30814 }
30815 else if (BUILTIN_TM_STORE_P (code))
30816 {
30817 attrs = attrs_store;
30818 attrs_type = attrs_type_store;
30819 }
30820 else
30821 {
30822 attrs = attrs_log;
30823 attrs_type = attrs_type_log;
30824 }
30825 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30826 /* The builtin without the prefix for
30827 calling it directly. */
30828 d->name + strlen ("__builtin_"),
30829 attrs);
30830 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30831 set the TYPE_ATTRIBUTES. */
30832 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30833
30834 set_builtin_decl (code, decl, false);
30835 }
30836 }
30837 }
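
/* A minimal illustration (hypothetical example, not taken from the entries
   above): each bdesc_tm entry is registered under its "__builtin__ITM_*"
   name, while the library name passed to add_builtin_function (d->name with
   the "__builtin_" prefix stripped) makes the call expand to the matching
   libitm entry point.  So, with -fgnu-tm and -msse, a transactional vector
   store such as

     #include <xmmintrin.h>
     __m128 g;
     void store (__m128 *p) { __transaction_atomic { *p = g; } }

   may be instrumented by the trans-mem lowering passes into a call to
   _ITM_WM128 (p, g), i.e. the BUILT_IN_TM_STORE_M128 builtin defined
   above.  */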
30838
30839 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
30840 not in the current target ISA, so that the user can compile particular
30841 modules with target-specific options that differ from the command-line
30842 options. */
30843 static void
30844 ix86_init_mmx_sse_builtins (void)
30845 {
30846 const struct builtin_description * d;
30847 enum ix86_builtin_func_type ftype;
30848 size_t i;
30849
30850 /* Add all special builtins with variable number of operands. */
30851 for (i = 0, d = bdesc_special_args;
30852 i < ARRAY_SIZE (bdesc_special_args);
30853 i++, d++)
30854 {
30855 if (d->name == 0)
30856 continue;
30857
30858 ftype = (enum ix86_builtin_func_type) d->flag;
30859 def_builtin (d->mask, d->name, ftype, d->code);
30860 }
30861
30862 /* Add all builtins with variable number of operands. */
30863 for (i = 0, d = bdesc_args;
30864 i < ARRAY_SIZE (bdesc_args);
30865 i++, d++)
30866 {
30867 if (d->name == 0)
30868 continue;
30869
30870 ftype = (enum ix86_builtin_func_type) d->flag;
30871 def_builtin_const (d->mask, d->name, ftype, d->code);
30872 }
30873
30874 /* Add all builtins with rounding. */
30875 for (i = 0, d = bdesc_round_args;
30876 i < ARRAY_SIZE (bdesc_round_args);
30877 i++, d++)
30878 {
30879 if (d->name == 0)
30880 continue;
30881
30882 ftype = (enum ix86_builtin_func_type) d->flag;
30883 def_builtin_const (d->mask, d->name, ftype, d->code);
30884 }
30885
30886 /* pcmpestr[im] insns. */
30887 for (i = 0, d = bdesc_pcmpestr;
30888 i < ARRAY_SIZE (bdesc_pcmpestr);
30889 i++, d++)
30890 {
30891 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30892 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30893 else
30894 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30895 def_builtin_const (d->mask, d->name, ftype, d->code);
30896 }
30897
30898 /* pcmpistr[im] insns. */
30899 for (i = 0, d = bdesc_pcmpistr;
30900 i < ARRAY_SIZE (bdesc_pcmpistr);
30901 i++, d++)
30902 {
30903 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30904 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30905 else
30906 ftype = INT_FTYPE_V16QI_V16QI_INT;
30907 def_builtin_const (d->mask, d->name, ftype, d->code);
30908 }
30909
30910 /* comi/ucomi insns. */
30911 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30912 {
30913 if (d->mask == OPTION_MASK_ISA_SSE2)
30914 ftype = INT_FTYPE_V2DF_V2DF;
30915 else
30916 ftype = INT_FTYPE_V4SF_V4SF;
30917 def_builtin_const (d->mask, d->name, ftype, d->code);
30918 }
30919
30920 /* SSE */
30921 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30922 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30923 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30924 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30925
30926 /* SSE or 3DNow!A */
30927 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30928 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30929 IX86_BUILTIN_MASKMOVQ);
30930
30931 /* SSE2 */
30932 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30933 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30934
30935 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30936 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30937 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30938 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30939
30940 /* SSE3. */
30941 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30942 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30943 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30944 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30945
30946 /* AES */
30947 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30948 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30949 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30950 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30951 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30952 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30953 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30954 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30955 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30956 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30957 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30958 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30959
30960 /* PCLMUL */
30961 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30962 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30963
30964 /* RDRND */
30965 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30966 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30967 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30968 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30969 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30970 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30971 IX86_BUILTIN_RDRAND64_STEP);
30972
30973 /* AVX2 */
30974 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30975 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30976 IX86_BUILTIN_GATHERSIV2DF);
30977
30978 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30979 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30980 IX86_BUILTIN_GATHERSIV4DF);
30981
30982 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30983 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30984 IX86_BUILTIN_GATHERDIV2DF);
30985
30986 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30987 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30988 IX86_BUILTIN_GATHERDIV4DF);
30989
30990 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30991 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30992 IX86_BUILTIN_GATHERSIV4SF);
30993
30994 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30995 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30996 IX86_BUILTIN_GATHERSIV8SF);
30997
30998 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30999 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
31000 IX86_BUILTIN_GATHERDIV4SF);
31001
31002 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
31003 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
31004 IX86_BUILTIN_GATHERDIV8SF);
31005
31006 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
31007 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
31008 IX86_BUILTIN_GATHERSIV2DI);
31009
31010 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
31011 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
31012 IX86_BUILTIN_GATHERSIV4DI);
31013
31014 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
31015 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
31016 IX86_BUILTIN_GATHERDIV2DI);
31017
31018 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
31019 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
31020 IX86_BUILTIN_GATHERDIV4DI);
31021
31022 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
31023 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
31024 IX86_BUILTIN_GATHERSIV4SI);
31025
31026 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
31027 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
31028 IX86_BUILTIN_GATHERSIV8SI);
31029
31030 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
31031 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
31032 IX86_BUILTIN_GATHERDIV4SI);
31033
31034 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
31035 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
31036 IX86_BUILTIN_GATHERDIV8SI);
31037
31038 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
31039 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
31040 IX86_BUILTIN_GATHERALTSIV4DF);
31041
31042 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
31043 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
31044 IX86_BUILTIN_GATHERALTDIV8SF);
31045
31046 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
31047 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
31048 IX86_BUILTIN_GATHERALTSIV4DI);
31049
31050 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
31051 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
31052 IX86_BUILTIN_GATHERALTDIV8SI);
31053
31054 /* AVX512F */
31055 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
31056 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
31057 IX86_BUILTIN_GATHER3SIV16SF);
31058
31059 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
31060 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
31061 IX86_BUILTIN_GATHER3SIV8DF);
31062
31063 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
31064 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
31065 IX86_BUILTIN_GATHER3DIV16SF);
31066
31067 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
31068 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
31069 IX86_BUILTIN_GATHER3DIV8DF);
31070
31071 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
31072 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
31073 IX86_BUILTIN_GATHER3SIV16SI);
31074
31075 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
31076 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
31077 IX86_BUILTIN_GATHER3SIV8DI);
31078
31079 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
31080 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
31081 IX86_BUILTIN_GATHER3DIV16SI);
31082
31083 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
31084 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
31085 IX86_BUILTIN_GATHER3DIV8DI);
31086
31087 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
31088 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
31089 IX86_BUILTIN_GATHER3ALTSIV8DF);
31090
31091 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
31092 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
31093 IX86_BUILTIN_GATHER3ALTDIV16SF);
31094
31095 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
31096 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
31097 IX86_BUILTIN_GATHER3ALTSIV8DI);
31098
31099 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
31100 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
31101 IX86_BUILTIN_GATHER3ALTDIV16SI);
31102
31103 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
31104 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
31105 IX86_BUILTIN_SCATTERSIV16SF);
31106
31107 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
31108 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
31109 IX86_BUILTIN_SCATTERSIV8DF);
31110
31111 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31112 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31113 IX86_BUILTIN_SCATTERDIV16SF);
31114
31115 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31116 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31117 IX86_BUILTIN_SCATTERDIV8DF);
31118
31119 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31120 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31121 IX86_BUILTIN_SCATTERSIV16SI);
31122
31123 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31124 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31125 IX86_BUILTIN_SCATTERSIV8DI);
31126
31127 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31128 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31129 IX86_BUILTIN_SCATTERDIV16SI);
31130
31131 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31132 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31133 IX86_BUILTIN_SCATTERDIV8DI);
31134
31135 /* AVX512PF */
31136 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31137 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31138 IX86_BUILTIN_GATHERPFDPD);
31139 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31140 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31141 IX86_BUILTIN_GATHERPFDPS);
31142 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31143 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31144 IX86_BUILTIN_GATHERPFQPD);
31145 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31146 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31147 IX86_BUILTIN_GATHERPFQPS);
31148 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31149 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31150 IX86_BUILTIN_SCATTERPFDPD);
31151 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31152 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31153 IX86_BUILTIN_SCATTERPFDPS);
31154 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31155 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31156 IX86_BUILTIN_SCATTERPFQPD);
31157 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31158 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31159 IX86_BUILTIN_SCATTERPFQPS);
31160
31161 /* SHA */
31162 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31163 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31164 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31165 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31166 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31167 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31168 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31169 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31170 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31171 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31172 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31173 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31174 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31175 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31176
31177 /* RTM. */
31178 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31179 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31180
31181 /* MMX access to the vec_init patterns. */
31182 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31183 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31184
31185 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31186 V4HI_FTYPE_HI_HI_HI_HI,
31187 IX86_BUILTIN_VEC_INIT_V4HI);
31188
31189 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31190 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31191 IX86_BUILTIN_VEC_INIT_V8QI);
31192
31193 /* Access to the vec_extract patterns. */
31194 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31195 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31196 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31197 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31198 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31199 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31200 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31201 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31202 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31203 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31204
31205 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31206 "__builtin_ia32_vec_ext_v4hi",
31207 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31208
31209 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31210 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31211
31212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31213 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31214
31215 /* Access to the vec_set patterns. */
31216 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31217 "__builtin_ia32_vec_set_v2di",
31218 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31219
31220 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31221 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31222
31223 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31224 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31225
31226 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31227 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31228
31229 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31230 "__builtin_ia32_vec_set_v4hi",
31231 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31232
31233 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31234 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31235
31236 /* RDSEED */
31237 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31238 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31239 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31240 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31241 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31242 "__builtin_ia32_rdseed_di_step",
31243 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31244
31245 /* ADCX */
31246 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31247 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31248 def_builtin (OPTION_MASK_ISA_64BIT,
31249 "__builtin_ia32_addcarryx_u64",
31250 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31251 IX86_BUILTIN_ADDCARRYX64);
31252
31253 /* SBB */
31254 def_builtin (0, "__builtin_ia32_sbb_u32",
31255 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
31256 def_builtin (OPTION_MASK_ISA_64BIT,
31257 "__builtin_ia32_sbb_u64",
31258 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31259 IX86_BUILTIN_SBB64);
31260
31261 /* Read/write FLAGS. */
31262 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31263 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31264 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31265 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31266 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31267 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31268 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31269 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31270
31271 /* CLFLUSHOPT. */
31272 def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
31273 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
31274
31275 /* Add FMA4/XOP multi-argument instruction builtins. */
31276 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31277 {
31278 if (d->name == 0)
31279 continue;
31280
31281 ftype = (enum ix86_builtin_func_type) d->flag;
31282 def_builtin_const (d->mask, d->name, ftype, d->code);
31283 }
31284 }
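
/* A minimal illustration (hypothetical example): the builtins registered
   above are what the ia32 intrinsic headers expand to.  Assuming code
   compiled with -mrdrnd, the RDRND step builtin can also be called
   directly; it stores a random value through the pointer and returns
   nonzero on success:

     unsigned int r;
     int ok = __builtin_ia32_rdrand32_step (&r);

   which is essentially what the _rdrand32_step intrinsic wraps.  */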
31285
31286 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31287 to return a pointer to VERSION_DECL if the outcome of the expression
31288 formed by PREDICATE_CHAIN is true. This function will be called during
31289 version dispatch to decide which function version to execute. It returns
31290 the basic block at the end, to which more conditions can be added. */
31291
31292 static basic_block
31293 add_condition_to_bb (tree function_decl, tree version_decl,
31294 tree predicate_chain, basic_block new_bb)
31295 {
31296 gimple return_stmt;
31297 tree convert_expr, result_var;
31298 gimple convert_stmt;
31299 gimple call_cond_stmt;
31300 gimple if_else_stmt;
31301
31302 basic_block bb1, bb2, bb3;
31303 edge e12, e23;
31304
31305 tree cond_var, and_expr_var = NULL_TREE;
31306 gimple_seq gseq;
31307
31308 tree predicate_decl, predicate_arg;
31309
31310 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31311
31312 gcc_assert (new_bb != NULL);
31313 gseq = bb_seq (new_bb);
31314
31315
31316 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31317 build_fold_addr_expr (version_decl));
31318 result_var = create_tmp_var (ptr_type_node, NULL);
31319 convert_stmt = gimple_build_assign (result_var, convert_expr);
31320 return_stmt = gimple_build_return (result_var);
31321
31322 if (predicate_chain == NULL_TREE)
31323 {
31324 gimple_seq_add_stmt (&gseq, convert_stmt);
31325 gimple_seq_add_stmt (&gseq, return_stmt);
31326 set_bb_seq (new_bb, gseq);
31327 gimple_set_bb (convert_stmt, new_bb);
31328 gimple_set_bb (return_stmt, new_bb);
31329 pop_cfun ();
31330 return new_bb;
31331 }
31332
31333 while (predicate_chain != NULL)
31334 {
31335 cond_var = create_tmp_var (integer_type_node, NULL);
31336 predicate_decl = TREE_PURPOSE (predicate_chain);
31337 predicate_arg = TREE_VALUE (predicate_chain);
31338 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31339 gimple_call_set_lhs (call_cond_stmt, cond_var);
31340
31341 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31342 gimple_set_bb (call_cond_stmt, new_bb);
31343 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31344
31345 predicate_chain = TREE_CHAIN (predicate_chain);
31346
31347 if (and_expr_var == NULL)
31348 and_expr_var = cond_var;
31349 else
31350 {
31351 gimple assign_stmt;
31352 /* Use MIN_EXPR to check whether any of the condition values is zero:
31353 and_expr_var = min_expr <cond_var, and_expr_var>. */
31354 assign_stmt = gimple_build_assign (and_expr_var,
31355 build2 (MIN_EXPR, integer_type_node,
31356 cond_var, and_expr_var));
31357
31358 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31359 gimple_set_bb (assign_stmt, new_bb);
31360 gimple_seq_add_stmt (&gseq, assign_stmt);
31361 }
31362 }
31363
31364 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31365 integer_zero_node,
31366 NULL_TREE, NULL_TREE);
31367 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31368 gimple_set_bb (if_else_stmt, new_bb);
31369 gimple_seq_add_stmt (&gseq, if_else_stmt);
31370
31371 gimple_seq_add_stmt (&gseq, convert_stmt);
31372 gimple_seq_add_stmt (&gseq, return_stmt);
31373 set_bb_seq (new_bb, gseq);
31374
31375 bb1 = new_bb;
31376 e12 = split_block (bb1, if_else_stmt);
31377 bb2 = e12->dest;
31378 e12->flags &= ~EDGE_FALLTHRU;
31379 e12->flags |= EDGE_TRUE_VALUE;
31380
31381 e23 = split_block (bb2, return_stmt);
31382
31383 gimple_set_bb (convert_stmt, bb2);
31384 gimple_set_bb (return_stmt, bb2);
31385
31386 bb3 = e23->dest;
31387 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31388
31389 remove_edge (e23);
31390 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31391
31392 pop_cfun ();
31393
31394 return bb3;
31395 }
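
/* A minimal illustration (hypothetical names, pseudo-GIMPLE): for a version
   tagged with target("arch=corei7,sse4.2"), one call to this function
   appends roughly the following to NEW_BB:

     cond_1 = __builtin_cpu_is ("corei7");
     cond_2 = __builtin_cpu_supports ("sse4.2");
     cond_1 = MIN_EXPR <cond_2, cond_1>;
     if (cond_1 > 0)
       return (void *) &foo.arch_corei7_sse4.2;

   plus a fall-through edge to the returned block, where the caller adds the
   next condition or the default return.  MIN_EXPR makes the combined value
   zero as soon as any predicate returns zero.  */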
31396
31397 /* This parses the attribute arguments to target in DECL and determines
31398 the right builtin to use to match the platform specification.
31399 It returns the priority value for this version decl. If PREDICATE_LIST
31400 is not NULL, it stores the list of cpu features that need to be checked
31401 before dispatching this function. */
31402
31403 static unsigned int
31404 get_builtin_code_for_version (tree decl, tree *predicate_list)
31405 {
31406 tree attrs;
31407 struct cl_target_option cur_target;
31408 tree target_node;
31409 struct cl_target_option *new_target;
31410 const char *arg_str = NULL;
31411 const char *attrs_str = NULL;
31412 char *tok_str = NULL;
31413 char *token;
31414
31415 /* Priority of i386 features, greater value is higher priority. This is
31416 used to decide the order in which function dispatch must happen. For
31417 instance, a version specialized for SSE4.2 should be checked for dispatch
31418 before a version for SSE3, as SSE4.2 implies SSE3. */
31419 enum feature_priority
31420 {
31421 P_ZERO = 0,
31422 P_MMX,
31423 P_SSE,
31424 P_SSE2,
31425 P_SSE3,
31426 P_SSSE3,
31427 P_PROC_SSSE3,
31428 P_SSE4_A,
31429 P_PROC_SSE4_A,
31430 P_SSE4_1,
31431 P_SSE4_2,
31432 P_PROC_SSE4_2,
31433 P_POPCNT,
31434 P_AVX,
31435 P_PROC_AVX,
31436 P_FMA4,
31437 P_XOP,
31438 P_PROC_XOP,
31439 P_FMA,
31440 P_PROC_FMA,
31441 P_AVX2,
31442 P_PROC_AVX2
31443 };
31444
31445 enum feature_priority priority = P_ZERO;
31446
31447 /* These are the target attribute strings for which a dispatcher is
31448 available, from fold_builtin_cpu. */
31449
31450 static struct _feature_list
31451 {
31452 const char *const name;
31453 const enum feature_priority priority;
31454 }
31455 const feature_list[] =
31456 {
31457 {"mmx", P_MMX},
31458 {"sse", P_SSE},
31459 {"sse2", P_SSE2},
31460 {"sse3", P_SSE3},
31461 {"sse4a", P_SSE4_A},
31462 {"ssse3", P_SSSE3},
31463 {"sse4.1", P_SSE4_1},
31464 {"sse4.2", P_SSE4_2},
31465 {"popcnt", P_POPCNT},
31466 {"avx", P_AVX},
31467 {"fma4", P_FMA4},
31468 {"xop", P_XOP},
31469 {"fma", P_FMA},
31470 {"avx2", P_AVX2}
31471 };
31472
31473
31474 static unsigned int NUM_FEATURES
31475 = sizeof (feature_list) / sizeof (struct _feature_list);
31476
31477 unsigned int i;
31478
31479 tree predicate_chain = NULL_TREE;
31480 tree predicate_decl, predicate_arg;
31481
31482 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31483 gcc_assert (attrs != NULL);
31484
31485 attrs = TREE_VALUE (TREE_VALUE (attrs));
31486
31487 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31488 attrs_str = TREE_STRING_POINTER (attrs);
31489
31490 /* Return priority zero for default function. */
31491 if (strcmp (attrs_str, "default") == 0)
31492 return 0;
31493
31494 /* Handle arch= if specified. For priority, set it to be 1 more than
31495 the best instruction set the processor can handle. For instance, if
31496 there is a version for atom and a version for ssse3 (the highest ISA
31497 priority for atom), the atom version must be checked for dispatch
31498 before the ssse3 version. */
31499 if (strstr (attrs_str, "arch=") != NULL)
31500 {
31501 cl_target_option_save (&cur_target, &global_options);
31502 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31503 &global_options_set);
31504
31505 gcc_assert (target_node);
31506 new_target = TREE_TARGET_OPTION (target_node);
31507 gcc_assert (new_target);
31508
31509 if (new_target->arch_specified && new_target->arch > 0)
31510 {
31511 switch (new_target->arch)
31512 {
31513 case PROCESSOR_CORE2:
31514 arg_str = "core2";
31515 priority = P_PROC_SSSE3;
31516 break;
31517 case PROCESSOR_NEHALEM:
31518 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31519 arg_str = "westmere";
31520 else
31521 /* We translate "arch=corei7" and "arch=nehalem" to
31522 "corei7" so that it will be mapped to M_INTEL_COREI7
31523 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31524 arg_str = "corei7";
31525 priority = P_PROC_SSE4_2;
31526 break;
31527 case PROCESSOR_SANDYBRIDGE:
31528 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31529 arg_str = "ivybridge";
31530 else
31531 arg_str = "sandybridge";
31532 priority = P_PROC_AVX;
31533 break;
31534 case PROCESSOR_HASWELL:
31535 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31536 arg_str = "broadwell";
31537 else
31538 arg_str = "haswell";
31539 priority = P_PROC_AVX2;
31540 break;
31541 case PROCESSOR_BONNELL:
31542 arg_str = "bonnell";
31543 priority = P_PROC_SSSE3;
31544 break;
31545 case PROCESSOR_SILVERMONT:
31546 arg_str = "silvermont";
31547 priority = P_PROC_SSE4_2;
31548 break;
31549 case PROCESSOR_AMDFAM10:
31550 arg_str = "amdfam10h";
31551 priority = P_PROC_SSE4_A;
31552 break;
31553 case PROCESSOR_BTVER1:
31554 arg_str = "btver1";
31555 priority = P_PROC_SSE4_A;
31556 break;
31557 case PROCESSOR_BTVER2:
31558 arg_str = "btver2";
31559 priority = P_PROC_AVX;
31560 break;
31561 case PROCESSOR_BDVER1:
31562 arg_str = "bdver1";
31563 priority = P_PROC_XOP;
31564 break;
31565 case PROCESSOR_BDVER2:
31566 arg_str = "bdver2";
31567 priority = P_PROC_FMA;
31568 break;
31569 case PROCESSOR_BDVER3:
31570 arg_str = "bdver3";
31571 priority = P_PROC_FMA;
31572 break;
31573 case PROCESSOR_BDVER4:
31574 arg_str = "bdver4";
31575 priority = P_PROC_AVX2;
31576 break;
31577 }
31578 }
31579
31580 cl_target_option_restore (&global_options, &cur_target);
31581
31582 if (predicate_list && arg_str == NULL)
31583 {
31584 error_at (DECL_SOURCE_LOCATION (decl),
31585 "No dispatcher found for the versioning attributes");
31586 return 0;
31587 }
31588
31589 if (predicate_list)
31590 {
31591 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31592 /* For a C string literal the length includes the trailing NUL. */
31593 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31594 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31595 predicate_chain);
31596 }
31597 }
31598
31599 /* Process feature name. */
31600 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31601 strcpy (tok_str, attrs_str);
31602 token = strtok (tok_str, ",");
31603 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31604
31605 while (token != NULL)
31606 {
31607 /* Do not process "arch=" */
31608 if (strncmp (token, "arch=", 5) == 0)
31609 {
31610 token = strtok (NULL, ",");
31611 continue;
31612 }
31613 for (i = 0; i < NUM_FEATURES; ++i)
31614 {
31615 if (strcmp (token, feature_list[i].name) == 0)
31616 {
31617 if (predicate_list)
31618 {
31619 predicate_arg = build_string_literal (
31620 strlen (feature_list[i].name) + 1,
31621 feature_list[i].name);
31622 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31623 predicate_chain);
31624 }
31625 /* Find the maximum priority feature. */
31626 if (feature_list[i].priority > priority)
31627 priority = feature_list[i].priority;
31628
31629 break;
31630 }
31631 }
31632 if (predicate_list && i == NUM_FEATURES)
31633 {
31634 error_at (DECL_SOURCE_LOCATION (decl),
31635 "No dispatcher found for %s", token);
31636 return 0;
31637 }
31638 token = strtok (NULL, ",");
31639 }
31640 free (tok_str);
31641
31642 if (predicate_list && predicate_chain == NULL_TREE)
31643 {
31644 error_at (DECL_SOURCE_LOCATION (decl),
31645 "No dispatcher found for the versioning attributes : %s",
31646 attrs_str);
31647 return 0;
31648 }
31649 else if (predicate_list)
31650 {
31651 predicate_chain = nreverse (predicate_chain);
31652 *predicate_list = predicate_chain;
31653 }
31654
31655 return priority;
31656 }
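
/* A minimal illustration (hypothetical example): for a version tagged with

     __attribute__ ((target ("arch=corei7,popcnt")))

   this function fills PREDICATE_LIST with roughly
   { __builtin_cpu_is ("corei7"), __builtin_cpu_supports ("popcnt") } and
   returns the highest of the individual priorities (here P_POPCNT, which is
   above P_PROC_SSE4_2), so this version is checked for dispatch before plain
   SSE4.2 versions.  A "default" version returns priority zero and is
   dispatched last.  */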
31657
31658 /* This compares the priority of target features in function DECL1
31659 and DECL2. It returns positive value if DECL1 is higher priority,
31660 negative value if DECL2 is higher priority and 0 if they are the
31661 same. */
31662
31663 static int
31664 ix86_compare_version_priority (tree decl1, tree decl2)
31665 {
31666 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31667 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31668
31669 return (int)priority1 - (int)priority2;
31670 }
31671
31672 /* V1 and V2 point to function versions with different priorities
31673 based on the target ISA. This function compares their priorities. */
31674
31675 static int
31676 feature_compare (const void *v1, const void *v2)
31677 {
31678 typedef struct _function_version_info
31679 {
31680 tree version_decl;
31681 tree predicate_chain;
31682 unsigned int dispatch_priority;
31683 } function_version_info;
31684
31685 const function_version_info c1 = *(const function_version_info *)v1;
31686 const function_version_info c2 = *(const function_version_info *)v2;
31687 return (c2.dispatch_priority - c1.dispatch_priority);
31688 }
31689
31690 /* This function generates the dispatch function for
31691 multi-versioned functions. DISPATCH_DECL is the function which will
31692 contain the dispatch logic. FNDECLS are the function choices for
31693 dispatch, and is a tree chain. EMPTY_BB is the basic block pointer
31694 in DISPATCH_DECL in which the dispatch code is generated. */
31695
31696 static int
31697 dispatch_function_versions (tree dispatch_decl,
31698 void *fndecls_p,
31699 basic_block *empty_bb)
31700 {
31701 tree default_decl;
31702 gimple ifunc_cpu_init_stmt;
31703 gimple_seq gseq;
31704 int ix;
31705 tree ele;
31706 vec<tree> *fndecls;
31707 unsigned int num_versions = 0;
31708 unsigned int actual_versions = 0;
31709 unsigned int i;
31710
31711 struct _function_version_info
31712 {
31713 tree version_decl;
31714 tree predicate_chain;
31715 unsigned int dispatch_priority;
31716 }*function_version_info;
31717
31718 gcc_assert (dispatch_decl != NULL
31719 && fndecls_p != NULL
31720 && empty_bb != NULL);
31721
31722 /* fndecls_p is actually a vector. */
31723 fndecls = static_cast<vec<tree> *> (fndecls_p);
31724
31725 /* At least one more version other than the default. */
31726 num_versions = fndecls->length ();
31727 gcc_assert (num_versions >= 2);
31728
31729 function_version_info = (struct _function_version_info *)
31730 XNEWVEC (struct _function_version_info, (num_versions - 1));
31731
31732 /* The first version in the vector is the default decl. */
31733 default_decl = (*fndecls)[0];
31734
31735 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31736
31737 gseq = bb_seq (*empty_bb);
31738 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31739 constructors, so explicitly call __builtin_cpu_init here. */
31740 ifunc_cpu_init_stmt = gimple_build_call_vec (
31741 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31742 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31743 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31744 set_bb_seq (*empty_bb, gseq);
31745
31746 pop_cfun ();
31747
31748
31749 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31750 {
31751 tree version_decl = ele;
31752 tree predicate_chain = NULL_TREE;
31753 unsigned int priority;
31754 /* Get attribute string, parse it and find the right predicate decl.
31755 The predicate function could be a lengthy combination of many
31756 features, like arch-type and various isa-variants. */
31757 priority = get_builtin_code_for_version (version_decl,
31758 &predicate_chain);
31759
31760 if (predicate_chain == NULL_TREE)
31761 continue;
31762
31763 function_version_info [actual_versions].version_decl = version_decl;
31764 function_version_info [actual_versions].predicate_chain
31765 = predicate_chain;
31766 function_version_info [actual_versions].dispatch_priority = priority;
31767 actual_versions++;
31768 }
31769
31770 /* Sort the versions according to descending order of dispatch priority. The
31771 priority is based on the ISA. This is not a perfect solution. There
31772 could still be ambiguity. If more than one function version is suitable
31773 to execute, which one should be dispatched? In the future, allow the user
31774 to specify a dispatch priority next to the version. */
31775 qsort (function_version_info, actual_versions,
31776 sizeof (struct _function_version_info), feature_compare);
31777
31778 for (i = 0; i < actual_versions; ++i)
31779 *empty_bb = add_condition_to_bb (dispatch_decl,
31780 function_version_info[i].version_decl,
31781 function_version_info[i].predicate_chain,
31782 *empty_bb);
31783
31784 /* Dispatch the default version at the end. */
31785 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31786 NULL, *empty_bb);
31787
31788 free (function_version_info);
31789 return 0;
31790 }
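
/* A minimal illustration (hypothetical names, pseudo-C; the real body is
   GIMPLE): after sorting, the resolver body built into DISPATCH_DECL looks
   roughly like

     void *foo_resolver (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_supports ("avx2"))
         return &foo.avx2;
       if (__builtin_cpu_supports ("sse4.2"))
         return &foo.sse4.2;
       return &foo;
     }

   with the highest priority version tested first and the default version
   returned unconditionally at the end.  */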
31791
31792 /* Comparator function to be used in qsort routine to sort attribute
31793 specification strings to "target". */
31794
31795 static int
31796 attr_strcmp (const void *v1, const void *v2)
31797 {
31798 const char *c1 = *(char *const*)v1;
31799 const char *c2 = *(char *const*)v2;
31800 return strcmp (c1, c2);
31801 }
31802
31803 /* ARGLIST is the argument to target attribute. This function tokenizes
31804 the comma separated arguments, sorts them and returns a string which
31805 is a unique identifier for the comma separated arguments. It also
31806 replaces non-identifier characters "=,-" with "_". */
31807
31808 static char *
31809 sorted_attr_string (tree arglist)
31810 {
31811 tree arg;
31812 size_t str_len_sum = 0;
31813 char **args = NULL;
31814 char *attr_str, *ret_str;
31815 char *attr = NULL;
31816 unsigned int argnum = 1;
31817 unsigned int i;
31818
31819 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31820 {
31821 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31822 size_t len = strlen (str);
31823 str_len_sum += len + 1;
31824 if (arg != arglist)
31825 argnum++;
31826 for (i = 0; i < strlen (str); i++)
31827 if (str[i] == ',')
31828 argnum++;
31829 }
31830
31831 attr_str = XNEWVEC (char, str_len_sum);
31832 str_len_sum = 0;
31833 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31834 {
31835 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31836 size_t len = strlen (str);
31837 memcpy (attr_str + str_len_sum, str, len);
31838 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31839 str_len_sum += len + 1;
31840 }
31841
31842 /* Replace "=,-" with "_". */
31843 for (i = 0; i < strlen (attr_str); i++)
31844 if (attr_str[i] == '=' || attr_str[i]== '-')
31845 attr_str[i] = '_';
31846
31847 if (argnum == 1)
31848 return attr_str;
31849
31850 args = XNEWVEC (char *, argnum);
31851
31852 i = 0;
31853 attr = strtok (attr_str, ",");
31854 while (attr != NULL)
31855 {
31856 args[i] = attr;
31857 i++;
31858 attr = strtok (NULL, ",");
31859 }
31860
31861 qsort (args, argnum, sizeof (char *), attr_strcmp);
31862
31863 ret_str = XNEWVEC (char, str_len_sum);
31864 str_len_sum = 0;
31865 for (i = 0; i < argnum; i++)
31866 {
31867 size_t len = strlen (args[i]);
31868 memcpy (ret_str + str_len_sum, args[i], len);
31869 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31870 str_len_sum += len + 1;
31871 }
31872
31873 XDELETEVEC (args);
31874 XDELETEVEC (attr_str);
31875 return ret_str;
31876 }
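
/* A minimal illustration (hypothetical input): for the attribute argument
   of target ("sse4.2,arch=atom") the steps above produce

     "sse4.2,arch=atom"  ->  "sse4.2,arch_atom"          replace '=' / '-'
                         ->  { "sse4.2", "arch_atom" }   tokenize on ','
                         ->  "arch_atom_sse4.2"          sort and rejoin

   so equivalent attribute strings written in a different order map to the
   same identifier.  */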
31877
31878 /* This function changes the assembler name for functions that are
31879 versions. If DECL is a function version and has a "target"
31880 attribute, it appends the attribute string to its assembler name. */
31881
31882 static tree
31883 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31884 {
31885 tree version_attr;
31886 const char *orig_name, *version_string;
31887 char *attr_str, *assembler_name;
31888
31889 if (DECL_DECLARED_INLINE_P (decl)
31890 && lookup_attribute ("gnu_inline",
31891 DECL_ATTRIBUTES (decl)))
31892 error_at (DECL_SOURCE_LOCATION (decl),
31893 "Function versions cannot be marked as gnu_inline,"
31894 " bodies have to be generated");
31895
31896 if (DECL_VIRTUAL_P (decl)
31897 || DECL_VINDEX (decl))
31898 sorry ("Virtual function multiversioning not supported");
31899
31900 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31901
31902 /* target attribute string cannot be NULL. */
31903 gcc_assert (version_attr != NULL_TREE);
31904
31905 orig_name = IDENTIFIER_POINTER (id);
31906 version_string
31907 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31908
31909 if (strcmp (version_string, "default") == 0)
31910 return id;
31911
31912 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31913 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31914
31915 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31916
31917 /* Allow assembler name to be modified if already set. */
31918 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31919 SET_DECL_RTL (decl, NULL);
31920
31921 tree ret = get_identifier (assembler_name);
31922 XDELETEVEC (attr_str);
31923 XDELETEVEC (assembler_name);
31924 return ret;
31925 }
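
/* A minimal illustration (assuming the Itanium C++ mangling of
   "void foo ()"): a version tagged with target ("sse4.2,arch=atom") gets
   the assembler name

     _Z3foov.arch_atom_sse4.2

   while the "default" version keeps the plain _Z3foov name.  */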
31926
31927 /* This function returns true if FN1 and FN2 are versions of the same function,
31928 i.e. their target attribute strings are different. This assumes
31929 that FN1 and FN2 have the same signature. */
31930
31931 static bool
31932 ix86_function_versions (tree fn1, tree fn2)
31933 {
31934 tree attr1, attr2;
31935 char *target1, *target2;
31936 bool result;
31937
31938 if (TREE_CODE (fn1) != FUNCTION_DECL
31939 || TREE_CODE (fn2) != FUNCTION_DECL)
31940 return false;
31941
31942 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31943 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31944
31945 /* At least one function decl should have the target attribute specified. */
31946 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31947 return false;
31948
31949 /* Diagnose missing target attribute if one of the decls is already
31950 multi-versioned. */
31951 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31952 {
31953 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31954 {
31955 if (attr2 != NULL_TREE)
31956 {
31957 tree tem = fn1;
31958 fn1 = fn2;
31959 fn2 = tem;
31960 attr1 = attr2;
31961 }
31962 error_at (DECL_SOURCE_LOCATION (fn2),
31963 "missing %<target%> attribute for multi-versioned %D",
31964 fn2);
31965 inform (DECL_SOURCE_LOCATION (fn1),
31966 "previous declaration of %D", fn1);
31967 /* Prevent diagnosing of the same error multiple times. */
31968 DECL_ATTRIBUTES (fn2)
31969 = tree_cons (get_identifier ("target"),
31970 copy_node (TREE_VALUE (attr1)),
31971 DECL_ATTRIBUTES (fn2));
31972 }
31973 return false;
31974 }
31975
31976 target1 = sorted_attr_string (TREE_VALUE (attr1));
31977 target2 = sorted_attr_string (TREE_VALUE (attr2));
31978
31979 /* The sorted target strings must be different for fn1 and fn2
31980 to be versions. */
31981 if (strcmp (target1, target2) == 0)
31982 result = false;
31983 else
31984 result = true;
31985
31986 XDELETEVEC (target1);
31987 XDELETEVEC (target2);
31988
31989 return result;
31990 }
31991
31992 static tree
31993 ix86_mangle_decl_assembler_name (tree decl, tree id)
31994 {
31995 /* For function version, add the target suffix to the assembler name. */
31996 if (TREE_CODE (decl) == FUNCTION_DECL
31997 && DECL_FUNCTION_VERSIONED (decl))
31998 id = ix86_mangle_function_version_assembler_name (decl, id);
31999 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
32000 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
32001 #endif
32002
32003 return id;
32004 }
32005
32006 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
32007 is true, append the full path name of the source file. */
32008
32009 static char *
32010 make_name (tree decl, const char *suffix, bool make_unique)
32011 {
32012 char *global_var_name;
32013 int name_len;
32014 const char *name;
32015 const char *unique_name = NULL;
32016
32017 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
32018
32019 /* Get a unique name that can be used globally without any chances
32020 of collision at link time. */
32021 if (make_unique)
32022 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
32023
32024 name_len = strlen (name) + strlen (suffix) + 2;
32025
32026 if (make_unique)
32027 name_len += strlen (unique_name) + 1;
32028 global_var_name = XNEWVEC (char, name_len);
32029
32030 /* Use '.' to concatenate names as it is demangler friendly. */
32031 if (make_unique)
32032 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
32033 suffix);
32034 else
32035 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
32036
32037 return global_var_name;
32038 }
32039
32040 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32041
32042 /* Make a dispatcher declaration for the multi-versioned function DECL.
32043 Calls to DECL function will be replaced with calls to the dispatcher
32044 by the front-end. Return the decl created. */
32045
32046 static tree
32047 make_dispatcher_decl (const tree decl)
32048 {
32049 tree func_decl;
32050 char *func_name;
32051 tree fn_type, func_type;
32052 bool is_uniq = false;
32053
32054 if (TREE_PUBLIC (decl) == 0)
32055 is_uniq = true;
32056
32057 func_name = make_name (decl, "ifunc", is_uniq);
32058
32059 fn_type = TREE_TYPE (decl);
32060 func_type = build_function_type (TREE_TYPE (fn_type),
32061 TYPE_ARG_TYPES (fn_type));
32062
32063 func_decl = build_fn_decl (func_name, func_type);
32064 XDELETEVEC (func_name);
32065 TREE_USED (func_decl) = 1;
32066 DECL_CONTEXT (func_decl) = NULL_TREE;
32067 DECL_INITIAL (func_decl) = error_mark_node;
32068 DECL_ARTIFICIAL (func_decl) = 1;
32069 /* Mark this func as external, the resolver will flip it again if
32070 it gets generated. */
32071 DECL_EXTERNAL (func_decl) = 1;
32072 /* IFUNCs have to be externally visible. */
32073 TREE_PUBLIC (func_decl) = 1;
32074
32075 return func_decl;
32076 }
32077
32078 #endif
32079
32080 /* Returns true if DECL is multi-versioned and is the default function,
32081 that is, it is not tagged with a target-specific optimization. */
32082
32083 static bool
32084 is_function_default_version (const tree decl)
32085 {
32086 if (TREE_CODE (decl) != FUNCTION_DECL
32087 || !DECL_FUNCTION_VERSIONED (decl))
32088 return false;
32089 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
32090 gcc_assert (attr);
32091 attr = TREE_VALUE (TREE_VALUE (attr));
32092 return (TREE_CODE (attr) == STRING_CST
32093 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
32094 }
32095
32096 /* Make a dispatcher declaration for the multi-versioned function DECL.
32097 Calls to DECL function will be replaced with calls to the dispatcher
32098 by the front-end. Returns the decl of the dispatcher function. */
32099
32100 static tree
32101 ix86_get_function_versions_dispatcher (void *decl)
32102 {
32103 tree fn = (tree) decl;
32104 struct cgraph_node *node = NULL;
32105 struct cgraph_node *default_node = NULL;
32106 struct cgraph_function_version_info *node_v = NULL;
32107 struct cgraph_function_version_info *first_v = NULL;
32108
32109 tree dispatch_decl = NULL;
32110
32111 struct cgraph_function_version_info *default_version_info = NULL;
32112
32113 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
32114
32115 node = cgraph_node::get (fn);
32116 gcc_assert (node != NULL);
32117
32118 node_v = node->function_version ();
32119 gcc_assert (node_v != NULL);
32120
32121 if (node_v->dispatcher_resolver != NULL)
32122 return node_v->dispatcher_resolver;
32123
32124 /* Find the default version and make it the first node. */
32125 first_v = node_v;
32126 /* Go to the beginning of the chain. */
32127 while (first_v->prev != NULL)
32128 first_v = first_v->prev;
32129 default_version_info = first_v;
32130 while (default_version_info != NULL)
32131 {
32132 if (is_function_default_version
32133 (default_version_info->this_node->decl))
32134 break;
32135 default_version_info = default_version_info->next;
32136 }
32137
32138 /* If there is no default node, just return NULL. */
32139 if (default_version_info == NULL)
32140 return NULL;
32141
32142 /* Make default info the first node. */
32143 if (first_v != default_version_info)
32144 {
32145 default_version_info->prev->next = default_version_info->next;
32146 if (default_version_info->next)
32147 default_version_info->next->prev = default_version_info->prev;
32148 first_v->prev = default_version_info;
32149 default_version_info->next = first_v;
32150 default_version_info->prev = NULL;
32151 }
32152
32153 default_node = default_version_info->this_node;
32154
32155 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32156 if (targetm.has_ifunc_p ())
32157 {
32158 struct cgraph_function_version_info *it_v = NULL;
32159 struct cgraph_node *dispatcher_node = NULL;
32160 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32161
32162 /* Right now, the dispatching is done via ifunc. */
32163 dispatch_decl = make_dispatcher_decl (default_node->decl);
32164
32165 dispatcher_node = cgraph_node::get_create (dispatch_decl);
32166 gcc_assert (dispatcher_node != NULL);
32167 dispatcher_node->dispatcher_function = 1;
32168 dispatcher_version_info
32169 = dispatcher_node->insert_new_function_version ();
32170 dispatcher_version_info->next = default_version_info;
32171 dispatcher_node->definition = 1;
32172
32173 /* Set the dispatcher for all the versions. */
32174 it_v = default_version_info;
32175 while (it_v != NULL)
32176 {
32177 it_v->dispatcher_resolver = dispatch_decl;
32178 it_v = it_v->next;
32179 }
32180 }
32181 else
32182 #endif
32183 {
32184 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32185 "multiversioning needs ifunc which is not supported "
32186 "on this target");
32187 }
32188
32189 return dispatch_decl;
32190 }
32191
32192 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32193 it to CHAIN. */
32194
32195 static tree
32196 make_attribute (const char *name, const char *arg_name, tree chain)
32197 {
32198 tree attr_name;
32199 tree attr_arg_name;
32200 tree attr_args;
32201 tree attr;
32202
32203 attr_name = get_identifier (name);
32204 attr_arg_name = build_string (strlen (arg_name), arg_name);
32205 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32206 attr = tree_cons (attr_name, attr_args, chain);
32207 return attr;
32208 }
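
/* A minimal illustration (hypothetical argument): make_attribute ("ifunc",
   "foo.resolver", NULL_TREE) builds the same tree the front end would build
   for

     __attribute__ ((ifunc ("foo.resolver")))

   that is, a TREE_LIST whose purpose is the identifier "ifunc" and whose
   value is a one-element list holding the STRING_CST argument.  */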
32209
32210 /* Make the resolver function decl to dispatch the versions of
32211 a multi-versioned function, DEFAULT_DECL. Create an
32212 empty basic block in the resolver and store the pointer in
32213 EMPTY_BB. Return the decl of the resolver function. */
32214
32215 static tree
32216 make_resolver_func (const tree default_decl,
32217 const tree dispatch_decl,
32218 basic_block *empty_bb)
32219 {
32220 char *resolver_name;
32221 tree decl, type, decl_name, t;
32222 bool is_uniq = false;
32223
32224 /* IFUNCs have to be globally visible. So, if the default_decl is
32225 not, then the name of the IFUNC should be made unique. */
32226 if (TREE_PUBLIC (default_decl) == 0)
32227 is_uniq = true;
32228
32229 /* Append the filename to the resolver function if the versions are
32230 not externally visible. This is because the resolver function has
32231 to be externally visible for the loader to find it. So, appending
32232 the filename will prevent conflicts with a resolver function from
32233 another module which is based on the same version name. */
32234 resolver_name = make_name (default_decl, "resolver", is_uniq);
32235
32236 /* The resolver function should return a (void *). */
32237 type = build_function_type_list (ptr_type_node, NULL_TREE);
32238
32239 decl = build_fn_decl (resolver_name, type);
32240 decl_name = get_identifier (resolver_name);
32241 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32242
32243 DECL_NAME (decl) = decl_name;
32244 TREE_USED (decl) = 1;
32245 DECL_ARTIFICIAL (decl) = 1;
32246 DECL_IGNORED_P (decl) = 0;
32247 /* IFUNC resolvers have to be externally visible. */
32248 TREE_PUBLIC (decl) = 1;
32249 DECL_UNINLINABLE (decl) = 1;
32250
32251 /* Resolver is not external, body is generated. */
32252 DECL_EXTERNAL (decl) = 0;
32253 DECL_EXTERNAL (dispatch_decl) = 0;
32254
32255 DECL_CONTEXT (decl) = NULL_TREE;
32256 DECL_INITIAL (decl) = make_node (BLOCK);
32257 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32258
32259 if (DECL_COMDAT_GROUP (default_decl)
32260 || TREE_PUBLIC (default_decl))
32261 {
32262 /* In this case, each translation unit with a call to this
32263 versioned function will put out a resolver. Ensure it
32264 is comdat to keep just one copy. */
32265 DECL_COMDAT (decl) = 1;
32266 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32267 }
32268 /* Build result decl and add to function_decl. */
32269 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32270 DECL_ARTIFICIAL (t) = 1;
32271 DECL_IGNORED_P (t) = 1;
32272 DECL_RESULT (decl) = t;
32273
32274 gimplify_function_tree (decl);
32275 push_cfun (DECL_STRUCT_FUNCTION (decl));
32276 *empty_bb = init_lowered_empty_function (decl, false);
32277
32278 cgraph_node::add_new_function (decl, true);
32279 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
32280
32281 pop_cfun ();
32282
32283 gcc_assert (dispatch_decl != NULL);
32284 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32285 DECL_ATTRIBUTES (dispatch_decl)
32286 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32287
32288 /* Create the alias for dispatch to resolver here. */
32289 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32290 cgraph_node::create_same_body_alias (dispatch_decl, decl);
32291 XDELETEVEC (resolver_name);
32292 return decl;
32293 }
32294
32295 /* Generate the dispatching code body to dispatch multi-versioned function
32296 DECL. The target hook is called to process the "target" attributes and
32297 provide the code to dispatch the right function at run-time. NODE points
32298 to the dispatcher decl whose body will be created. */
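/* In outline: reuse a previously created resolver if one is cached,
   otherwise create one with make_resolver_func, turn NODE itself into an
   alias of it, collect the decls of all versions recorded in the cgraph
   version chain, and hand them to dispatch_function_versions to fill in
   the resolver body. */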
32299
32300 static tree
32301 ix86_generate_version_dispatcher_body (void *node_p)
32302 {
32303 tree resolver_decl;
32304 basic_block empty_bb;
32305 tree default_ver_decl;
32306 struct cgraph_node *versn;
32307 struct cgraph_node *node;
32308
32309 struct cgraph_function_version_info *node_version_info = NULL;
32310 struct cgraph_function_version_info *versn_info = NULL;
32311
32312 node = (cgraph_node *)node_p;
32313
32314 node_version_info = node->function_version ();
32315 gcc_assert (node->dispatcher_function
32316 && node_version_info != NULL);
32317
32318 if (node_version_info->dispatcher_resolver)
32319 return node_version_info->dispatcher_resolver;
32320
32321 /* The first version in the chain corresponds to the default version. */
32322 default_ver_decl = node_version_info->next->this_node->decl;
32323
32324 /* node is going to be an alias, so remove the finalized bit. */
32325 node->definition = false;
32326
32327 resolver_decl = make_resolver_func (default_ver_decl,
32328 node->decl, &empty_bb);
32329
32330 node_version_info->dispatcher_resolver = resolver_decl;
32331
32332 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32333
32334 auto_vec<tree, 2> fn_ver_vec;
32335
32336 for (versn_info = node_version_info->next; versn_info;
32337 versn_info = versn_info->next)
32338 {
32339 versn = versn_info->this_node;
32340 /* Check for virtual functions here again, as by this time it should
32341 have been determined if this function needs a vtable index or
32342 not. This happens for methods in derived classes that override
32343 virtual methods in base classes but are not explicitly marked as
32344 virtual. */
32345 if (DECL_VINDEX (versn->decl))
32346 sorry ("virtual function multiversioning not supported");
32347
32348 fn_ver_vec.safe_push (versn->decl);
32349 }
32350
32351 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32352 cgraph_edge::rebuild_edges ();
32353 pop_cfun ();
32354 return resolver_decl;
32355 }
32356 /* This builds the processor_model struct type defined in
32357 libgcc/config/i386/cpuinfo.c. */
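/* The layout built below is intended to mirror (a sketch, not the
   authoritative definition):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   as declared in libgcc/config/i386/cpuinfo.c. */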
32358
32359 static tree
32360 build_processor_model_struct (void)
32361 {
32362 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32363 "__cpu_features"};
32364 tree field = NULL_TREE, field_chain = NULL_TREE;
32365 int i;
32366 tree type = make_node (RECORD_TYPE);
32367
32368 /* The first 3 fields are unsigned int. */
32369 for (i = 0; i < 3; ++i)
32370 {
32371 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32372 get_identifier (field_name[i]), unsigned_type_node);
32373 if (field_chain != NULL_TREE)
32374 DECL_CHAIN (field) = field_chain;
32375 field_chain = field;
32376 }
32377
32378 /* The last field is an array of unsigned integers of size one. */
32379 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32380 get_identifier (field_name[3]),
32381 build_array_type (unsigned_type_node,
32382 build_index_type (size_one_node)));
32383 if (field_chain != NULL_TREE)
32384 DECL_CHAIN (field) = field_chain;
32385 field_chain = field;
32386
32387 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32388 return type;
32389 }
32390
32391 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32392
32393 static tree
32394 make_var_decl (tree type, const char *name)
32395 {
32396 tree new_decl;
32397
32398 new_decl = build_decl (UNKNOWN_LOCATION,
32399 VAR_DECL,
32400 get_identifier (name),
32401 type);
32402
32403 DECL_EXTERNAL (new_decl) = 1;
32404 TREE_STATIC (new_decl) = 1;
32405 TREE_PUBLIC (new_decl) = 1;
32406 DECL_INITIAL (new_decl) = 0;
32407 DECL_ARTIFICIAL (new_decl) = 0;
32408 DECL_PRESERVE_P (new_decl) = 1;
32409
32410 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32411 assemble_variable (new_decl, 0, 0, 0);
32412
32413 return new_decl;
32414 }
32415
32416 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32417 into an integer defined in libgcc/config/i386/cpuinfo.c */
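/* Roughly speaking, __builtin_cpu_is ("<CPUNAME>") folds to a comparison
   of the matching __cpu_model field against an enum value below, and
   __builtin_cpu_supports ("<FEATURE>") folds to a bit test of
   __cpu_model.__cpu_features[0]; e.g. __builtin_cpu_supports ("avx")
   becomes, in effect, __cpu_model.__cpu_features[0] & (1 << F_AVX). */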
32418
32419 static tree
32420 fold_builtin_cpu (tree fndecl, tree *args)
32421 {
32422 unsigned int i;
32423 enum ix86_builtins fn_code = (enum ix86_builtins)
32424 DECL_FUNCTION_CODE (fndecl);
32425 tree param_string_cst = NULL;
32426
32427 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32428 enum processor_features
32429 {
32430 F_CMOV = 0,
32431 F_MMX,
32432 F_POPCNT,
32433 F_SSE,
32434 F_SSE2,
32435 F_SSE3,
32436 F_SSSE3,
32437 F_SSE4_1,
32438 F_SSE4_2,
32439 F_AVX,
32440 F_AVX2,
32441 F_SSE4_A,
32442 F_FMA4,
32443 F_XOP,
32444 F_FMA,
32445 F_MAX
32446 };
32447
32448 /* These are the values for vendor types and cpu types and subtypes
32449 in cpuinfo.c. Cpu types and subtypes have the corresponding start
32450 value subtracted before they are compared against the fields. */
32451 enum processor_model
32452 {
32453 M_INTEL = 1,
32454 M_AMD,
32455 M_CPU_TYPE_START,
32456 M_INTEL_BONNELL,
32457 M_INTEL_CORE2,
32458 M_INTEL_COREI7,
32459 M_AMDFAM10H,
32460 M_AMDFAM15H,
32461 M_INTEL_SILVERMONT,
32462 M_AMD_BTVER1,
32463 M_AMD_BTVER2,
32464 M_CPU_SUBTYPE_START,
32465 M_INTEL_COREI7_NEHALEM,
32466 M_INTEL_COREI7_WESTMERE,
32467 M_INTEL_COREI7_SANDYBRIDGE,
32468 M_AMDFAM10H_BARCELONA,
32469 M_AMDFAM10H_SHANGHAI,
32470 M_AMDFAM10H_ISTANBUL,
32471 M_AMDFAM15H_BDVER1,
32472 M_AMDFAM15H_BDVER2,
32473 M_AMDFAM15H_BDVER3,
32474 M_AMDFAM15H_BDVER4,
32475 M_INTEL_COREI7_IVYBRIDGE,
32476 M_INTEL_COREI7_HASWELL
32477 };
32478
32479 static struct _arch_names_table
32480 {
32481 const char *const name;
32482 const enum processor_model model;
32483 }
32484 const arch_names_table[] =
32485 {
32486 {"amd", M_AMD},
32487 {"intel", M_INTEL},
32488 {"atom", M_INTEL_BONNELL},
32489 {"slm", M_INTEL_SILVERMONT},
32490 {"core2", M_INTEL_CORE2},
32491 {"corei7", M_INTEL_COREI7},
32492 {"nehalem", M_INTEL_COREI7_NEHALEM},
32493 {"westmere", M_INTEL_COREI7_WESTMERE},
32494 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32495 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32496 {"haswell", M_INTEL_COREI7_HASWELL},
32497 {"bonnell", M_INTEL_BONNELL},
32498 {"silvermont", M_INTEL_SILVERMONT},
32499 {"amdfam10h", M_AMDFAM10H},
32500 {"barcelona", M_AMDFAM10H_BARCELONA},
32501 {"shanghai", M_AMDFAM10H_SHANGHAI},
32502 {"istanbul", M_AMDFAM10H_ISTANBUL},
32503 {"btver1", M_AMD_BTVER1},
32504 {"amdfam15h", M_AMDFAM15H},
32505 {"bdver1", M_AMDFAM15H_BDVER1},
32506 {"bdver2", M_AMDFAM15H_BDVER2},
32507 {"bdver3", M_AMDFAM15H_BDVER3},
32508 {"bdver4", M_AMDFAM15H_BDVER4},
32509 {"btver2", M_AMD_BTVER2},
32510 };
32511
32512 static struct _isa_names_table
32513 {
32514 const char *const name;
32515 const enum processor_features feature;
32516 }
32517 const isa_names_table[] =
32518 {
32519 {"cmov", F_CMOV},
32520 {"mmx", F_MMX},
32521 {"popcnt", F_POPCNT},
32522 {"sse", F_SSE},
32523 {"sse2", F_SSE2},
32524 {"sse3", F_SSE3},
32525 {"ssse3", F_SSSE3},
32526 {"sse4a", F_SSE4_A},
32527 {"sse4.1", F_SSE4_1},
32528 {"sse4.2", F_SSE4_2},
32529 {"avx", F_AVX},
32530 {"fma4", F_FMA4},
32531 {"xop", F_XOP},
32532 {"fma", F_FMA},
32533 {"avx2", F_AVX2}
32534 };
32535
32536 tree __processor_model_type = build_processor_model_struct ();
32537 tree __cpu_model_var = make_var_decl (__processor_model_type,
32538 "__cpu_model");
32539
32540
32541 varpool_node::add (__cpu_model_var);
32542
32543 gcc_assert ((args != NULL) && (*args != NULL));
32544
32545 param_string_cst = *args;
32546 while (param_string_cst
32547 && TREE_CODE (param_string_cst) != STRING_CST)
32548 {
32549 /* *args must be an expr that can contain other EXPRs leading to a
32550 STRING_CST. */
32551 if (!EXPR_P (param_string_cst))
32552 {
32553 error ("parameter to builtin must be a string constant or literal");
32554 return integer_zero_node;
32555 }
32556 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32557 }
32558
32559 gcc_assert (param_string_cst);
32560
32561 if (fn_code == IX86_BUILTIN_CPU_IS)
32562 {
32563 tree ref;
32564 tree field;
32565 tree final;
32566
32567 unsigned int field_val = 0;
32568 unsigned int NUM_ARCH_NAMES
32569 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32570
32571 for (i = 0; i < NUM_ARCH_NAMES; i++)
32572 if (strcmp (arch_names_table[i].name,
32573 TREE_STRING_POINTER (param_string_cst)) == 0)
32574 break;
32575
32576 if (i == NUM_ARCH_NAMES)
32577 {
32578 error ("parameter to builtin not valid: %s",
32579 TREE_STRING_POINTER (param_string_cst));
32580 return integer_zero_node;
32581 }
32582
32583 field = TYPE_FIELDS (__processor_model_type);
32584 field_val = arch_names_table[i].model;
32585
32586 /* CPU types are stored in the next field. */
32587 if (field_val > M_CPU_TYPE_START
32588 && field_val < M_CPU_SUBTYPE_START)
32589 {
32590 field = DECL_CHAIN (field);
32591 field_val -= M_CPU_TYPE_START;
32592 }
32593
32594 /* CPU subtypes are stored in the next field. */
32595 if (field_val > M_CPU_SUBTYPE_START)
32596 {
32597 field = DECL_CHAIN (DECL_CHAIN (field));
32598 field_val -= M_CPU_SUBTYPE_START;
32599 }
32600
32601 /* Get the appropriate field in __cpu_model. */
32602 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32603 field, NULL_TREE);
32604
32605 /* Check the value. */
32606 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32607 build_int_cstu (unsigned_type_node, field_val));
32608 return build1 (CONVERT_EXPR, integer_type_node, final);
32609 }
32610 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32611 {
32612 tree ref;
32613 tree array_elt;
32614 tree field;
32615 tree final;
32616
32617 unsigned int field_val = 0;
32618 unsigned int NUM_ISA_NAMES
32619 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32620
32621 for (i = 0; i < NUM_ISA_NAMES; i++)
32622 if (strcmp (isa_names_table[i].name,
32623 TREE_STRING_POINTER (param_string_cst)) == 0)
32624 break;
32625
32626 if (i == NUM_ISA_NAMES)
32627 {
32628 error ("parameter to builtin not valid: %s",
32629 TREE_STRING_POINTER (param_string_cst));
32630 return integer_zero_node;
32631 }
32632
32633 field = TYPE_FIELDS (__processor_model_type);
32634 /* Get the last field, which is __cpu_features. */
32635 while (DECL_CHAIN (field))
32636 field = DECL_CHAIN (field);
32637
32638 /* Get the appropriate field: __cpu_model.__cpu_features */
32639 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32640 field, NULL_TREE);
32641
32642 /* Access the 0th element of __cpu_features array. */
32643 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32644 integer_zero_node, NULL_TREE, NULL_TREE);
32645
32646 field_val = (1 << isa_names_table[i].feature);
32647 /* Return __cpu_model.__cpu_features[0] & field_val */
32648 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32649 build_int_cstu (unsigned_type_node, field_val));
32650 return build1 (CONVERT_EXPR, integer_type_node, final);
32651 }
32652 gcc_unreachable ();
32653 }
32654
32655 static tree
32656 ix86_fold_builtin (tree fndecl, int n_args,
32657 tree *args, bool ignore ATTRIBUTE_UNUSED)
32658 {
32659 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32660 {
32661 enum ix86_builtins fn_code = (enum ix86_builtins)
32662 DECL_FUNCTION_CODE (fndecl);
32663 if (fn_code == IX86_BUILTIN_CPU_IS
32664 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32665 {
32666 gcc_assert (n_args == 1);
32667 return fold_builtin_cpu (fndecl, args);
32668 }
32669 }
32670
32671 #ifdef SUBTARGET_FOLD_BUILTIN
32672 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32673 #endif
32674
32675 return NULL_TREE;
32676 }
32677
32678 /* Make builtins to detect cpu type and features supported. NAME is
32679 the builtin name, CODE is the builtin code, and FTYPE is the function
32680 type of the builtin. */
32681
32682 static void
32683 make_cpu_type_builtin (const char* name, int code,
32684 enum ix86_builtin_func_type ftype, bool is_const)
32685 {
32686 tree decl;
32687 tree type;
32688
32689 type = ix86_get_builtin_func_type (ftype);
32690 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32691 NULL, NULL_TREE);
32692 gcc_assert (decl != NULL_TREE);
32693 ix86_builtins[(int) code] = decl;
32694 TREE_READONLY (decl) = is_const;
32695 }
32696
32697 /* Make builtins to get CPU type and features supported. The created
32698 builtins are:
32699
32700 __builtin_cpu_init (), to detect cpu type and features,
32701 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32702 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32703 */
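/* A typical (illustrative) use from user code:

     __builtin_cpu_init ();
     if (__builtin_cpu_supports ("avx2"))
       ...
     else if (__builtin_cpu_is ("corei7"))
       ...

   The string arguments must match entries of arch_names_table and
   isa_names_table handled by fold_builtin_cpu above. */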
32704
32705 static void
32706 ix86_init_platform_type_builtins (void)
32707 {
32708 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32709 INT_FTYPE_VOID, false);
32710 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32711 INT_FTYPE_PCCHAR, true);
32712 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32713 INT_FTYPE_PCCHAR, true);
32714 }
32715
32716 /* Internal method for ix86_init_builtins. */
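/* An informal summary: on 64-bit targets this registers the ms_abi and
   sysv_abi flavors of va_start/va_end/va_copy (__builtin_ms_va_* and
   __builtin_sysv_va_*), so that variadic functions carrying either
   calling-convention attribute get va_list handling that matches their
   ABI rather than the target default. */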
32717
32718 static void
32719 ix86_init_builtins_va_builtins_abi (void)
32720 {
32721 tree ms_va_ref, sysv_va_ref;
32722 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32723 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32724 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32725 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32726
32727 if (!TARGET_64BIT)
32728 return;
32729 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32730 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32731 ms_va_ref = build_reference_type (ms_va_list_type_node);
32732 sysv_va_ref =
32733 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32734
32735 fnvoid_va_end_ms =
32736 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32737 fnvoid_va_start_ms =
32738 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32739 fnvoid_va_end_sysv =
32740 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32741 fnvoid_va_start_sysv =
32742 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32743 NULL_TREE);
32744 fnvoid_va_copy_ms =
32745 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32746 NULL_TREE);
32747 fnvoid_va_copy_sysv =
32748 build_function_type_list (void_type_node, sysv_va_ref,
32749 sysv_va_ref, NULL_TREE);
32750
32751 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32752 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32753 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32754 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32755 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32756 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32757 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32758 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32759 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32760 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32761 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32762 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32763 }
32764
32765 static void
32766 ix86_init_builtin_types (void)
32767 {
32768 tree float128_type_node, float80_type_node;
32769
32770 /* The __float80 type. */
32771 float80_type_node = long_double_type_node;
32772 if (TYPE_MODE (float80_type_node) != XFmode)
32773 {
32774 /* The __float80 type. */
32775 float80_type_node = make_node (REAL_TYPE);
32776
32777 TYPE_PRECISION (float80_type_node) = 80;
32778 layout_type (float80_type_node);
32779 }
32780 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32781
32782 /* The __float128 type. */
32783 float128_type_node = make_node (REAL_TYPE);
32784 TYPE_PRECISION (float128_type_node) = 128;
32785 layout_type (float128_type_node);
32786 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32787
32788 /* This macro is built by i386-builtin-types.awk. */
32789 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32790 }
32791
32792 static void
32793 ix86_init_builtins (void)
32794 {
32795 tree t;
32796
32797 ix86_init_builtin_types ();
32798
32799 /* Builtins to get CPU type and features. */
32800 ix86_init_platform_type_builtins ();
32801
32802 /* TFmode support builtins. */
32803 def_builtin_const (0, "__builtin_infq",
32804 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32805 def_builtin_const (0, "__builtin_huge_valq",
32806 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32807
32808 /* We will expand them to a normal call if SSE isn't available, since
32809 they are used by libgcc. */
32810 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32811 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32812 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32813 TREE_READONLY (t) = 1;
32814 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32815
32816 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32817 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32818 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32819 TREE_READONLY (t) = 1;
32820 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32821
32822 ix86_init_tm_builtins ();
32823 ix86_init_mmx_sse_builtins ();
32824
32825 if (TARGET_LP64)
32826 ix86_init_builtins_va_builtins_abi ();
32827
32828 #ifdef SUBTARGET_INIT_BUILTINS
32829 SUBTARGET_INIT_BUILTINS;
32830 #endif
32831 }
32832
32833 /* Return the ix86 builtin for CODE. */
32834
32835 static tree
32836 ix86_builtin_decl (unsigned code, bool)
32837 {
32838 if (code >= IX86_BUILTIN_MAX)
32839 return error_mark_node;
32840
32841 return ix86_builtins[code];
32842 }
32843
32844 /* Errors in the source file can cause expand_expr to return const0_rtx
32845 where we expect a vector. To avoid crashing, use one of the vector
32846 clear instructions. */
32847 static rtx
32848 safe_vector_operand (rtx x, enum machine_mode mode)
32849 {
32850 if (x == const0_rtx)
32851 x = CONST0_RTX (mode);
32852 return x;
32853 }
32854
32855 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
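/* A sketch of the expansion steps: expand both call arguments to rtx,
   coerce them into the operand modes the insn pattern expects (including
   the special case of loading an SImode value into TImode through a
   V4SImode register), and emit a single two-input insn into TARGET. */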
32856
32857 static rtx
32858 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32859 {
32860 rtx pat;
32861 tree arg0 = CALL_EXPR_ARG (exp, 0);
32862 tree arg1 = CALL_EXPR_ARG (exp, 1);
32863 rtx op0 = expand_normal (arg0);
32864 rtx op1 = expand_normal (arg1);
32865 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32866 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32867 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32868
32869 if (VECTOR_MODE_P (mode0))
32870 op0 = safe_vector_operand (op0, mode0);
32871 if (VECTOR_MODE_P (mode1))
32872 op1 = safe_vector_operand (op1, mode1);
32873
32874 if (optimize || !target
32875 || GET_MODE (target) != tmode
32876 || !insn_data[icode].operand[0].predicate (target, tmode))
32877 target = gen_reg_rtx (tmode);
32878
32879 if (GET_MODE (op1) == SImode && mode1 == TImode)
32880 {
32881 rtx x = gen_reg_rtx (V4SImode);
32882 emit_insn (gen_sse2_loadd (x, op1));
32883 op1 = gen_lowpart (TImode, x);
32884 }
32885
32886 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32887 op0 = copy_to_mode_reg (mode0, op0);
32888 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32889 op1 = copy_to_mode_reg (mode1, op1);
32890
32891 pat = GEN_FCN (icode) (target, op0, op1);
32892 if (! pat)
32893 return 0;
32894
32895 emit_insn (pat);
32896
32897 return target;
32898 }
32899
32900 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
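/* These are mainly the XOP and FMA4 builtins; M_TYPE encodes the argument
   count and whether the last operand must be an immediate, whether the
   operation is a comparison, and whether an extra comparison-code operand
   (SUB_CODE) is appended, as the MULTI_ARG_* cases below spell out. */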
32901
32902 static rtx
32903 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32904 enum ix86_builtin_func_type m_type,
32905 enum rtx_code sub_code)
32906 {
32907 rtx pat;
32908 int i;
32909 int nargs;
32910 bool comparison_p = false;
32911 bool tf_p = false;
32912 bool last_arg_constant = false;
32913 int num_memory = 0;
32914 struct {
32915 rtx op;
32916 enum machine_mode mode;
32917 } args[4];
32918
32919 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32920
32921 switch (m_type)
32922 {
32923 case MULTI_ARG_4_DF2_DI_I:
32924 case MULTI_ARG_4_DF2_DI_I1:
32925 case MULTI_ARG_4_SF2_SI_I:
32926 case MULTI_ARG_4_SF2_SI_I1:
32927 nargs = 4;
32928 last_arg_constant = true;
32929 break;
32930
32931 case MULTI_ARG_3_SF:
32932 case MULTI_ARG_3_DF:
32933 case MULTI_ARG_3_SF2:
32934 case MULTI_ARG_3_DF2:
32935 case MULTI_ARG_3_DI:
32936 case MULTI_ARG_3_SI:
32937 case MULTI_ARG_3_SI_DI:
32938 case MULTI_ARG_3_HI:
32939 case MULTI_ARG_3_HI_SI:
32940 case MULTI_ARG_3_QI:
32941 case MULTI_ARG_3_DI2:
32942 case MULTI_ARG_3_SI2:
32943 case MULTI_ARG_3_HI2:
32944 case MULTI_ARG_3_QI2:
32945 nargs = 3;
32946 break;
32947
32948 case MULTI_ARG_2_SF:
32949 case MULTI_ARG_2_DF:
32950 case MULTI_ARG_2_DI:
32951 case MULTI_ARG_2_SI:
32952 case MULTI_ARG_2_HI:
32953 case MULTI_ARG_2_QI:
32954 nargs = 2;
32955 break;
32956
32957 case MULTI_ARG_2_DI_IMM:
32958 case MULTI_ARG_2_SI_IMM:
32959 case MULTI_ARG_2_HI_IMM:
32960 case MULTI_ARG_2_QI_IMM:
32961 nargs = 2;
32962 last_arg_constant = true;
32963 break;
32964
32965 case MULTI_ARG_1_SF:
32966 case MULTI_ARG_1_DF:
32967 case MULTI_ARG_1_SF2:
32968 case MULTI_ARG_1_DF2:
32969 case MULTI_ARG_1_DI:
32970 case MULTI_ARG_1_SI:
32971 case MULTI_ARG_1_HI:
32972 case MULTI_ARG_1_QI:
32973 case MULTI_ARG_1_SI_DI:
32974 case MULTI_ARG_1_HI_DI:
32975 case MULTI_ARG_1_HI_SI:
32976 case MULTI_ARG_1_QI_DI:
32977 case MULTI_ARG_1_QI_SI:
32978 case MULTI_ARG_1_QI_HI:
32979 nargs = 1;
32980 break;
32981
32982 case MULTI_ARG_2_DI_CMP:
32983 case MULTI_ARG_2_SI_CMP:
32984 case MULTI_ARG_2_HI_CMP:
32985 case MULTI_ARG_2_QI_CMP:
32986 nargs = 2;
32987 comparison_p = true;
32988 break;
32989
32990 case MULTI_ARG_2_SF_TF:
32991 case MULTI_ARG_2_DF_TF:
32992 case MULTI_ARG_2_DI_TF:
32993 case MULTI_ARG_2_SI_TF:
32994 case MULTI_ARG_2_HI_TF:
32995 case MULTI_ARG_2_QI_TF:
32996 nargs = 2;
32997 tf_p = true;
32998 break;
32999
33000 default:
33001 gcc_unreachable ();
33002 }
33003
33004 if (optimize || !target
33005 || GET_MODE (target) != tmode
33006 || !insn_data[icode].operand[0].predicate (target, tmode))
33007 target = gen_reg_rtx (tmode);
33008
33009 gcc_assert (nargs <= 4);
33010
33011 for (i = 0; i < nargs; i++)
33012 {
33013 tree arg = CALL_EXPR_ARG (exp, i);
33014 rtx op = expand_normal (arg);
33015 int adjust = (comparison_p) ? 1 : 0;
33016 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
33017
33018 if (last_arg_constant && i == nargs - 1)
33019 {
33020 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
33021 {
33022 enum insn_code new_icode = icode;
33023 switch (icode)
33024 {
33025 case CODE_FOR_xop_vpermil2v2df3:
33026 case CODE_FOR_xop_vpermil2v4sf3:
33027 case CODE_FOR_xop_vpermil2v4df3:
33028 case CODE_FOR_xop_vpermil2v8sf3:
33029 error ("the last argument must be a 2-bit immediate");
33030 return gen_reg_rtx (tmode);
33031 case CODE_FOR_xop_rotlv2di3:
33032 new_icode = CODE_FOR_rotlv2di3;
33033 goto xop_rotl;
33034 case CODE_FOR_xop_rotlv4si3:
33035 new_icode = CODE_FOR_rotlv4si3;
33036 goto xop_rotl;
33037 case CODE_FOR_xop_rotlv8hi3:
33038 new_icode = CODE_FOR_rotlv8hi3;
33039 goto xop_rotl;
33040 case CODE_FOR_xop_rotlv16qi3:
33041 new_icode = CODE_FOR_rotlv16qi3;
33042 xop_rotl:
33043 if (CONST_INT_P (op))
33044 {
33045 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
33046 op = GEN_INT (INTVAL (op) & mask);
33047 gcc_checking_assert
33048 (insn_data[icode].operand[i + 1].predicate (op, mode));
33049 }
33050 else
33051 {
33052 gcc_checking_assert
33053 (nargs == 2
33054 && insn_data[new_icode].operand[0].mode == tmode
33055 && insn_data[new_icode].operand[1].mode == tmode
33056 && insn_data[new_icode].operand[2].mode == mode
33057 && insn_data[new_icode].operand[0].predicate
33058 == insn_data[icode].operand[0].predicate
33059 && insn_data[new_icode].operand[1].predicate
33060 == insn_data[icode].operand[1].predicate);
33061 icode = new_icode;
33062 goto non_constant;
33063 }
33064 break;
33065 default:
33066 gcc_unreachable ();
33067 }
33068 }
33069 }
33070 else
33071 {
33072 non_constant:
33073 if (VECTOR_MODE_P (mode))
33074 op = safe_vector_operand (op, mode);
33075
33076 /* If we aren't optimizing, only allow one memory operand to be
33077 generated. */
33078 if (memory_operand (op, mode))
33079 num_memory++;
33080
33081 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
33082
33083 if (optimize
33084 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
33085 || num_memory > 1)
33086 op = force_reg (mode, op);
33087 }
33088
33089 args[i].op = op;
33090 args[i].mode = mode;
33091 }
33092
33093 switch (nargs)
33094 {
33095 case 1:
33096 pat = GEN_FCN (icode) (target, args[0].op);
33097 break;
33098
33099 case 2:
33100 if (tf_p)
33101 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
33102 GEN_INT ((int)sub_code));
33103 else if (! comparison_p)
33104 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
33105 else
33106 {
33107 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
33108 args[0].op,
33109 args[1].op);
33110
33111 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
33112 }
33113 break;
33114
33115 case 3:
33116 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
33117 break;
33118
33119 case 4:
33120 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33121 break;
33122
33123 default:
33124 gcc_unreachable ();
33125 }
33126
33127 if (! pat)
33128 return 0;
33129
33130 emit_insn (pat);
33131 return target;
33132 }
33133
33134 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33135 insns with vec_merge. */
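/* The single source operand is used twice (op1 = op0): the vec_merge
   pattern takes the value supplying the preserved upper elements as a
   separate operand, and for these builtins that value is the input
   vector itself. */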
33136
33137 static rtx
33138 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33139 rtx target)
33140 {
33141 rtx pat;
33142 tree arg0 = CALL_EXPR_ARG (exp, 0);
33143 rtx op1, op0 = expand_normal (arg0);
33144 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33145 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33146
33147 if (optimize || !target
33148 || GET_MODE (target) != tmode
33149 || !insn_data[icode].operand[0].predicate (target, tmode))
33150 target = gen_reg_rtx (tmode);
33151
33152 if (VECTOR_MODE_P (mode0))
33153 op0 = safe_vector_operand (op0, mode0);
33154
33155 if ((optimize && !register_operand (op0, mode0))
33156 || !insn_data[icode].operand[1].predicate (op0, mode0))
33157 op0 = copy_to_mode_reg (mode0, op0);
33158
33159 op1 = op0;
33160 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33161 op1 = copy_to_mode_reg (mode0, op1);
33162
33163 pat = GEN_FCN (icode) (target, op0, op1);
33164 if (! pat)
33165 return 0;
33166 emit_insn (pat);
33167 return target;
33168 }
33169
33170 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
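/* The SWAP path handles predicates with no direct encoding; for example
   (illustrative), a "greater than" compare can be emitted as the
   corresponding "less than" compare with the operands exchanged. */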
33171
33172 static rtx
33173 ix86_expand_sse_compare (const struct builtin_description *d,
33174 tree exp, rtx target, bool swap)
33175 {
33176 rtx pat;
33177 tree arg0 = CALL_EXPR_ARG (exp, 0);
33178 tree arg1 = CALL_EXPR_ARG (exp, 1);
33179 rtx op0 = expand_normal (arg0);
33180 rtx op1 = expand_normal (arg1);
33181 rtx op2;
33182 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33183 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33184 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33185 enum rtx_code comparison = d->comparison;
33186
33187 if (VECTOR_MODE_P (mode0))
33188 op0 = safe_vector_operand (op0, mode0);
33189 if (VECTOR_MODE_P (mode1))
33190 op1 = safe_vector_operand (op1, mode1);
33191
33192 /* Swap operands if we have a comparison that isn't available in
33193 hardware. */
33194 if (swap)
33195 {
33196 rtx tmp = gen_reg_rtx (mode1);
33197 emit_move_insn (tmp, op1);
33198 op1 = op0;
33199 op0 = tmp;
33200 }
33201
33202 if (optimize || !target
33203 || GET_MODE (target) != tmode
33204 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33205 target = gen_reg_rtx (tmode);
33206
33207 if ((optimize && !register_operand (op0, mode0))
33208 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33209 op0 = copy_to_mode_reg (mode0, op0);
33210 if ((optimize && !register_operand (op1, mode1))
33211 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33212 op1 = copy_to_mode_reg (mode1, op1);
33213
33214 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33215 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33216 if (! pat)
33217 return 0;
33218 emit_insn (pat);
33219 return target;
33220 }
33221
33222 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
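/* The comi/ucomi patterns only set the flags register, so the code below
   materializes the boolean result: emit the compare, then store the
   requested flag condition into the low QImode part of a zeroed SImode
   register and return that register. */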
33223
33224 static rtx
33225 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33226 rtx target)
33227 {
33228 rtx pat;
33229 tree arg0 = CALL_EXPR_ARG (exp, 0);
33230 tree arg1 = CALL_EXPR_ARG (exp, 1);
33231 rtx op0 = expand_normal (arg0);
33232 rtx op1 = expand_normal (arg1);
33233 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33234 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33235 enum rtx_code comparison = d->comparison;
33236
33237 if (VECTOR_MODE_P (mode0))
33238 op0 = safe_vector_operand (op0, mode0);
33239 if (VECTOR_MODE_P (mode1))
33240 op1 = safe_vector_operand (op1, mode1);
33241
33242 /* Swap operands if we have a comparison that isn't available in
33243 hardware. */
33244 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33245 {
33246 rtx tmp = op1;
33247 op1 = op0;
33248 op0 = tmp;
33249 }
33250
33251 target = gen_reg_rtx (SImode);
33252 emit_move_insn (target, const0_rtx);
33253 target = gen_rtx_SUBREG (QImode, target, 0);
33254
33255 if ((optimize && !register_operand (op0, mode0))
33256 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33257 op0 = copy_to_mode_reg (mode0, op0);
33258 if ((optimize && !register_operand (op1, mode1))
33259 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33260 op1 = copy_to_mode_reg (mode1, op1);
33261
33262 pat = GEN_FCN (d->icode) (op0, op1);
33263 if (! pat)
33264 return 0;
33265 emit_insn (pat);
33266 emit_insn (gen_rtx_SET (VOIDmode,
33267 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33268 gen_rtx_fmt_ee (comparison, QImode,
33269 SET_DEST (pat),
33270 const0_rtx)));
33271
33272 return SUBREG_REG (target);
33273 }
33274
33275 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
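/* For these builtins D->COMPARISON does not hold an rtx comparison code;
   it is reused to carry the rounding-mode immediate that becomes the last
   operand of the insn. */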
33276
33277 static rtx
33278 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33279 rtx target)
33280 {
33281 rtx pat;
33282 tree arg0 = CALL_EXPR_ARG (exp, 0);
33283 rtx op1, op0 = expand_normal (arg0);
33284 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33285 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33286
33287 if (optimize || target == 0
33288 || GET_MODE (target) != tmode
33289 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33290 target = gen_reg_rtx (tmode);
33291
33292 if (VECTOR_MODE_P (mode0))
33293 op0 = safe_vector_operand (op0, mode0);
33294
33295 if ((optimize && !register_operand (op0, mode0))
33296 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33297 op0 = copy_to_mode_reg (mode0, op0);
33298
33299 op1 = GEN_INT (d->comparison);
33300
33301 pat = GEN_FCN (d->icode) (target, op0, op1);
33302 if (! pat)
33303 return 0;
33304 emit_insn (pat);
33305 return target;
33306 }
33307
33308 static rtx
33309 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33310 tree exp, rtx target)
33311 {
33312 rtx pat;
33313 tree arg0 = CALL_EXPR_ARG (exp, 0);
33314 tree arg1 = CALL_EXPR_ARG (exp, 1);
33315 rtx op0 = expand_normal (arg0);
33316 rtx op1 = expand_normal (arg1);
33317 rtx op2;
33318 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33319 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33320 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33321
33322 if (optimize || target == 0
33323 || GET_MODE (target) != tmode
33324 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33325 target = gen_reg_rtx (tmode);
33326
33327 op0 = safe_vector_operand (op0, mode0);
33328 op1 = safe_vector_operand (op1, mode1);
33329
33330 if ((optimize && !register_operand (op0, mode0))
33331 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33332 op0 = copy_to_mode_reg (mode0, op0);
33333 if ((optimize && !register_operand (op1, mode1))
33334 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33335 op1 = copy_to_mode_reg (mode1, op1);
33336
33337 op2 = GEN_INT (d->comparison);
33338
33339 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33340 if (! pat)
33341 return 0;
33342 emit_insn (pat);
33343 return target;
33344 }
33345
33346 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33347
33348 static rtx
33349 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33350 rtx target)
33351 {
33352 rtx pat;
33353 tree arg0 = CALL_EXPR_ARG (exp, 0);
33354 tree arg1 = CALL_EXPR_ARG (exp, 1);
33355 rtx op0 = expand_normal (arg0);
33356 rtx op1 = expand_normal (arg1);
33357 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33358 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33359 enum rtx_code comparison = d->comparison;
33360
33361 if (VECTOR_MODE_P (mode0))
33362 op0 = safe_vector_operand (op0, mode0);
33363 if (VECTOR_MODE_P (mode1))
33364 op1 = safe_vector_operand (op1, mode1);
33365
33366 target = gen_reg_rtx (SImode);
33367 emit_move_insn (target, const0_rtx);
33368 target = gen_rtx_SUBREG (QImode, target, 0);
33369
33370 if ((optimize && !register_operand (op0, mode0))
33371 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33372 op0 = copy_to_mode_reg (mode0, op0);
33373 if ((optimize && !register_operand (op1, mode1))
33374 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33375 op1 = copy_to_mode_reg (mode1, op1);
33376
33377 pat = GEN_FCN (d->icode) (op0, op1);
33378 if (! pat)
33379 return 0;
33380 emit_insn (pat);
33381 emit_insn (gen_rtx_SET (VOIDmode,
33382 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33383 gen_rtx_fmt_ee (comparison, QImode,
33384 SET_DEST (pat),
33385 const0_rtx)));
33386
33387 return SUBREG_REG (target);
33388 }
33389
33390 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
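/* The five call arguments map onto the pcmpestri/pcmpestrm operands: two
   vectors, each paired with an explicit length, plus the 8-bit mode
   immediate. Which output is returned depends on D->CODE: the index, the
   mask, or (for the flag-reading variants) a bit recovered from
   FLAGS_REG. */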
33391
33392 static rtx
33393 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33394 tree exp, rtx target)
33395 {
33396 rtx pat;
33397 tree arg0 = CALL_EXPR_ARG (exp, 0);
33398 tree arg1 = CALL_EXPR_ARG (exp, 1);
33399 tree arg2 = CALL_EXPR_ARG (exp, 2);
33400 tree arg3 = CALL_EXPR_ARG (exp, 3);
33401 tree arg4 = CALL_EXPR_ARG (exp, 4);
33402 rtx scratch0, scratch1;
33403 rtx op0 = expand_normal (arg0);
33404 rtx op1 = expand_normal (arg1);
33405 rtx op2 = expand_normal (arg2);
33406 rtx op3 = expand_normal (arg3);
33407 rtx op4 = expand_normal (arg4);
33408 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33409
33410 tmode0 = insn_data[d->icode].operand[0].mode;
33411 tmode1 = insn_data[d->icode].operand[1].mode;
33412 modev2 = insn_data[d->icode].operand[2].mode;
33413 modei3 = insn_data[d->icode].operand[3].mode;
33414 modev4 = insn_data[d->icode].operand[4].mode;
33415 modei5 = insn_data[d->icode].operand[5].mode;
33416 modeimm = insn_data[d->icode].operand[6].mode;
33417
33418 if (VECTOR_MODE_P (modev2))
33419 op0 = safe_vector_operand (op0, modev2);
33420 if (VECTOR_MODE_P (modev4))
33421 op2 = safe_vector_operand (op2, modev4);
33422
33423 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33424 op0 = copy_to_mode_reg (modev2, op0);
33425 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33426 op1 = copy_to_mode_reg (modei3, op1);
33427 if ((optimize && !register_operand (op2, modev4))
33428 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33429 op2 = copy_to_mode_reg (modev4, op2);
33430 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33431 op3 = copy_to_mode_reg (modei5, op3);
33432
33433 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33434 {
33435 error ("the fifth argument must be an 8-bit immediate");
33436 return const0_rtx;
33437 }
33438
33439 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33440 {
33441 if (optimize || !target
33442 || GET_MODE (target) != tmode0
33443 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33444 target = gen_reg_rtx (tmode0);
33445
33446 scratch1 = gen_reg_rtx (tmode1);
33447
33448 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33449 }
33450 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33451 {
33452 if (optimize || !target
33453 || GET_MODE (target) != tmode1
33454 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33455 target = gen_reg_rtx (tmode1);
33456
33457 scratch0 = gen_reg_rtx (tmode0);
33458
33459 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33460 }
33461 else
33462 {
33463 gcc_assert (d->flag);
33464
33465 scratch0 = gen_reg_rtx (tmode0);
33466 scratch1 = gen_reg_rtx (tmode1);
33467
33468 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33469 }
33470
33471 if (! pat)
33472 return 0;
33473
33474 emit_insn (pat);
33475
33476 if (d->flag)
33477 {
33478 target = gen_reg_rtx (SImode);
33479 emit_move_insn (target, const0_rtx);
33480 target = gen_rtx_SUBREG (QImode, target, 0);
33481
33482 emit_insn
33483 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33484 gen_rtx_fmt_ee (EQ, QImode,
33485 gen_rtx_REG ((enum machine_mode) d->flag,
33486 FLAGS_REG),
33487 const0_rtx)));
33488 return SUBREG_REG (target);
33489 }
33490 else
33491 return target;
33492 }
33493
33494
33495 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33496
33497 static rtx
33498 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33499 tree exp, rtx target)
33500 {
33501 rtx pat;
33502 tree arg0 = CALL_EXPR_ARG (exp, 0);
33503 tree arg1 = CALL_EXPR_ARG (exp, 1);
33504 tree arg2 = CALL_EXPR_ARG (exp, 2);
33505 rtx scratch0, scratch1;
33506 rtx op0 = expand_normal (arg0);
33507 rtx op1 = expand_normal (arg1);
33508 rtx op2 = expand_normal (arg2);
33509 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33510
33511 tmode0 = insn_data[d->icode].operand[0].mode;
33512 tmode1 = insn_data[d->icode].operand[1].mode;
33513 modev2 = insn_data[d->icode].operand[2].mode;
33514 modev3 = insn_data[d->icode].operand[3].mode;
33515 modeimm = insn_data[d->icode].operand[4].mode;
33516
33517 if (VECTOR_MODE_P (modev2))
33518 op0 = safe_vector_operand (op0, modev2);
33519 if (VECTOR_MODE_P (modev3))
33520 op1 = safe_vector_operand (op1, modev3);
33521
33522 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33523 op0 = copy_to_mode_reg (modev2, op0);
33524 if ((optimize && !register_operand (op1, modev3))
33525 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33526 op1 = copy_to_mode_reg (modev3, op1);
33527
33528 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33529 {
33530 error ("the third argument must be an 8-bit immediate");
33531 return const0_rtx;
33532 }
33533
33534 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33535 {
33536 if (optimize || !target
33537 || GET_MODE (target) != tmode0
33538 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33539 target = gen_reg_rtx (tmode0);
33540
33541 scratch1 = gen_reg_rtx (tmode1);
33542
33543 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33544 }
33545 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33546 {
33547 if (optimize || !target
33548 || GET_MODE (target) != tmode1
33549 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33550 target = gen_reg_rtx (tmode1);
33551
33552 scratch0 = gen_reg_rtx (tmode0);
33553
33554 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33555 }
33556 else
33557 {
33558 gcc_assert (d->flag);
33559
33560 scratch0 = gen_reg_rtx (tmode0);
33561 scratch1 = gen_reg_rtx (tmode1);
33562
33563 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33564 }
33565
33566 if (! pat)
33567 return 0;
33568
33569 emit_insn (pat);
33570
33571 if (d->flag)
33572 {
33573 target = gen_reg_rtx (SImode);
33574 emit_move_insn (target, const0_rtx);
33575 target = gen_rtx_SUBREG (QImode, target, 0);
33576
33577 emit_insn
33578 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33579 gen_rtx_fmt_ee (EQ, QImode,
33580 gen_rtx_REG ((enum machine_mode) d->flag,
33581 FLAGS_REG),
33582 const0_rtx)));
33583 return SUBREG_REG (target);
33584 }
33585 else
33586 return target;
33587 }
33588
33589 /* Subroutine of ix86_expand_builtin to take care of insns with a
33590 variable number of operands. */
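/* The large switch below keys off the ix86_builtin_func_type recorded in
   D->FLAG to decide the argument count, how many trailing operands must
   be immediates, where a mask operand sits, and whether the call should
   instead be routed to one of the more specialized expanders above. */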
33591
33592 static rtx
33593 ix86_expand_args_builtin (const struct builtin_description *d,
33594 tree exp, rtx target)
33595 {
33596 rtx pat, real_target;
33597 unsigned int i, nargs;
33598 unsigned int nargs_constant = 0;
33599 unsigned int mask_pos = 0;
33600 int num_memory = 0;
33601 struct
33602 {
33603 rtx op;
33604 enum machine_mode mode;
33605 } args[6];
33606 bool last_arg_count = false;
33607 enum insn_code icode = d->icode;
33608 const struct insn_data_d *insn_p = &insn_data[icode];
33609 enum machine_mode tmode = insn_p->operand[0].mode;
33610 enum machine_mode rmode = VOIDmode;
33611 bool swap = false;
33612 enum rtx_code comparison = d->comparison;
33613
33614 switch ((enum ix86_builtin_func_type) d->flag)
33615 {
33616 case V2DF_FTYPE_V2DF_ROUND:
33617 case V4DF_FTYPE_V4DF_ROUND:
33618 case V4SF_FTYPE_V4SF_ROUND:
33619 case V8SF_FTYPE_V8SF_ROUND:
33620 case V4SI_FTYPE_V4SF_ROUND:
33621 case V8SI_FTYPE_V8SF_ROUND:
33622 return ix86_expand_sse_round (d, exp, target);
33623 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33624 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33625 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33626 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33627 case INT_FTYPE_V8SF_V8SF_PTEST:
33628 case INT_FTYPE_V4DI_V4DI_PTEST:
33629 case INT_FTYPE_V4DF_V4DF_PTEST:
33630 case INT_FTYPE_V4SF_V4SF_PTEST:
33631 case INT_FTYPE_V2DI_V2DI_PTEST:
33632 case INT_FTYPE_V2DF_V2DF_PTEST:
33633 return ix86_expand_sse_ptest (d, exp, target);
33634 case FLOAT128_FTYPE_FLOAT128:
33635 case FLOAT_FTYPE_FLOAT:
33636 case INT_FTYPE_INT:
33637 case UINT64_FTYPE_INT:
33638 case UINT16_FTYPE_UINT16:
33639 case INT64_FTYPE_INT64:
33640 case INT64_FTYPE_V4SF:
33641 case INT64_FTYPE_V2DF:
33642 case INT_FTYPE_V16QI:
33643 case INT_FTYPE_V8QI:
33644 case INT_FTYPE_V8SF:
33645 case INT_FTYPE_V4DF:
33646 case INT_FTYPE_V4SF:
33647 case INT_FTYPE_V2DF:
33648 case INT_FTYPE_V32QI:
33649 case V16QI_FTYPE_V16QI:
33650 case V8SI_FTYPE_V8SF:
33651 case V8SI_FTYPE_V4SI:
33652 case V8HI_FTYPE_V8HI:
33653 case V8HI_FTYPE_V16QI:
33654 case V8QI_FTYPE_V8QI:
33655 case V8SF_FTYPE_V8SF:
33656 case V8SF_FTYPE_V8SI:
33657 case V8SF_FTYPE_V4SF:
33658 case V8SF_FTYPE_V8HI:
33659 case V4SI_FTYPE_V4SI:
33660 case V4SI_FTYPE_V16QI:
33661 case V4SI_FTYPE_V4SF:
33662 case V4SI_FTYPE_V8SI:
33663 case V4SI_FTYPE_V8HI:
33664 case V4SI_FTYPE_V4DF:
33665 case V4SI_FTYPE_V2DF:
33666 case V4HI_FTYPE_V4HI:
33667 case V4DF_FTYPE_V4DF:
33668 case V4DF_FTYPE_V4SI:
33669 case V4DF_FTYPE_V4SF:
33670 case V4DF_FTYPE_V2DF:
33671 case V4SF_FTYPE_V4SF:
33672 case V4SF_FTYPE_V4SI:
33673 case V4SF_FTYPE_V8SF:
33674 case V4SF_FTYPE_V4DF:
33675 case V4SF_FTYPE_V8HI:
33676 case V4SF_FTYPE_V2DF:
33677 case V2DI_FTYPE_V2DI:
33678 case V2DI_FTYPE_V16QI:
33679 case V2DI_FTYPE_V8HI:
33680 case V2DI_FTYPE_V4SI:
33681 case V2DF_FTYPE_V2DF:
33682 case V2DF_FTYPE_V4SI:
33683 case V2DF_FTYPE_V4DF:
33684 case V2DF_FTYPE_V4SF:
33685 case V2DF_FTYPE_V2SI:
33686 case V2SI_FTYPE_V2SI:
33687 case V2SI_FTYPE_V4SF:
33688 case V2SI_FTYPE_V2SF:
33689 case V2SI_FTYPE_V2DF:
33690 case V2SF_FTYPE_V2SF:
33691 case V2SF_FTYPE_V2SI:
33692 case V32QI_FTYPE_V32QI:
33693 case V32QI_FTYPE_V16QI:
33694 case V16HI_FTYPE_V16HI:
33695 case V16HI_FTYPE_V8HI:
33696 case V8SI_FTYPE_V8SI:
33697 case V16HI_FTYPE_V16QI:
33698 case V8SI_FTYPE_V16QI:
33699 case V4DI_FTYPE_V16QI:
33700 case V8SI_FTYPE_V8HI:
33701 case V4DI_FTYPE_V8HI:
33702 case V4DI_FTYPE_V4SI:
33703 case V4DI_FTYPE_V2DI:
33704 case HI_FTYPE_HI:
33705 case UINT_FTYPE_V2DF:
33706 case UINT_FTYPE_V4SF:
33707 case UINT64_FTYPE_V2DF:
33708 case UINT64_FTYPE_V4SF:
33709 case V16QI_FTYPE_V8DI:
33710 case V16HI_FTYPE_V16SI:
33711 case V16SI_FTYPE_HI:
33712 case V16SI_FTYPE_V16SI:
33713 case V16SI_FTYPE_INT:
33714 case V16SF_FTYPE_FLOAT:
33715 case V16SF_FTYPE_V8SF:
33716 case V16SI_FTYPE_V8SI:
33717 case V16SF_FTYPE_V4SF:
33718 case V16SI_FTYPE_V4SI:
33719 case V16SF_FTYPE_V16SF:
33720 case V8HI_FTYPE_V8DI:
33721 case V8UHI_FTYPE_V8UHI:
33722 case V8SI_FTYPE_V8DI:
33723 case V8USI_FTYPE_V8USI:
33724 case V8SF_FTYPE_V8DF:
33725 case V8DI_FTYPE_QI:
33726 case V8DI_FTYPE_INT64:
33727 case V8DI_FTYPE_V4DI:
33728 case V8DI_FTYPE_V8DI:
33729 case V8DF_FTYPE_DOUBLE:
33730 case V8DF_FTYPE_V4DF:
33731 case V8DF_FTYPE_V2DF:
33732 case V8DF_FTYPE_V8DF:
33733 case V8DF_FTYPE_V8SI:
33734 nargs = 1;
33735 break;
33736 case V4SF_FTYPE_V4SF_VEC_MERGE:
33737 case V2DF_FTYPE_V2DF_VEC_MERGE:
33738 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33739 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33740 case V16QI_FTYPE_V16QI_V16QI:
33741 case V16QI_FTYPE_V8HI_V8HI:
33742 case V16SI_FTYPE_V16SI_V16SI:
33743 case V16SF_FTYPE_V16SF_V16SF:
33744 case V16SF_FTYPE_V16SF_V16SI:
33745 case V8QI_FTYPE_V8QI_V8QI:
33746 case V8QI_FTYPE_V4HI_V4HI:
33747 case V8HI_FTYPE_V8HI_V8HI:
33748 case V8HI_FTYPE_V16QI_V16QI:
33749 case V8HI_FTYPE_V4SI_V4SI:
33750 case V8SF_FTYPE_V8SF_V8SF:
33751 case V8SF_FTYPE_V8SF_V8SI:
33752 case V8DI_FTYPE_V8DI_V8DI:
33753 case V8DF_FTYPE_V8DF_V8DF:
33754 case V8DF_FTYPE_V8DF_V8DI:
33755 case V4SI_FTYPE_V4SI_V4SI:
33756 case V4SI_FTYPE_V8HI_V8HI:
33757 case V4SI_FTYPE_V4SF_V4SF:
33758 case V4SI_FTYPE_V2DF_V2DF:
33759 case V4HI_FTYPE_V4HI_V4HI:
33760 case V4HI_FTYPE_V8QI_V8QI:
33761 case V4HI_FTYPE_V2SI_V2SI:
33762 case V4DF_FTYPE_V4DF_V4DF:
33763 case V4DF_FTYPE_V4DF_V4DI:
33764 case V4SF_FTYPE_V4SF_V4SF:
33765 case V4SF_FTYPE_V4SF_V4SI:
33766 case V4SF_FTYPE_V4SF_V2SI:
33767 case V4SF_FTYPE_V4SF_V2DF:
33768 case V4SF_FTYPE_V4SF_UINT:
33769 case V4SF_FTYPE_V4SF_UINT64:
33770 case V4SF_FTYPE_V4SF_DI:
33771 case V4SF_FTYPE_V4SF_SI:
33772 case V2DI_FTYPE_V2DI_V2DI:
33773 case V2DI_FTYPE_V16QI_V16QI:
33774 case V2DI_FTYPE_V4SI_V4SI:
33775 case V2UDI_FTYPE_V4USI_V4USI:
33776 case V2DI_FTYPE_V2DI_V16QI:
33777 case V2DI_FTYPE_V2DF_V2DF:
33778 case V2SI_FTYPE_V2SI_V2SI:
33779 case V2SI_FTYPE_V4HI_V4HI:
33780 case V2SI_FTYPE_V2SF_V2SF:
33781 case V2DF_FTYPE_V2DF_V2DF:
33782 case V2DF_FTYPE_V2DF_V4SF:
33783 case V2DF_FTYPE_V2DF_V2DI:
33784 case V2DF_FTYPE_V2DF_DI:
33785 case V2DF_FTYPE_V2DF_SI:
33786 case V2DF_FTYPE_V2DF_UINT:
33787 case V2DF_FTYPE_V2DF_UINT64:
33788 case V2SF_FTYPE_V2SF_V2SF:
33789 case V1DI_FTYPE_V1DI_V1DI:
33790 case V1DI_FTYPE_V8QI_V8QI:
33791 case V1DI_FTYPE_V2SI_V2SI:
33792 case V32QI_FTYPE_V16HI_V16HI:
33793 case V16HI_FTYPE_V8SI_V8SI:
33794 case V32QI_FTYPE_V32QI_V32QI:
33795 case V16HI_FTYPE_V32QI_V32QI:
33796 case V16HI_FTYPE_V16HI_V16HI:
33797 case V8SI_FTYPE_V4DF_V4DF:
33798 case V8SI_FTYPE_V8SI_V8SI:
33799 case V8SI_FTYPE_V16HI_V16HI:
33800 case V4DI_FTYPE_V4DI_V4DI:
33801 case V4DI_FTYPE_V8SI_V8SI:
33802 case V4UDI_FTYPE_V8USI_V8USI:
33803 case QI_FTYPE_V8DI_V8DI:
33804 case HI_FTYPE_V16SI_V16SI:
33805 if (comparison == UNKNOWN)
33806 return ix86_expand_binop_builtin (icode, exp, target);
33807 nargs = 2;
33808 break;
33809 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33810 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33811 gcc_assert (comparison != UNKNOWN);
33812 nargs = 2;
33813 swap = true;
33814 break;
33815 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33816 case V16HI_FTYPE_V16HI_SI_COUNT:
33817 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33818 case V8SI_FTYPE_V8SI_SI_COUNT:
33819 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33820 case V4DI_FTYPE_V4DI_INT_COUNT:
33821 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33822 case V8HI_FTYPE_V8HI_SI_COUNT:
33823 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33824 case V4SI_FTYPE_V4SI_SI_COUNT:
33825 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33826 case V4HI_FTYPE_V4HI_SI_COUNT:
33827 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33828 case V2DI_FTYPE_V2DI_SI_COUNT:
33829 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33830 case V2SI_FTYPE_V2SI_SI_COUNT:
33831 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33832 case V1DI_FTYPE_V1DI_SI_COUNT:
33833 nargs = 2;
33834 last_arg_count = true;
33835 break;
33836 case UINT64_FTYPE_UINT64_UINT64:
33837 case UINT_FTYPE_UINT_UINT:
33838 case UINT_FTYPE_UINT_USHORT:
33839 case UINT_FTYPE_UINT_UCHAR:
33840 case UINT16_FTYPE_UINT16_INT:
33841 case UINT8_FTYPE_UINT8_INT:
33842 case HI_FTYPE_HI_HI:
33843 case V16SI_FTYPE_V8DF_V8DF:
33844 nargs = 2;
33845 break;
33846 case V2DI_FTYPE_V2DI_INT_CONVERT:
33847 nargs = 2;
33848 rmode = V1TImode;
33849 nargs_constant = 1;
33850 break;
33851 case V4DI_FTYPE_V4DI_INT_CONVERT:
33852 nargs = 2;
33853 rmode = V2TImode;
33854 nargs_constant = 1;
33855 break;
33856 case V8HI_FTYPE_V8HI_INT:
33857 case V8HI_FTYPE_V8SF_INT:
33858 case V16HI_FTYPE_V16SF_INT:
33859 case V8HI_FTYPE_V4SF_INT:
33860 case V8SF_FTYPE_V8SF_INT:
33861 case V4SF_FTYPE_V16SF_INT:
33862 case V16SF_FTYPE_V16SF_INT:
33863 case V4SI_FTYPE_V4SI_INT:
33864 case V4SI_FTYPE_V8SI_INT:
33865 case V4HI_FTYPE_V4HI_INT:
33866 case V4DF_FTYPE_V4DF_INT:
33867 case V4DF_FTYPE_V8DF_INT:
33868 case V4SF_FTYPE_V4SF_INT:
33869 case V4SF_FTYPE_V8SF_INT:
33870 case V2DI_FTYPE_V2DI_INT:
33871 case V2DF_FTYPE_V2DF_INT:
33872 case V2DF_FTYPE_V4DF_INT:
33873 case V16HI_FTYPE_V16HI_INT:
33874 case V8SI_FTYPE_V8SI_INT:
33875 case V16SI_FTYPE_V16SI_INT:
33876 case V4SI_FTYPE_V16SI_INT:
33877 case V4DI_FTYPE_V4DI_INT:
33878 case V2DI_FTYPE_V4DI_INT:
33879 case V4DI_FTYPE_V8DI_INT:
33880 case HI_FTYPE_HI_INT:
33881 nargs = 2;
33882 nargs_constant = 1;
33883 break;
33884 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33885 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33886 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33887 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33888 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33889 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33890 case HI_FTYPE_V16SI_V16SI_HI:
33891 case QI_FTYPE_V8DI_V8DI_QI:
33892 case V16HI_FTYPE_V16SI_V16HI_HI:
33893 case V16QI_FTYPE_V16SI_V16QI_HI:
33894 case V16QI_FTYPE_V8DI_V16QI_QI:
33895 case V16SF_FTYPE_V16SF_V16SF_HI:
33896 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33897 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33898 case V16SF_FTYPE_V16SI_V16SF_HI:
33899 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33900 case V16SF_FTYPE_V4SF_V16SF_HI:
33901 case V16SI_FTYPE_SI_V16SI_HI:
33902 case V16SI_FTYPE_V16HI_V16SI_HI:
33903 case V16SI_FTYPE_V16QI_V16SI_HI:
33904 case V16SI_FTYPE_V16SF_V16SI_HI:
33905 case V16SI_FTYPE_V16SI_V16SI_HI:
33906 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33907 case V16SI_FTYPE_V4SI_V16SI_HI:
33908 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33909 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33910 case V8DF_FTYPE_V2DF_V8DF_QI:
33911 case V8DF_FTYPE_V4DF_V8DF_QI:
33912 case V8DF_FTYPE_V8DF_V8DF_QI:
33913 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33914 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33915 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33916 case V8DF_FTYPE_V8SF_V8DF_QI:
33917 case V8DF_FTYPE_V8SI_V8DF_QI:
33918 case V8DI_FTYPE_DI_V8DI_QI:
33919 case V8DI_FTYPE_V16QI_V8DI_QI:
33920 case V8DI_FTYPE_V2DI_V8DI_QI:
33921 case V8DI_FTYPE_V4DI_V8DI_QI:
33922 case V8DI_FTYPE_V8DI_V8DI_QI:
33923 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33924 case V8DI_FTYPE_V8HI_V8DI_QI:
33925 case V8DI_FTYPE_V8SI_V8DI_QI:
33926 case V8HI_FTYPE_V8DI_V8HI_QI:
33927 case V8SF_FTYPE_V8DF_V8SF_QI:
33928 case V8SI_FTYPE_V8DF_V8SI_QI:
33929 case V8SI_FTYPE_V8DI_V8SI_QI:
33930 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33931 nargs = 3;
33932 break;
33933 case V32QI_FTYPE_V32QI_V32QI_INT:
33934 case V16HI_FTYPE_V16HI_V16HI_INT:
33935 case V16QI_FTYPE_V16QI_V16QI_INT:
33936 case V4DI_FTYPE_V4DI_V4DI_INT:
33937 case V8HI_FTYPE_V8HI_V8HI_INT:
33938 case V8SI_FTYPE_V8SI_V8SI_INT:
33939 case V8SI_FTYPE_V8SI_V4SI_INT:
33940 case V8SF_FTYPE_V8SF_V8SF_INT:
33941 case V8SF_FTYPE_V8SF_V4SF_INT:
33942 case V4SI_FTYPE_V4SI_V4SI_INT:
33943 case V4DF_FTYPE_V4DF_V4DF_INT:
33944 case V16SF_FTYPE_V16SF_V16SF_INT:
33945 case V16SF_FTYPE_V16SF_V4SF_INT:
33946 case V16SI_FTYPE_V16SI_V4SI_INT:
33947 case V4DF_FTYPE_V4DF_V2DF_INT:
33948 case V4SF_FTYPE_V4SF_V4SF_INT:
33949 case V2DI_FTYPE_V2DI_V2DI_INT:
33950 case V4DI_FTYPE_V4DI_V2DI_INT:
33951 case V2DF_FTYPE_V2DF_V2DF_INT:
33952 case QI_FTYPE_V8DI_V8DI_INT:
33953 case QI_FTYPE_V8DF_V8DF_INT:
33954 case QI_FTYPE_V2DF_V2DF_INT:
33955 case QI_FTYPE_V4SF_V4SF_INT:
33956 case HI_FTYPE_V16SI_V16SI_INT:
33957 case HI_FTYPE_V16SF_V16SF_INT:
33958 nargs = 3;
33959 nargs_constant = 1;
33960 break;
33961 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33962 nargs = 3;
33963 rmode = V4DImode;
33964 nargs_constant = 1;
33965 break;
33966 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33967 nargs = 3;
33968 rmode = V2DImode;
33969 nargs_constant = 1;
33970 break;
33971 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33972 nargs = 3;
33973 rmode = DImode;
33974 nargs_constant = 1;
33975 break;
33976 case V2DI_FTYPE_V2DI_UINT_UINT:
33977 nargs = 3;
33978 nargs_constant = 2;
33979 break;
33980 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33981 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33982 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33983 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33984 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33985 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33986 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33987 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33988 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33989 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33990 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33991 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33992 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33993 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33994 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33995 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33996 nargs = 4;
33997 break;
33998 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33999 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
34000 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
34001 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
34002 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
34003 nargs = 4;
34004 nargs_constant = 1;
34005 break;
34006 case QI_FTYPE_V2DF_V2DF_INT_QI:
34007 case QI_FTYPE_V4SF_V4SF_INT_QI:
34008 nargs = 4;
34009 mask_pos = 1;
34010 nargs_constant = 1;
34011 break;
34012 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
34013 nargs = 4;
34014 nargs_constant = 2;
34015 break;
34016 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
34017 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
34018 nargs = 4;
34019 break;
34020 case QI_FTYPE_V8DI_V8DI_INT_QI:
34021 case HI_FTYPE_V16SI_V16SI_INT_HI:
34022 case QI_FTYPE_V8DF_V8DF_INT_QI:
34023 case HI_FTYPE_V16SF_V16SF_INT_HI:
34024 mask_pos = 1;
34025 nargs = 4;
34026 nargs_constant = 1;
34027 break;
34028 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
34029 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
34030 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
34031 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
34032 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
34033 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
34034 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
34035 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
34036 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
34037 nargs = 4;
34038 mask_pos = 2;
34039 nargs_constant = 1;
34040 break;
34041 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
34042 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
34043 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
34044 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
34045 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
34046 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
34047 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
34048 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
34049 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
34050 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
34051 nargs = 5;
34052 mask_pos = 2;
34053 nargs_constant = 1;
34054 break;
34055 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
34056 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
34057 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
34058 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
34059 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
34060 nargs = 5;
34061 mask_pos = 1;
34062 nargs_constant = 1;
34063 break;
34064
34065 default:
34066 gcc_unreachable ();
34067 }
34068
34069 gcc_assert (nargs <= ARRAY_SIZE (args));
34070
34071 if (comparison != UNKNOWN)
34072 {
34073 gcc_assert (nargs == 2);
34074 return ix86_expand_sse_compare (d, exp, target, swap);
34075 }
34076
34077 if (rmode == VOIDmode || rmode == tmode)
34078 {
34079 if (optimize
34080 || target == 0
34081 || GET_MODE (target) != tmode
34082 || !insn_p->operand[0].predicate (target, tmode))
34083 target = gen_reg_rtx (tmode);
34084 real_target = target;
34085 }
34086 else
34087 {
34088 real_target = gen_reg_rtx (tmode);
34089 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
34090 }
34091
34092 for (i = 0; i < nargs; i++)
34093 {
34094 tree arg = CALL_EXPR_ARG (exp, i);
34095 rtx op = expand_normal (arg);
34096 enum machine_mode mode = insn_p->operand[i + 1].mode;
34097 bool match = insn_p->operand[i + 1].predicate (op, mode);
34098
34099 if (last_arg_count && (i + 1) == nargs)
34100 {
34101 /* SIMD shift insns take either an 8-bit immediate or a register
34102 as the count, but the builtin functions take an int.  If the
34103 count doesn't match, we put it in a register.  */
34104 if (!match)
34105 {
34106 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
34107 if (!insn_p->operand[i + 1].predicate (op, mode))
34108 op = copy_to_reg (op);
34109 }
34110 }
34111 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34112 || (!mask_pos && (nargs - i) <= nargs_constant))
34113 {
34114 if (!match)
34115 switch (icode)
34116 {
34117 case CODE_FOR_avx_vinsertf128v4di:
34118 case CODE_FOR_avx_vextractf128v4di:
34119 error ("the last argument must be a 1-bit immediate");
34120 return const0_rtx;
34121
34122 case CODE_FOR_avx512f_cmpv8di3_mask:
34123 case CODE_FOR_avx512f_cmpv16si3_mask:
34124 case CODE_FOR_avx512f_ucmpv8di3_mask:
34125 case CODE_FOR_avx512f_ucmpv16si3_mask:
34126 case CODE_FOR_avx512vl_cmpv4di3_mask:
34127 case CODE_FOR_avx512vl_cmpv8si3_mask:
34128 case CODE_FOR_avx512vl_ucmpv4di3_mask:
34129 case CODE_FOR_avx512vl_ucmpv8si3_mask:
34130 case CODE_FOR_avx512vl_cmpv2di3_mask:
34131 case CODE_FOR_avx512vl_cmpv4si3_mask:
34132 case CODE_FOR_avx512vl_ucmpv2di3_mask:
34133 case CODE_FOR_avx512vl_ucmpv4si3_mask:
34134 error ("the last argument must be a 3-bit immediate");
34135 return const0_rtx;
34136
34137 case CODE_FOR_sse4_1_roundsd:
34138 case CODE_FOR_sse4_1_roundss:
34139
34140 case CODE_FOR_sse4_1_roundpd:
34141 case CODE_FOR_sse4_1_roundps:
34142 case CODE_FOR_avx_roundpd256:
34143 case CODE_FOR_avx_roundps256:
34144
34145 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34146 case CODE_FOR_sse4_1_roundps_sfix:
34147 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34148 case CODE_FOR_avx_roundps_sfix256:
34149
34150 case CODE_FOR_sse4_1_blendps:
34151 case CODE_FOR_avx_blendpd256:
34152 case CODE_FOR_avx_vpermilv4df:
34153 case CODE_FOR_avx512f_getmantv8df_mask:
34154 case CODE_FOR_avx512f_getmantv16sf_mask:
34155 case CODE_FOR_avx512vl_getmantv8sf_mask:
34156 case CODE_FOR_avx512vl_getmantv4df_mask:
34157 case CODE_FOR_avx512vl_getmantv4sf_mask:
34158 case CODE_FOR_avx512vl_getmantv2df_mask:
34159 case CODE_FOR_avx512dq_rangepv8df_mask_round:
34160 case CODE_FOR_avx512dq_rangepv16sf_mask_round:
34161 case CODE_FOR_avx512dq_rangepv4df_mask:
34162 case CODE_FOR_avx512dq_rangepv8sf_mask:
34163 case CODE_FOR_avx512dq_rangepv2df_mask:
34164 case CODE_FOR_avx512dq_rangepv4sf_mask:
34165 error ("the last argument must be a 4-bit immediate");
34166 return const0_rtx;
34167
34168 case CODE_FOR_sha1rnds4:
34169 case CODE_FOR_sse4_1_blendpd:
34170 case CODE_FOR_avx_vpermilv2df:
34171 case CODE_FOR_xop_vpermil2v2df3:
34172 case CODE_FOR_xop_vpermil2v4sf3:
34173 case CODE_FOR_xop_vpermil2v4df3:
34174 case CODE_FOR_xop_vpermil2v8sf3:
34175 case CODE_FOR_avx512f_vinsertf32x4_mask:
34176 case CODE_FOR_avx512f_vinserti32x4_mask:
34177 case CODE_FOR_avx512f_vextractf32x4_mask:
34178 case CODE_FOR_avx512f_vextracti32x4_mask:
34179 case CODE_FOR_sse2_shufpd:
34180 case CODE_FOR_sse2_shufpd_mask:
34181 case CODE_FOR_avx512dq_shuf_f64x2_mask:
34182 case CODE_FOR_avx512dq_shuf_i64x2_mask:
34183 case CODE_FOR_avx512vl_shuf_i32x4_mask:
34184 case CODE_FOR_avx512vl_shuf_f32x4_mask:
34185 error ("the last argument must be a 2-bit immediate");
34186 return const0_rtx;
34187
34188 case CODE_FOR_avx_vextractf128v4df:
34189 case CODE_FOR_avx_vextractf128v8sf:
34190 case CODE_FOR_avx_vextractf128v8si:
34191 case CODE_FOR_avx_vinsertf128v4df:
34192 case CODE_FOR_avx_vinsertf128v8sf:
34193 case CODE_FOR_avx_vinsertf128v8si:
34194 case CODE_FOR_avx512f_vinsertf64x4_mask:
34195 case CODE_FOR_avx512f_vinserti64x4_mask:
34196 case CODE_FOR_avx512f_vextractf64x4_mask:
34197 case CODE_FOR_avx512f_vextracti64x4_mask:
34198 case CODE_FOR_avx512dq_vinsertf32x8_mask:
34199 case CODE_FOR_avx512dq_vinserti32x8_mask:
34200 case CODE_FOR_avx512vl_vinsertv4df:
34201 case CODE_FOR_avx512vl_vinsertv4di:
34202 case CODE_FOR_avx512vl_vinsertv8sf:
34203 case CODE_FOR_avx512vl_vinsertv8si:
34204 error ("the last argument must be a 1-bit immediate");
34205 return const0_rtx;
34206
34207 case CODE_FOR_avx_vmcmpv2df3:
34208 case CODE_FOR_avx_vmcmpv4sf3:
34209 case CODE_FOR_avx_cmpv2df3:
34210 case CODE_FOR_avx_cmpv4sf3:
34211 case CODE_FOR_avx_cmpv4df3:
34212 case CODE_FOR_avx_cmpv8sf3:
34213 case CODE_FOR_avx512f_cmpv8df3_mask:
34214 case CODE_FOR_avx512f_cmpv16sf3_mask:
34215 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34216 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34217 error ("the last argument must be a 5-bit immediate");
34218 return const0_rtx;
34219
34220 default:
34221 switch (nargs_constant)
34222 {
34223 case 2:
34224 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant)
34225 || (!mask_pos && (nargs - i) == nargs_constant))
34226 {
34227 error ("the next to last argument must be an 8-bit immediate");
34228 break;
34229 }
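/* FALLTHRU */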
34230 case 1:
34231 error ("the last argument must be an 8-bit immediate");
34232 break;
34233 default:
34234 gcc_unreachable ();
34235 }
34236 return const0_rtx;
34237 }
34238 }
34239 else
34240 {
34241 if (VECTOR_MODE_P (mode))
34242 op = safe_vector_operand (op, mode);
34243
34244 /* If we aren't optimizing, only allow one memory operand to
34245 be generated. */
34246 if (memory_operand (op, mode))
34247 num_memory++;
34248
34249 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34250 {
34251 if (optimize || !match || num_memory > 1)
34252 op = copy_to_mode_reg (mode, op);
34253 }
34254 else
34255 {
34256 op = copy_to_reg (op);
34257 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34258 }
34259 }
34260
34261 args[i].op = op;
34262 args[i].mode = mode;
34263 }
34264
34265 switch (nargs)
34266 {
34267 case 1:
34268 pat = GEN_FCN (icode) (real_target, args[0].op);
34269 break;
34270 case 2:
34271 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34272 break;
34273 case 3:
34274 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34275 args[2].op);
34276 break;
34277 case 4:
34278 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34279 args[2].op, args[3].op);
34280 break;
34281 case 5:
34282 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34283 args[2].op, args[3].op, args[4].op);
break;
34284 case 6:
34285 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34286 args[2].op, args[3].op, args[4].op,
34287 args[5].op);
34288 break;
34289 default:
34290 gcc_unreachable ();
34291 }
34292
34293 if (! pat)
34294 return 0;
34295
34296 emit_insn (pat);
34297 return target;
34298 }
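/* A hedged example of the immediate checking above (the intrinsic-to-builtin
   mapping is assumed from avx512fintrin.h): a call such as

     __mmask8 k = _mm512_cmp_epi64_mask (a, b, 42);

   reaches the expander above through CODE_FOR_avx512f_cmpv8di3_mask, and
   because 42 does not satisfy the 3-bit immediate predicate it is rejected
   with "the last argument must be a 3-bit immediate" rather than being
   silently truncated.  */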
34299
34300 /* Transform a pattern of the following layout:
34301 (parallel [
34302 (set (A B))
34303 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34304 ])
34305 into:
34306 (set (A B))
34307
34308 Or:
34309 (parallel [ A B
34310 ...
34311 (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34312 ...
34313 ])
34314 into:
34315 (parallel [ A B ... ]) */
34316
34317 static rtx
34318 ix86_erase_embedded_rounding (rtx pat)
34319 {
34320 if (GET_CODE (pat) == INSN)
34321 pat = PATTERN (pat);
34322
34323 gcc_assert (GET_CODE (pat) == PARALLEL);
34324
34325 if (XVECLEN (pat, 0) == 2)
34326 {
34327 rtx p0 = XVECEXP (pat, 0, 0);
34328 rtx p1 = XVECEXP (pat, 0, 1);
34329
34330 gcc_assert (GET_CODE (p0) == SET
34331 && GET_CODE (p1) == UNSPEC
34332 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34333
34334 return p0;
34335 }
34336 else
34337 {
34338 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34339 int i = 0;
34340 int j = 0;
34341
34342 for (; i < XVECLEN (pat, 0); ++i)
34343 {
34344 rtx elem = XVECEXP (pat, 0, i);
34345 if (GET_CODE (elem) != UNSPEC
34346 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34347 res [j++] = elem;
34348 }
34349
34350 /* No more than 1 occurrence was removed. */
34351 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34352
34353 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34354 }
34355 }
34356
34357 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34358 with rounding. */
34359 static rtx
34360 ix86_expand_sse_comi_round (const struct builtin_description *d,
34361 tree exp, rtx target)
34362 {
34363 rtx pat, set_dst;
34364 tree arg0 = CALL_EXPR_ARG (exp, 0);
34365 tree arg1 = CALL_EXPR_ARG (exp, 1);
34366 tree arg2 = CALL_EXPR_ARG (exp, 2);
34367 tree arg3 = CALL_EXPR_ARG (exp, 3);
34368 rtx op0 = expand_normal (arg0);
34369 rtx op1 = expand_normal (arg1);
34370 rtx op2 = expand_normal (arg2);
34371 rtx op3 = expand_normal (arg3);
34372 enum insn_code icode = d->icode;
34373 const struct insn_data_d *insn_p = &insn_data[icode];
34374 enum machine_mode mode0 = insn_p->operand[0].mode;
34375 enum machine_mode mode1 = insn_p->operand[1].mode;
34376 enum rtx_code comparison = UNEQ;
34377 bool need_ucomi = false;
34378
34379 /* See avxintrin.h for values. */
34380 enum rtx_code comi_comparisons[32] =
34381 {
34382 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34383 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34384 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34385 };
34386 bool need_ucomi_values[32] =
34387 {
34388 true, false, false, true, true, false, false, true,
34389 true, false, false, true, true, false, false, true,
34390 false, true, true, false, false, true, true, false,
34391 false, true, true, false, false, true, true, false
34392 };
34393
34394 if (!CONST_INT_P (op2))
34395 {
34396 error ("the third argument must be a comparison constant");
34397 return const0_rtx;
34398 }
34399 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34400 {
34401 error ("incorrect comparison mode");
34402 return const0_rtx;
34403 }
34404
34405 if (!insn_p->operand[2].predicate (op3, SImode))
34406 {
34407 error ("incorrect rounding operand");
34408 return const0_rtx;
34409 }
34410
34411 comparison = comi_comparisons[INTVAL (op2)];
34412 need_ucomi = need_ucomi_values[INTVAL (op2)];
34413
34414 if (VECTOR_MODE_P (mode0))
34415 op0 = safe_vector_operand (op0, mode0);
34416 if (VECTOR_MODE_P (mode1))
34417 op1 = safe_vector_operand (op1, mode1);
34418
34419 target = gen_reg_rtx (SImode);
34420 emit_move_insn (target, const0_rtx);
34421 target = gen_rtx_SUBREG (QImode, target, 0);
34422
34423 if ((optimize && !register_operand (op0, mode0))
34424 || !insn_p->operand[0].predicate (op0, mode0))
34425 op0 = copy_to_mode_reg (mode0, op0);
34426 if ((optimize && !register_operand (op1, mode1))
34427 || !insn_p->operand[1].predicate (op1, mode1))
34428 op1 = copy_to_mode_reg (mode1, op1);
34429
34430 if (need_ucomi)
34431 icode = icode == CODE_FOR_sse_comi_round
34432 ? CODE_FOR_sse_ucomi_round
34433 : CODE_FOR_sse2_ucomi_round;
34434
34435 pat = GEN_FCN (icode) (op0, op1, op3);
34436 if (! pat)
34437 return 0;
34438
34439 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34440 if (INTVAL (op3) == NO_ROUND)
34441 {
34442 pat = ix86_erase_embedded_rounding (pat);
34443 if (! pat)
34444 return 0;
34445
34446 set_dst = SET_DEST (pat);
34447 }
34448 else
34449 {
34450 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34451 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34452 }
34453
34454 emit_insn (pat);
34455 emit_insn (gen_rtx_SET (VOIDmode,
34456 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34457 gen_rtx_fmt_ee (comparison, QImode,
34458 set_dst,
34459 const0_rtx)));
34460
34461 return SUBREG_REG (target);
34462 }
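/* A hedged usage sketch (intrinsic names and the NO_ROUND ==
   _MM_FROUND_CUR_DIRECTION correspondence are assumptions taken from the
   avx512fintrin.h wrappers):

     int r = _mm_comi_round_sd (a, b, _CMP_GT_OQ, _MM_FROUND_NO_EXC);

   arrives here as an INT_FTYPE_V2DF_V2DF_INT_INT builtin; op2 indexes the
   comparison tables above (selecting the rtx code and the comi/ucomi form),
   op3 carries the SAE control, and when op3 is _MM_FROUND_CUR_DIRECTION the
   embedded-rounding unspec is erased so the plain pattern is emitted.  */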
34463
34464 static rtx
34465 ix86_expand_round_builtin (const struct builtin_description *d,
34466 tree exp, rtx target)
34467 {
34468 rtx pat;
34469 unsigned int i, nargs;
34470 struct
34471 {
34472 rtx op;
34473 enum machine_mode mode;
34474 } args[6];
34475 enum insn_code icode = d->icode;
34476 const struct insn_data_d *insn_p = &insn_data[icode];
34477 enum machine_mode tmode = insn_p->operand[0].mode;
34478 unsigned int nargs_constant = 0;
34479 unsigned int redundant_embed_rnd = 0;
34480
34481 switch ((enum ix86_builtin_func_type) d->flag)
34482 {
34483 case UINT64_FTYPE_V2DF_INT:
34484 case UINT64_FTYPE_V4SF_INT:
34485 case UINT_FTYPE_V2DF_INT:
34486 case UINT_FTYPE_V4SF_INT:
34487 case INT64_FTYPE_V2DF_INT:
34488 case INT64_FTYPE_V4SF_INT:
34489 case INT_FTYPE_V2DF_INT:
34490 case INT_FTYPE_V4SF_INT:
34491 nargs = 2;
34492 break;
34493 case V4SF_FTYPE_V4SF_UINT_INT:
34494 case V4SF_FTYPE_V4SF_UINT64_INT:
34495 case V2DF_FTYPE_V2DF_UINT64_INT:
34496 case V4SF_FTYPE_V4SF_INT_INT:
34497 case V4SF_FTYPE_V4SF_INT64_INT:
34498 case V2DF_FTYPE_V2DF_INT64_INT:
34499 case V4SF_FTYPE_V4SF_V4SF_INT:
34500 case V2DF_FTYPE_V2DF_V2DF_INT:
34501 case V4SF_FTYPE_V4SF_V2DF_INT:
34502 case V2DF_FTYPE_V2DF_V4SF_INT:
34503 nargs = 3;
34504 break;
34505 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34506 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34507 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34508 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34509 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34510 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34511 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34512 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34513 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34514 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34515 nargs = 4;
34516 break;
34517 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34518 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34519 nargs_constant = 2;
34520 nargs = 4;
34521 break;
34522 case INT_FTYPE_V4SF_V4SF_INT_INT:
34523 case INT_FTYPE_V2DF_V2DF_INT_INT:
34524 return ix86_expand_sse_comi_round (d, exp, target);
34525 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34526 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34527 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34528 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34529 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34530 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34531 nargs = 5;
34532 break;
34533 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34534 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34535 nargs_constant = 4;
34536 nargs = 5;
34537 break;
34538 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34539 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34540 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34541 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34542 nargs_constant = 3;
34543 nargs = 5;
34544 break;
34545 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34546 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34547 nargs = 6;
34548 nargs_constant = 4;
34549 break;
34550 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34551 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34552 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34553 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34554 nargs = 6;
34555 nargs_constant = 3;
34556 break;
34557 default:
34558 gcc_unreachable ();
34559 }
34560 gcc_assert (nargs <= ARRAY_SIZE (args));
34561
34562 if (optimize
34563 || target == 0
34564 || GET_MODE (target) != tmode
34565 || !insn_p->operand[0].predicate (target, tmode))
34566 target = gen_reg_rtx (tmode);
34567
34568 for (i = 0; i < nargs; i++)
34569 {
34570 tree arg = CALL_EXPR_ARG (exp, i);
34571 rtx op = expand_normal (arg);
34572 enum machine_mode mode = insn_p->operand[i + 1].mode;
34573 bool match = insn_p->operand[i + 1].predicate (op, mode);
34574
34575 if (i == nargs - nargs_constant)
34576 {
34577 if (!match)
34578 {
34579 switch (icode)
34580 {
34581 case CODE_FOR_avx512f_getmantv8df_mask_round:
34582 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34583 case CODE_FOR_avx512f_vgetmantv2df_round:
34584 case CODE_FOR_avx512f_vgetmantv4sf_round:
34585 error ("the immediate argument must be a 4-bit immediate");
34586 return const0_rtx;
34587 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34588 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34589 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34590 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34591 error ("the immediate argument must be a 5-bit immediate");
34592 return const0_rtx;
34593 default:
34594 error ("the immediate argument must be an 8-bit immediate");
34595 return const0_rtx;
34596 }
34597 }
34598 }
34599 else if (i == nargs - 1)
34600 {
34601 if (!insn_p->operand[nargs].predicate (op, SImode))
34602 {
34603 error ("incorrect rounding operand");
34604 return const0_rtx;
34605 }
34606
34607 /* If there is no rounding, use the normal version of the pattern. */
34608 if (INTVAL (op) == NO_ROUND)
34609 redundant_embed_rnd = 1;
34610 }
34611 else
34612 {
34613 if (VECTOR_MODE_P (mode))
34614 op = safe_vector_operand (op, mode);
34615
34616 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34617 {
34618 if (optimize || !match)
34619 op = copy_to_mode_reg (mode, op);
34620 }
34621 else
34622 {
34623 op = copy_to_reg (op);
34624 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34625 }
34626 }
34627
34628 args[i].op = op;
34629 args[i].mode = mode;
34630 }
34631
34632 switch (nargs)
34633 {
34634 case 1:
34635 pat = GEN_FCN (icode) (target, args[0].op);
34636 break;
34637 case 2:
34638 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34639 break;
34640 case 3:
34641 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34642 args[2].op);
34643 break;
34644 case 4:
34645 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34646 args[2].op, args[3].op);
34647 break;
34648 case 5:
34649 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34650 args[2].op, args[3].op, args[4].op);
break;
34651 case 6:
34652 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34653 args[2].op, args[3].op, args[4].op,
34654 args[5].op);
34655 break;
34656 default:
34657 gcc_unreachable ();
34658 }
34659
34660 if (!pat)
34661 return 0;
34662
34663 if (redundant_embed_rnd)
34664 pat = ix86_erase_embedded_rounding (pat);
34665
34666 emit_insn (pat);
34667 return target;
34668 }
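/* A hedged example of the embedded-rounding handling above (the
   intrinsic-to-builtin mapping and the NO_ROUND value are assumptions
   taken from the avx512fintrin.h wrappers):

     __m512d r = _mm512_add_round_pd (a, b, _MM_FROUND_TO_NEAREST_INT
                                            | _MM_FROUND_NO_EXC);

   expands through a V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT builtin whose last
   operand is the rounding immediate.  With _MM_FROUND_CUR_DIRECTION instead,
   redundant_embed_rnd is set and ix86_erase_embedded_rounding strips the
   UNSPEC_EMBEDDED_ROUNDING marker so the ordinary (non-round) pattern is
   emitted.  */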
34669
34670 /* Subroutine of ix86_expand_builtin to take care of special insns
34671 with variable number of operands. */
34672
34673 static rtx
34674 ix86_expand_special_args_builtin (const struct builtin_description *d,
34675 tree exp, rtx target)
34676 {
34677 tree arg;
34678 rtx pat, op;
34679 unsigned int i, nargs, arg_adjust, memory;
34680 bool aligned_mem = false;
34681 struct
34682 {
34683 rtx op;
34684 enum machine_mode mode;
34685 } args[3];
34686 enum insn_code icode = d->icode;
34687 bool last_arg_constant = false;
34688 const struct insn_data_d *insn_p = &insn_data[icode];
34689 enum machine_mode tmode = insn_p->operand[0].mode;
34690 enum { load, store } klass;
34691
34692 switch ((enum ix86_builtin_func_type) d->flag)
34693 {
34694 case VOID_FTYPE_VOID:
34695 emit_insn (GEN_FCN (icode) (target));
34696 return 0;
34697 case VOID_FTYPE_UINT64:
34698 case VOID_FTYPE_UNSIGNED:
34699 nargs = 0;
34700 klass = store;
34701 memory = 0;
34702 break;
34703
34704 case INT_FTYPE_VOID:
34705 case USHORT_FTYPE_VOID:
34706 case UINT64_FTYPE_VOID:
34707 case UNSIGNED_FTYPE_VOID:
34708 nargs = 0;
34709 klass = load;
34710 memory = 0;
34711 break;
34712 case UINT64_FTYPE_PUNSIGNED:
34713 case V2DI_FTYPE_PV2DI:
34714 case V4DI_FTYPE_PV4DI:
34715 case V32QI_FTYPE_PCCHAR:
34716 case V16QI_FTYPE_PCCHAR:
34717 case V8SF_FTYPE_PCV4SF:
34718 case V8SF_FTYPE_PCFLOAT:
34719 case V4SF_FTYPE_PCFLOAT:
34720 case V4DF_FTYPE_PCV2DF:
34721 case V4DF_FTYPE_PCDOUBLE:
34722 case V2DF_FTYPE_PCDOUBLE:
34723 case VOID_FTYPE_PVOID:
34724 case V16SI_FTYPE_PV4SI:
34725 case V16SF_FTYPE_PV4SF:
34726 case V8DI_FTYPE_PV4DI:
34727 case V8DI_FTYPE_PV8DI:
34728 case V8DF_FTYPE_PV4DF:
34729 nargs = 1;
34730 klass = load;
34731 memory = 0;
34732 switch (icode)
34733 {
34734 case CODE_FOR_sse4_1_movntdqa:
34735 case CODE_FOR_avx2_movntdqa:
34736 case CODE_FOR_avx512f_movntdqa:
34737 aligned_mem = true;
34738 break;
34739 default:
34740 break;
34741 }
34742 break;
34743 case VOID_FTYPE_PV2SF_V4SF:
34744 case VOID_FTYPE_PV8DI_V8DI:
34745 case VOID_FTYPE_PV4DI_V4DI:
34746 case VOID_FTYPE_PV2DI_V2DI:
34747 case VOID_FTYPE_PCHAR_V32QI:
34748 case VOID_FTYPE_PCHAR_V16QI:
34749 case VOID_FTYPE_PFLOAT_V16SF:
34750 case VOID_FTYPE_PFLOAT_V8SF:
34751 case VOID_FTYPE_PFLOAT_V4SF:
34752 case VOID_FTYPE_PDOUBLE_V8DF:
34753 case VOID_FTYPE_PDOUBLE_V4DF:
34754 case VOID_FTYPE_PDOUBLE_V2DF:
34755 case VOID_FTYPE_PLONGLONG_LONGLONG:
34756 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34757 case VOID_FTYPE_PINT_INT:
34758 nargs = 1;
34759 klass = store;
34760 /* Reserve memory operand for target. */
34761 memory = ARRAY_SIZE (args);
34762 switch (icode)
34763 {
34764 /* These builtins and instructions require the memory
34765 to be properly aligned. */
34766 case CODE_FOR_avx_movntv4di:
34767 case CODE_FOR_sse2_movntv2di:
34768 case CODE_FOR_avx_movntv8sf:
34769 case CODE_FOR_sse_movntv4sf:
34770 case CODE_FOR_sse4a_vmmovntv4sf:
34771 case CODE_FOR_avx_movntv4df:
34772 case CODE_FOR_sse2_movntv2df:
34773 case CODE_FOR_sse4a_vmmovntv2df:
34774 case CODE_FOR_sse2_movntidi:
34775 case CODE_FOR_sse_movntq:
34776 case CODE_FOR_sse2_movntisi:
34777 case CODE_FOR_avx512f_movntv16sf:
34778 case CODE_FOR_avx512f_movntv8df:
34779 case CODE_FOR_avx512f_movntv8di:
34780 aligned_mem = true;
34781 break;
34782 default:
34783 break;
34784 }
34785 break;
34786 case V4SF_FTYPE_V4SF_PCV2SF:
34787 case V2DF_FTYPE_V2DF_PCDOUBLE:
34788 nargs = 2;
34789 klass = load;
34790 memory = 1;
34791 break;
34792 case V8SF_FTYPE_PCV8SF_V8SI:
34793 case V4DF_FTYPE_PCV4DF_V4DI:
34794 case V4SF_FTYPE_PCV4SF_V4SI:
34795 case V2DF_FTYPE_PCV2DF_V2DI:
34796 case V8SI_FTYPE_PCV8SI_V8SI:
34797 case V4DI_FTYPE_PCV4DI_V4DI:
34798 case V4SI_FTYPE_PCV4SI_V4SI:
34799 case V2DI_FTYPE_PCV2DI_V2DI:
34800 nargs = 2;
34801 klass = load;
34802 memory = 0;
34803 break;
34804 case VOID_FTYPE_PV8DF_V8DF_QI:
34805 case VOID_FTYPE_PV16SF_V16SF_HI:
34806 case VOID_FTYPE_PV8DI_V8DI_QI:
34807 case VOID_FTYPE_PV16SI_V16SI_HI:
34808 switch (icode)
34809 {
34810 /* These builtins and instructions require the memory
34811 to be properly aligned. */
34812 case CODE_FOR_avx512f_storev16sf_mask:
34813 case CODE_FOR_avx512f_storev16si_mask:
34814 case CODE_FOR_avx512f_storev8df_mask:
34815 case CODE_FOR_avx512f_storev8di_mask:
34816 case CODE_FOR_avx512vl_storev8sf_mask:
34817 case CODE_FOR_avx512vl_storev8si_mask:
34818 case CODE_FOR_avx512vl_storev4df_mask:
34819 case CODE_FOR_avx512vl_storev4di_mask:
34820 case CODE_FOR_avx512vl_storev4sf_mask:
34821 case CODE_FOR_avx512vl_storev4si_mask:
34822 case CODE_FOR_avx512vl_storev2df_mask:
34823 case CODE_FOR_avx512vl_storev2di_mask:
34824 aligned_mem = true;
34825 break;
34826 default:
34827 break;
34828 }
34829 /* FALLTHRU */
34830 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34831 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34832 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34833 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34834 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34835 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34836 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34837 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34838 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34839 case VOID_FTYPE_PFLOAT_V4SF_QI:
34840 case VOID_FTYPE_PV8SI_V8DI_QI:
34841 case VOID_FTYPE_PV8HI_V8DI_QI:
34842 case VOID_FTYPE_PV16HI_V16SI_HI:
34843 case VOID_FTYPE_PV16QI_V8DI_QI:
34844 case VOID_FTYPE_PV16QI_V16SI_HI:
34845 nargs = 2;
34846 klass = store;
34847 /* Reserve memory operand for target. */
34848 memory = ARRAY_SIZE (args);
34849 break;
34850 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34851 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34852 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34853 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34854 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34855 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34856 nargs = 3;
34857 klass = load;
34858 memory = 0;
34859 switch (icode)
34860 {
34861 /* These builtins and instructions require the memory
34862 to be properly aligned. */
34863 case CODE_FOR_avx512f_loadv16sf_mask:
34864 case CODE_FOR_avx512f_loadv16si_mask:
34865 case CODE_FOR_avx512f_loadv8df_mask:
34866 case CODE_FOR_avx512f_loadv8di_mask:
34867 case CODE_FOR_avx512vl_loadv8sf_mask:
34868 case CODE_FOR_avx512vl_loadv8si_mask:
34869 case CODE_FOR_avx512vl_loadv4df_mask:
34870 case CODE_FOR_avx512vl_loadv4di_mask:
34871 case CODE_FOR_avx512vl_loadv4sf_mask:
34872 case CODE_FOR_avx512vl_loadv4si_mask:
34873 case CODE_FOR_avx512vl_loadv2df_mask:
34874 case CODE_FOR_avx512vl_loadv2di_mask:
34875 case CODE_FOR_avx512bw_loadv64qi_mask:
34876 case CODE_FOR_avx512vl_loadv32qi_mask:
34877 case CODE_FOR_avx512vl_loadv16qi_mask:
34878 case CODE_FOR_avx512bw_loadv32hi_mask:
34879 case CODE_FOR_avx512vl_loadv16hi_mask:
34880 case CODE_FOR_avx512vl_loadv8hi_mask:
34881 aligned_mem = true;
34882 break;
34883 default:
34884 break;
34885 }
34886 break;
34887 case VOID_FTYPE_UINT_UINT_UINT:
34888 case VOID_FTYPE_UINT64_UINT_UINT:
34889 case UCHAR_FTYPE_UINT_UINT_UINT:
34890 case UCHAR_FTYPE_UINT64_UINT_UINT:
34891 nargs = 3;
34892 klass = load;
34893 memory = ARRAY_SIZE (args);
34894 last_arg_constant = true;
34895 break;
34896 default:
34897 gcc_unreachable ();
34898 }
34899
34900 gcc_assert (nargs <= ARRAY_SIZE (args));
34901
34902 if (klass == store)
34903 {
34904 arg = CALL_EXPR_ARG (exp, 0);
34905 op = expand_normal (arg);
34906 gcc_assert (target == 0);
34907 if (memory)
34908 {
34909 op = ix86_zero_extend_to_Pmode (op);
34910 target = gen_rtx_MEM (tmode, op);
34911 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34912 on it. Try to improve it using get_pointer_alignment,
34913 and if the special builtin is one that requires strict
34914 mode alignment, also from its GET_MODE_ALIGNMENT.
34915 Failure to do so could lead to ix86_legitimate_combined_insn
34916 rejecting all changes to such insns. */
34917 unsigned int align = get_pointer_alignment (arg);
34918 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34919 align = GET_MODE_ALIGNMENT (tmode);
34920 if (MEM_ALIGN (target) < align)
34921 set_mem_align (target, align);
34922 }
34923 else
34924 target = force_reg (tmode, op);
34925 arg_adjust = 1;
34926 }
34927 else
34928 {
34929 arg_adjust = 0;
34930 if (optimize
34931 || target == 0
34932 || !register_operand (target, tmode)
34933 || GET_MODE (target) != tmode)
34934 target = gen_reg_rtx (tmode);
34935 }
34936
34937 for (i = 0; i < nargs; i++)
34938 {
34939 enum machine_mode mode = insn_p->operand[i + 1].mode;
34940 bool match;
34941
34942 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34943 op = expand_normal (arg);
34944 match = insn_p->operand[i + 1].predicate (op, mode);
34945
34946 if (last_arg_constant && (i + 1) == nargs)
34947 {
34948 if (!match)
34949 {
34950 if (icode == CODE_FOR_lwp_lwpvalsi3
34951 || icode == CODE_FOR_lwp_lwpinssi3
34952 || icode == CODE_FOR_lwp_lwpvaldi3
34953 || icode == CODE_FOR_lwp_lwpinsdi3)
34954 error ("the last argument must be a 32-bit immediate");
34955 else
34956 error ("the last argument must be an 8-bit immediate");
34957 return const0_rtx;
34958 }
34959 }
34960 else
34961 {
34962 if (i == memory)
34963 {
34964 /* This must be the memory operand. */
34965 op = ix86_zero_extend_to_Pmode (op);
34966 op = gen_rtx_MEM (mode, op);
34967 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34968 on it. Try to improve it using get_pointer_alignment,
34969 and if the special builtin is one that requires strict
34970 mode alignment, also from its GET_MODE_ALIGNMENT.
34971 Failure to do so could lead to ix86_legitimate_combined_insn
34972 rejecting all changes to such insns. */
34973 unsigned int align = get_pointer_alignment (arg);
34974 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34975 align = GET_MODE_ALIGNMENT (mode);
34976 if (MEM_ALIGN (op) < align)
34977 set_mem_align (op, align);
34978 }
34979 else
34980 {
34981 /* This must be a register. */
34982 if (VECTOR_MODE_P (mode))
34983 op = safe_vector_operand (op, mode);
34984
34985 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34986 op = copy_to_mode_reg (mode, op);
34987 else
34988 {
34989 op = copy_to_reg (op);
34990 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34991 }
34992 }
34993 }
34994
34995 args[i].op = op;
34996 args[i].mode = mode;
34997 }
34998
34999 switch (nargs)
35000 {
35001 case 0:
35002 pat = GEN_FCN (icode) (target);
35003 break;
35004 case 1:
35005 pat = GEN_FCN (icode) (target, args[0].op);
35006 break;
35007 case 2:
35008 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
35009 break;
35010 case 3:
35011 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
35012 break;
35013 default:
35014 gcc_unreachable ();
35015 }
35016
35017 if (! pat)
35018 return 0;
35019 emit_insn (pat);
35020 return klass == store ? 0 : target;
35021 }
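/* A hedged example of the alignment handling above (the intrinsic and
   builtin names are assumed from smmintrin.h):

     __m128i v = _mm_stream_load_si128 (p);

   maps to a V2DI_FTYPE_PV2DI load builtin using CODE_FOR_sse4_1_movntdqa,
   which is in the aligned_mem list, so the MEM built for P is given the
   full GET_MODE_ALIGNMENT of V2DImode (128 bits) rather than the default
   BITS_PER_UNIT, keeping ix86_legitimate_combined_insn happy.  */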
35022
35023 /* Return the integer constant in ARG. Constrain it to be in the range
35024 of the subparts of VEC_TYPE; issue an error if not. */
35025
35026 static int
35027 get_element_number (tree vec_type, tree arg)
35028 {
35029 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
35030
35031 if (!tree_fits_uhwi_p (arg)
35032 || (elt = tree_to_uhwi (arg), elt > max))
35033 {
35034 error ("selector must be an integer constant in the range 0..%wi", max);
35035 return 0;
35036 }
35037
35038 return elt;
35039 }
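/* For example, for a vector type with four subparts (say V4SF) MAX is 3,
   so get_element_number (type, build_int_cst (integer_type_node, 4))
   reports "selector must be an integer constant in the range 0..3" and
   returns 0.  */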
35040
35041 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35042 ix86_expand_vector_init. We DO have language-level syntax for this, in
35043 the form of (type){ init-list }. Except that since we can't place emms
35044 instructions from inside the compiler, we can't allow the use of MMX
35045 registers unless the user explicitly asks for it. So we do *not* define
35046 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
35047 we have builtins invoked by mmintrin.h that give us license to emit
35048 these sorts of instructions. */
35049
35050 static rtx
35051 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
35052 {
35053 enum machine_mode tmode = TYPE_MODE (type);
35054 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
35055 int i, n_elt = GET_MODE_NUNITS (tmode);
35056 rtvec v = rtvec_alloc (n_elt);
35057
35058 gcc_assert (VECTOR_MODE_P (tmode));
35059 gcc_assert (call_expr_nargs (exp) == n_elt);
35060
35061 for (i = 0; i < n_elt; ++i)
35062 {
35063 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
35064 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
35065 }
35066
35067 if (!target || !register_operand (target, tmode))
35068 target = gen_reg_rtx (tmode);
35069
35070 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
35071 return target;
35072 }
35073
35074 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35075 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
35076 had a language-level syntax for referencing vector elements. */
35077
35078 static rtx
35079 ix86_expand_vec_ext_builtin (tree exp, rtx target)
35080 {
35081 enum machine_mode tmode, mode0;
35082 tree arg0, arg1;
35083 int elt;
35084 rtx op0;
35085
35086 arg0 = CALL_EXPR_ARG (exp, 0);
35087 arg1 = CALL_EXPR_ARG (exp, 1);
35088
35089 op0 = expand_normal (arg0);
35090 elt = get_element_number (TREE_TYPE (arg0), arg1);
35091
35092 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35093 mode0 = TYPE_MODE (TREE_TYPE (arg0));
35094 gcc_assert (VECTOR_MODE_P (mode0));
35095
35096 op0 = force_reg (mode0, op0);
35097
35098 if (optimize || !target || !register_operand (target, tmode))
35099 target = gen_reg_rtx (tmode);
35100
35101 ix86_expand_vector_extract (true, target, op0, elt);
35102
35103 return target;
35104 }
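/* A hedged usage sketch (the wrapper is assumed from emmintrin.h):

     int x = _mm_extract_epi16 (v, 2);

   becomes __builtin_ia32_vec_ext_v8hi ((__v8hi) v, 2), dispatched below via
   IX86_BUILTIN_VEC_EXT_V8HI: the selector is validated by get_element_number
   and element 2 is pulled out with ix86_expand_vector_extract.  */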
35105
35106 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
35107 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
35108 a language-level syntax for referencing vector elements. */
35109
35110 static rtx
35111 ix86_expand_vec_set_builtin (tree exp)
35112 {
35113 enum machine_mode tmode, mode1;
35114 tree arg0, arg1, arg2;
35115 int elt;
35116 rtx op0, op1, target;
35117
35118 arg0 = CALL_EXPR_ARG (exp, 0);
35119 arg1 = CALL_EXPR_ARG (exp, 1);
35120 arg2 = CALL_EXPR_ARG (exp, 2);
35121
35122 tmode = TYPE_MODE (TREE_TYPE (arg0));
35123 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
35124 gcc_assert (VECTOR_MODE_P (tmode));
35125
35126 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
35127 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
35128 elt = get_element_number (TREE_TYPE (arg0), arg2);
35129
35130 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
35131 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
35132
35133 op0 = force_reg (tmode, op0);
35134 op1 = force_reg (mode1, op1);
35135
35136 /* OP0 is the source of these builtin functions and shouldn't be
35137 modified. Create a copy, use it and return it as target. */
35138 target = gen_reg_rtx (tmode);
35139 emit_move_insn (target, op0);
35140 ix86_expand_vector_set (true, target, op1, elt);
35141
35142 return target;
35143 }
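/* A hedged usage sketch (the wrapper is assumed from emmintrin.h):

     __m128i r = _mm_insert_epi16 (v, d, 5);

   becomes __builtin_ia32_vec_set_v8hi ((__v8hi) v, d, 5), dispatched below
   via IX86_BUILTIN_VEC_SET_V8HI.  The source vector is first copied into a
   fresh register, so the argument V itself is never modified.  */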
35144
35145 /* Expand an expression EXP that calls a built-in function,
35146 with result going to TARGET if that's convenient
35147 (and in mode MODE if that's convenient).
35148 SUBTARGET may be used as the target for computing one of EXP's operands.
35149 IGNORE is nonzero if the value is to be ignored. */
35150
35151 static rtx
35152 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
35153 enum machine_mode mode, int ignore)
35154 {
35155 const struct builtin_description *d;
35156 size_t i;
35157 enum insn_code icode;
35158 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
35159 tree arg0, arg1, arg2, arg3, arg4;
35160 rtx op0, op1, op2, op3, op4, pat, insn;
35161 enum machine_mode mode0, mode1, mode2, mode3, mode4;
35162 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
35163
35164 /* For CPU builtins that can be folded, fold first and expand the fold. */
35165 switch (fcode)
35166 {
35167 case IX86_BUILTIN_CPU_INIT:
35168 {
35169 /* Make it call __cpu_indicator_init in libgcc. */
35170 tree call_expr, fndecl, type;
35171 type = build_function_type_list (integer_type_node, NULL_TREE);
35172 fndecl = build_fn_decl ("__cpu_indicator_init", type);
35173 call_expr = build_call_expr (fndecl, 0);
35174 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
35175 }
35176 case IX86_BUILTIN_CPU_IS:
35177 case IX86_BUILTIN_CPU_SUPPORTS:
35178 {
35179 tree arg0 = CALL_EXPR_ARG (exp, 0);
35180 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35181 gcc_assert (fold_expr != NULL_TREE);
35182 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35183 }
35184 }
35185
35186 /* Determine whether the builtin function is available under the current ISA.
35187 Originally the builtin was not created if it wasn't applicable to the
35188 current ISA based on the command line switches. With function specific
35189 options, we need to check in the context of the function making the call
35190 whether it is supported. */
35191 if (ix86_builtins_isa[fcode].isa
35192 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35193 {
35194 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35195 NULL, (enum fpmath_unit) 0, false);
35196
35197 if (!opts)
35198 error ("%qE needs unknown isa option", fndecl);
35199 else
35200 {
35201 gcc_assert (opts != NULL);
35202 error ("%qE needs isa option %s", fndecl, opts);
35203 free (opts);
35204 }
35205 return const0_rtx;
35206 }
35207
35208 switch (fcode)
35209 {
35210 case IX86_BUILTIN_MASKMOVQ:
35211 case IX86_BUILTIN_MASKMOVDQU:
35212 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35213 ? CODE_FOR_mmx_maskmovq
35214 : CODE_FOR_sse2_maskmovdqu);
35215 /* Note the arg order is different from the operand order. */
35216 arg1 = CALL_EXPR_ARG (exp, 0);
35217 arg2 = CALL_EXPR_ARG (exp, 1);
35218 arg0 = CALL_EXPR_ARG (exp, 2);
35219 op0 = expand_normal (arg0);
35220 op1 = expand_normal (arg1);
35221 op2 = expand_normal (arg2);
35222 mode0 = insn_data[icode].operand[0].mode;
35223 mode1 = insn_data[icode].operand[1].mode;
35224 mode2 = insn_data[icode].operand[2].mode;
35225
35226 op0 = ix86_zero_extend_to_Pmode (op0);
35227 op0 = gen_rtx_MEM (mode1, op0);
35228
35229 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35230 op0 = copy_to_mode_reg (mode0, op0);
35231 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35232 op1 = copy_to_mode_reg (mode1, op1);
35233 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35234 op2 = copy_to_mode_reg (mode2, op2);
35235 pat = GEN_FCN (icode) (op0, op1, op2);
35236 if (! pat)
35237 return 0;
35238 emit_insn (pat);
35239 return 0;
35240
35241 case IX86_BUILTIN_LDMXCSR:
35242 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35243 target = assign_386_stack_local (SImode, SLOT_TEMP);
35244 emit_move_insn (target, op0);
35245 emit_insn (gen_sse_ldmxcsr (target));
35246 return 0;
35247
35248 case IX86_BUILTIN_STMXCSR:
35249 target = assign_386_stack_local (SImode, SLOT_TEMP);
35250 emit_insn (gen_sse_stmxcsr (target));
35251 return copy_to_mode_reg (SImode, target);
35252
35253 case IX86_BUILTIN_CLFLUSH:
35254 arg0 = CALL_EXPR_ARG (exp, 0);
35255 op0 = expand_normal (arg0);
35256 icode = CODE_FOR_sse2_clflush;
35257 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35258 op0 = ix86_zero_extend_to_Pmode (op0);
35259
35260 emit_insn (gen_sse2_clflush (op0));
35261 return 0;
35262
35263 case IX86_BUILTIN_CLFLUSHOPT:
35264 arg0 = CALL_EXPR_ARG (exp, 0);
35265 op0 = expand_normal (arg0);
35266 icode = CODE_FOR_clflushopt;
35267 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35268 op0 = ix86_zero_extend_to_Pmode (op0);
35269
35270 emit_insn (gen_clflushopt (op0));
35271 return 0;
35272
35273 case IX86_BUILTIN_MONITOR:
35274 arg0 = CALL_EXPR_ARG (exp, 0);
35275 arg1 = CALL_EXPR_ARG (exp, 1);
35276 arg2 = CALL_EXPR_ARG (exp, 2);
35277 op0 = expand_normal (arg0);
35278 op1 = expand_normal (arg1);
35279 op2 = expand_normal (arg2);
35280 if (!REG_P (op0))
35281 op0 = ix86_zero_extend_to_Pmode (op0);
35282 if (!REG_P (op1))
35283 op1 = copy_to_mode_reg (SImode, op1);
35284 if (!REG_P (op2))
35285 op2 = copy_to_mode_reg (SImode, op2);
35286 emit_insn (ix86_gen_monitor (op0, op1, op2));
35287 return 0;
35288
35289 case IX86_BUILTIN_MWAIT:
35290 arg0 = CALL_EXPR_ARG (exp, 0);
35291 arg1 = CALL_EXPR_ARG (exp, 1);
35292 op0 = expand_normal (arg0);
35293 op1 = expand_normal (arg1);
35294 if (!REG_P (op0))
35295 op0 = copy_to_mode_reg (SImode, op0);
35296 if (!REG_P (op1))
35297 op1 = copy_to_mode_reg (SImode, op1);
35298 emit_insn (gen_sse3_mwait (op0, op1));
35299 return 0;
35300
35301 case IX86_BUILTIN_VEC_INIT_V2SI:
35302 case IX86_BUILTIN_VEC_INIT_V4HI:
35303 case IX86_BUILTIN_VEC_INIT_V8QI:
35304 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35305
35306 case IX86_BUILTIN_VEC_EXT_V2DF:
35307 case IX86_BUILTIN_VEC_EXT_V2DI:
35308 case IX86_BUILTIN_VEC_EXT_V4SF:
35309 case IX86_BUILTIN_VEC_EXT_V4SI:
35310 case IX86_BUILTIN_VEC_EXT_V8HI:
35311 case IX86_BUILTIN_VEC_EXT_V2SI:
35312 case IX86_BUILTIN_VEC_EXT_V4HI:
35313 case IX86_BUILTIN_VEC_EXT_V16QI:
35314 return ix86_expand_vec_ext_builtin (exp, target);
35315
35316 case IX86_BUILTIN_VEC_SET_V2DI:
35317 case IX86_BUILTIN_VEC_SET_V4SF:
35318 case IX86_BUILTIN_VEC_SET_V4SI:
35319 case IX86_BUILTIN_VEC_SET_V8HI:
35320 case IX86_BUILTIN_VEC_SET_V4HI:
35321 case IX86_BUILTIN_VEC_SET_V16QI:
35322 return ix86_expand_vec_set_builtin (exp);
35323
35324 case IX86_BUILTIN_INFQ:
35325 case IX86_BUILTIN_HUGE_VALQ:
35326 {
35327 REAL_VALUE_TYPE inf;
35328 rtx tmp;
35329
35330 real_inf (&inf);
35331 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35332
35333 tmp = validize_mem (force_const_mem (mode, tmp));
35334
35335 if (target == 0)
35336 target = gen_reg_rtx (mode);
35337
35338 emit_move_insn (target, tmp);
35339 return target;
35340 }
35341
35342 case IX86_BUILTIN_RDPMC:
35343 case IX86_BUILTIN_RDTSC:
35344 case IX86_BUILTIN_RDTSCP:
35345
35346 op0 = gen_reg_rtx (DImode);
35347 op1 = gen_reg_rtx (DImode);
35348
35349 if (fcode == IX86_BUILTIN_RDPMC)
35350 {
35351 arg0 = CALL_EXPR_ARG (exp, 0);
35352 op2 = expand_normal (arg0);
35353 if (!register_operand (op2, SImode))
35354 op2 = copy_to_mode_reg (SImode, op2);
35355
35356 insn = (TARGET_64BIT
35357 ? gen_rdpmc_rex64 (op0, op1, op2)
35358 : gen_rdpmc (op0, op2));
35359 emit_insn (insn);
35360 }
35361 else if (fcode == IX86_BUILTIN_RDTSC)
35362 {
35363 insn = (TARGET_64BIT
35364 ? gen_rdtsc_rex64 (op0, op1)
35365 : gen_rdtsc (op0));
35366 emit_insn (insn);
35367 }
35368 else
35369 {
35370 op2 = gen_reg_rtx (SImode);
35371
35372 insn = (TARGET_64BIT
35373 ? gen_rdtscp_rex64 (op0, op1, op2)
35374 : gen_rdtscp (op0, op2));
35375 emit_insn (insn);
35376
35377 arg0 = CALL_EXPR_ARG (exp, 0);
35378 op4 = expand_normal (arg0);
35379 if (!address_operand (op4, VOIDmode))
35380 {
35381 op4 = convert_memory_address (Pmode, op4);
35382 op4 = copy_addr_to_reg (op4);
35383 }
35384 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35385 }
35386
35387 if (target == 0)
35388 {
35389 /* mode is VOIDmode if __builtin_rd* has been called
35390 without lhs. */
35391 if (mode == VOIDmode)
35392 return target;
35393 target = gen_reg_rtx (mode);
35394 }
35395
35396 if (TARGET_64BIT)
35397 {
35398 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35399 op1, 1, OPTAB_DIRECT);
35400 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35401 op0, 1, OPTAB_DIRECT);
35402 }
35403
35404 emit_move_insn (target, op0);
35405 return target;
35406
35407 case IX86_BUILTIN_FXSAVE:
35408 case IX86_BUILTIN_FXRSTOR:
35409 case IX86_BUILTIN_FXSAVE64:
35410 case IX86_BUILTIN_FXRSTOR64:
35411 case IX86_BUILTIN_FNSTENV:
35412 case IX86_BUILTIN_FLDENV:
35413 mode0 = BLKmode;
35414 switch (fcode)
35415 {
35416 case IX86_BUILTIN_FXSAVE:
35417 icode = CODE_FOR_fxsave;
35418 break;
35419 case IX86_BUILTIN_FXRSTOR:
35420 icode = CODE_FOR_fxrstor;
35421 break;
35422 case IX86_BUILTIN_FXSAVE64:
35423 icode = CODE_FOR_fxsave64;
35424 break;
35425 case IX86_BUILTIN_FXRSTOR64:
35426 icode = CODE_FOR_fxrstor64;
35427 break;
35428 case IX86_BUILTIN_FNSTENV:
35429 icode = CODE_FOR_fnstenv;
35430 break;
35431 case IX86_BUILTIN_FLDENV:
35432 icode = CODE_FOR_fldenv;
35433 break;
35434 default:
35435 gcc_unreachable ();
35436 }
35437
35438 arg0 = CALL_EXPR_ARG (exp, 0);
35439 op0 = expand_normal (arg0);
35440
35441 if (!address_operand (op0, VOIDmode))
35442 {
35443 op0 = convert_memory_address (Pmode, op0);
35444 op0 = copy_addr_to_reg (op0);
35445 }
35446 op0 = gen_rtx_MEM (mode0, op0);
35447
35448 pat = GEN_FCN (icode) (op0);
35449 if (pat)
35450 emit_insn (pat);
35451 return 0;
35452
35453 case IX86_BUILTIN_XSAVE:
35454 case IX86_BUILTIN_XRSTOR:
35455 case IX86_BUILTIN_XSAVE64:
35456 case IX86_BUILTIN_XRSTOR64:
35457 case IX86_BUILTIN_XSAVEOPT:
35458 case IX86_BUILTIN_XSAVEOPT64:
35459 case IX86_BUILTIN_XSAVES:
35460 case IX86_BUILTIN_XRSTORS:
35461 case IX86_BUILTIN_XSAVES64:
35462 case IX86_BUILTIN_XRSTORS64:
35463 case IX86_BUILTIN_XSAVEC:
35464 case IX86_BUILTIN_XSAVEC64:
35465 arg0 = CALL_EXPR_ARG (exp, 0);
35466 arg1 = CALL_EXPR_ARG (exp, 1);
35467 op0 = expand_normal (arg0);
35468 op1 = expand_normal (arg1);
35469
35470 if (!address_operand (op0, VOIDmode))
35471 {
35472 op0 = convert_memory_address (Pmode, op0);
35473 op0 = copy_addr_to_reg (op0);
35474 }
35475 op0 = gen_rtx_MEM (BLKmode, op0);
35476
35477 op1 = force_reg (DImode, op1);
35478
35479 if (TARGET_64BIT)
35480 {
35481 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35482 NULL, 1, OPTAB_DIRECT);
35483 switch (fcode)
35484 {
35485 case IX86_BUILTIN_XSAVE:
35486 icode = CODE_FOR_xsave_rex64;
35487 break;
35488 case IX86_BUILTIN_XRSTOR:
35489 icode = CODE_FOR_xrstor_rex64;
35490 break;
35491 case IX86_BUILTIN_XSAVE64:
35492 icode = CODE_FOR_xsave64;
35493 break;
35494 case IX86_BUILTIN_XRSTOR64:
35495 icode = CODE_FOR_xrstor64;
35496 break;
35497 case IX86_BUILTIN_XSAVEOPT:
35498 icode = CODE_FOR_xsaveopt_rex64;
35499 break;
35500 case IX86_BUILTIN_XSAVEOPT64:
35501 icode = CODE_FOR_xsaveopt64;
35502 break;
35503 case IX86_BUILTIN_XSAVES:
35504 icode = CODE_FOR_xsaves_rex64;
35505 break;
35506 case IX86_BUILTIN_XRSTORS:
35507 icode = CODE_FOR_xrstors_rex64;
35508 break;
35509 case IX86_BUILTIN_XSAVES64:
35510 icode = CODE_FOR_xsaves64;
35511 break;
35512 case IX86_BUILTIN_XRSTORS64:
35513 icode = CODE_FOR_xrstors64;
35514 break;
35515 case IX86_BUILTIN_XSAVEC:
35516 icode = CODE_FOR_xsavec_rex64;
35517 break;
35518 case IX86_BUILTIN_XSAVEC64:
35519 icode = CODE_FOR_xsavec64;
35520 break;
35521 default:
35522 gcc_unreachable ();
35523 }
35524
35525 op2 = gen_lowpart (SImode, op2);
35526 op1 = gen_lowpart (SImode, op1);
35527 pat = GEN_FCN (icode) (op0, op1, op2);
35528 }
35529 else
35530 {
35531 switch (fcode)
35532 {
35533 case IX86_BUILTIN_XSAVE:
35534 icode = CODE_FOR_xsave;
35535 break;
35536 case IX86_BUILTIN_XRSTOR:
35537 icode = CODE_FOR_xrstor;
35538 break;
35539 case IX86_BUILTIN_XSAVEOPT:
35540 icode = CODE_FOR_xsaveopt;
35541 break;
35542 case IX86_BUILTIN_XSAVES:
35543 icode = CODE_FOR_xsaves;
35544 break;
35545 case IX86_BUILTIN_XRSTORS:
35546 icode = CODE_FOR_xrstors;
35547 break;
35548 case IX86_BUILTIN_XSAVEC:
35549 icode = CODE_FOR_xsavec;
35550 break;
35551 default:
35552 gcc_unreachable ();
35553 }
35554 pat = GEN_FCN (icode) (op0, op1);
35555 }
35556
35557 if (pat)
35558 emit_insn (pat);
35559 return 0;
35560
35561 case IX86_BUILTIN_LLWPCB:
35562 arg0 = CALL_EXPR_ARG (exp, 0);
35563 op0 = expand_normal (arg0);
35564 icode = CODE_FOR_lwp_llwpcb;
35565 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35566 op0 = ix86_zero_extend_to_Pmode (op0);
35567 emit_insn (gen_lwp_llwpcb (op0));
35568 return 0;
35569
35570 case IX86_BUILTIN_SLWPCB:
35571 icode = CODE_FOR_lwp_slwpcb;
35572 if (!target
35573 || !insn_data[icode].operand[0].predicate (target, Pmode))
35574 target = gen_reg_rtx (Pmode);
35575 emit_insn (gen_lwp_slwpcb (target));
35576 return target;
35577
35578 case IX86_BUILTIN_BEXTRI32:
35579 case IX86_BUILTIN_BEXTRI64:
35580 arg0 = CALL_EXPR_ARG (exp, 0);
35581 arg1 = CALL_EXPR_ARG (exp, 1);
35582 op0 = expand_normal (arg0);
35583 op1 = expand_normal (arg1);
35584 icode = (fcode == IX86_BUILTIN_BEXTRI32
35585 ? CODE_FOR_tbm_bextri_si
35586 : CODE_FOR_tbm_bextri_di);
35587 if (!CONST_INT_P (op1))
35588 {
35589 error ("last argument must be an immediate");
35590 return const0_rtx;
35591 }
35592 else
35593 {
35594 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35595 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35596 op1 = GEN_INT (length);
35597 op2 = GEN_INT (lsb_index);
35598 pat = GEN_FCN (icode) (target, op0, op1, op2);
35599 if (pat)
35600 emit_insn (pat);
35601 return target;
35602 }
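/* A hedged example of the immediate decoding above (the __bextri_u32
   wrapper name is assumed from tbmintrin.h): __bextri_u32 (x, 0x0804)
   splits the control word into length 0x08 (bits 8-15) and lsb index 0x04
   (bits 0-7), i.e. it extracts an 8-bit field of X starting at bit 4.  */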
35603
35604 case IX86_BUILTIN_RDRAND16_STEP:
35605 icode = CODE_FOR_rdrandhi_1;
35606 mode0 = HImode;
35607 goto rdrand_step;
35608
35609 case IX86_BUILTIN_RDRAND32_STEP:
35610 icode = CODE_FOR_rdrandsi_1;
35611 mode0 = SImode;
35612 goto rdrand_step;
35613
35614 case IX86_BUILTIN_RDRAND64_STEP:
35615 icode = CODE_FOR_rdranddi_1;
35616 mode0 = DImode;
35617
35618 rdrand_step:
35619 op0 = gen_reg_rtx (mode0);
35620 emit_insn (GEN_FCN (icode) (op0));
35621
35622 arg0 = CALL_EXPR_ARG (exp, 0);
35623 op1 = expand_normal (arg0);
35624 if (!address_operand (op1, VOIDmode))
35625 {
35626 op1 = convert_memory_address (Pmode, op1);
35627 op1 = copy_addr_to_reg (op1);
35628 }
35629 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35630
35631 op1 = gen_reg_rtx (SImode);
35632 emit_move_insn (op1, CONST1_RTX (SImode));
35633
35634 /* Emit SImode conditional move. */
35635 if (mode0 == HImode)
35636 {
35637 op2 = gen_reg_rtx (SImode);
35638 emit_insn (gen_zero_extendhisi2 (op2, op0));
35639 }
35640 else if (mode0 == SImode)
35641 op2 = op0;
35642 else
35643 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35644
35645 if (target == 0
35646 || !register_operand (target, SImode))
35647 target = gen_reg_rtx (SImode);
35648
35649 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35650 const0_rtx);
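/* GEU on CCCmode is true exactly when the carry flag is clear, i.e. when
   rdrand failed; the ISA documents that the destination is zeroed in that
   case, so the failing arm returns OP2 (the zeroed result) while the
   succeeding arm returns the constant 1 held in OP1.  */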
35651 emit_insn (gen_rtx_SET (VOIDmode, target,
35652 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35653 return target;
35654
35655 case IX86_BUILTIN_RDSEED16_STEP:
35656 icode = CODE_FOR_rdseedhi_1;
35657 mode0 = HImode;
35658 goto rdseed_step;
35659
35660 case IX86_BUILTIN_RDSEED32_STEP:
35661 icode = CODE_FOR_rdseedsi_1;
35662 mode0 = SImode;
35663 goto rdseed_step;
35664
35665 case IX86_BUILTIN_RDSEED64_STEP:
35666 icode = CODE_FOR_rdseeddi_1;
35667 mode0 = DImode;
35668
35669 rdseed_step:
35670 op0 = gen_reg_rtx (mode0);
35671 emit_insn (GEN_FCN (icode) (op0));
35672
35673 arg0 = CALL_EXPR_ARG (exp, 0);
35674 op1 = expand_normal (arg0);
35675 if (!address_operand (op1, VOIDmode))
35676 {
35677 op1 = convert_memory_address (Pmode, op1);
35678 op1 = copy_addr_to_reg (op1);
35679 }
35680 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35681
35682 op2 = gen_reg_rtx (QImode);
35683
35684 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35685 const0_rtx);
35686 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35687
35688 if (target == 0
35689 || !register_operand (target, SImode))
35690 target = gen_reg_rtx (SImode);
35691
35692 emit_insn (gen_zero_extendqisi2 (target, op2));
35693 return target;
35694
35695 case IX86_BUILTIN_SBB32:
35696 icode = CODE_FOR_subsi3_carry;
35697 mode0 = SImode;
35698 goto addcarryx;
35699
35700 case IX86_BUILTIN_SBB64:
35701 icode = CODE_FOR_subdi3_carry;
35702 mode0 = DImode;
35703 goto addcarryx;
35704
35705 case IX86_BUILTIN_ADDCARRYX32:
35706 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35707 mode0 = SImode;
35708 goto addcarryx;
35709
35710 case IX86_BUILTIN_ADDCARRYX64:
35711 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35712 mode0 = DImode;
35713
35714 addcarryx:
35715 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35716 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35717 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35718 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35719
35720 op0 = gen_reg_rtx (QImode);
35721
35722 /* Generate CF from input operand. */
35723 op1 = expand_normal (arg0);
35724 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35725 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
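/* The QImode addition of 0xff (constm1_rtx) just emitted overflows exactly
   when the incoming carry byte is nonzero, which recreates CF for the
   adc/sbb generated below.  */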
35726
35727 /* Generate the add/subtract-with-carry insn to compute X+Y+CF (X-Y-CF for the SBB builtins). */
35728 op2 = expand_normal (arg1);
35729 op3 = expand_normal (arg2);
35730
35731 if (!REG_P (op2))
35732 op2 = copy_to_mode_reg (mode0, op2);
35733 if (!REG_P (op3))
35734 op3 = copy_to_mode_reg (mode0, op3);
35735
35736 op0 = gen_reg_rtx (mode0);
35737
35738 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35739 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35740 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35741
35742 /* Store the result. */
35743 op4 = expand_normal (arg3);
35744 if (!address_operand (op4, VOIDmode))
35745 {
35746 op4 = convert_memory_address (Pmode, op4);
35747 op4 = copy_addr_to_reg (op4);
35748 }
35749 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35750
35751 /* Return current CF value. */
35752 if (target == 0)
35753 target = gen_reg_rtx (QImode);
35754
35755 PUT_MODE (pat, QImode);
35756 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35757 return target;
35758
35759 case IX86_BUILTIN_READ_FLAGS:
35760 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35761
35762 if (optimize
35763 || target == NULL_RTX
35764 || !nonimmediate_operand (target, word_mode)
35765 || GET_MODE (target) != word_mode)
35766 target = gen_reg_rtx (word_mode);
35767
35768 emit_insn (gen_pop (target));
35769 return target;
35770
35771 case IX86_BUILTIN_WRITE_FLAGS:
35772
35773 arg0 = CALL_EXPR_ARG (exp, 0);
35774 op0 = expand_normal (arg0);
35775 if (!general_no_elim_operand (op0, word_mode))
35776 op0 = copy_to_mode_reg (word_mode, op0);
35777
35778 emit_insn (gen_push (op0));
35779 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35780 return 0;
35781
35782 case IX86_BUILTIN_KORTESTC16:
35783 icode = CODE_FOR_kortestchi;
35784 mode0 = HImode;
35785 mode1 = CCCmode;
35786 goto kortest;
35787
35788 case IX86_BUILTIN_KORTESTZ16:
35789 icode = CODE_FOR_kortestzhi;
35790 mode0 = HImode;
35791 mode1 = CCZmode;
35792
35793 kortest:
35794 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35795 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35796 op0 = expand_normal (arg0);
35797 op1 = expand_normal (arg1);
35798
35799 op0 = copy_to_reg (op0);
35800 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35801 op1 = copy_to_reg (op1);
35802 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35803
35804 target = gen_reg_rtx (QImode);
35805 emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
35806
35807 /* Emit kortest. */
35808 emit_insn (GEN_FCN (icode) (op0, op1));
35809 /* And use setcc to return result from flags. */
35810 ix86_expand_setcc (target, EQ,
35811 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35812 return target;
35813
35814 case IX86_BUILTIN_GATHERSIV2DF:
35815 icode = CODE_FOR_avx2_gathersiv2df;
35816 goto gather_gen;
35817 case IX86_BUILTIN_GATHERSIV4DF:
35818 icode = CODE_FOR_avx2_gathersiv4df;
35819 goto gather_gen;
35820 case IX86_BUILTIN_GATHERDIV2DF:
35821 icode = CODE_FOR_avx2_gatherdiv2df;
35822 goto gather_gen;
35823 case IX86_BUILTIN_GATHERDIV4DF:
35824 icode = CODE_FOR_avx2_gatherdiv4df;
35825 goto gather_gen;
35826 case IX86_BUILTIN_GATHERSIV4SF:
35827 icode = CODE_FOR_avx2_gathersiv4sf;
35828 goto gather_gen;
35829 case IX86_BUILTIN_GATHERSIV8SF:
35830 icode = CODE_FOR_avx2_gathersiv8sf;
35831 goto gather_gen;
35832 case IX86_BUILTIN_GATHERDIV4SF:
35833 icode = CODE_FOR_avx2_gatherdiv4sf;
35834 goto gather_gen;
35835 case IX86_BUILTIN_GATHERDIV8SF:
35836 icode = CODE_FOR_avx2_gatherdiv8sf;
35837 goto gather_gen;
35838 case IX86_BUILTIN_GATHERSIV2DI:
35839 icode = CODE_FOR_avx2_gathersiv2di;
35840 goto gather_gen;
35841 case IX86_BUILTIN_GATHERSIV4DI:
35842 icode = CODE_FOR_avx2_gathersiv4di;
35843 goto gather_gen;
35844 case IX86_BUILTIN_GATHERDIV2DI:
35845 icode = CODE_FOR_avx2_gatherdiv2di;
35846 goto gather_gen;
35847 case IX86_BUILTIN_GATHERDIV4DI:
35848 icode = CODE_FOR_avx2_gatherdiv4di;
35849 goto gather_gen;
35850 case IX86_BUILTIN_GATHERSIV4SI:
35851 icode = CODE_FOR_avx2_gathersiv4si;
35852 goto gather_gen;
35853 case IX86_BUILTIN_GATHERSIV8SI:
35854 icode = CODE_FOR_avx2_gathersiv8si;
35855 goto gather_gen;
35856 case IX86_BUILTIN_GATHERDIV4SI:
35857 icode = CODE_FOR_avx2_gatherdiv4si;
35858 goto gather_gen;
35859 case IX86_BUILTIN_GATHERDIV8SI:
35860 icode = CODE_FOR_avx2_gatherdiv8si;
35861 goto gather_gen;
35862 case IX86_BUILTIN_GATHERALTSIV4DF:
35863 icode = CODE_FOR_avx2_gathersiv4df;
35864 goto gather_gen;
35865 case IX86_BUILTIN_GATHERALTDIV8SF:
35866 icode = CODE_FOR_avx2_gatherdiv8sf;
35867 goto gather_gen;
35868 case IX86_BUILTIN_GATHERALTSIV4DI:
35869 icode = CODE_FOR_avx2_gathersiv4di;
35870 goto gather_gen;
35871 case IX86_BUILTIN_GATHERALTDIV8SI:
35872 icode = CODE_FOR_avx2_gatherdiv8si;
35873 goto gather_gen;
35874 case IX86_BUILTIN_GATHER3SIV16SF:
35875 icode = CODE_FOR_avx512f_gathersiv16sf;
35876 goto gather_gen;
35877 case IX86_BUILTIN_GATHER3SIV8DF:
35878 icode = CODE_FOR_avx512f_gathersiv8df;
35879 goto gather_gen;
35880 case IX86_BUILTIN_GATHER3DIV16SF:
35881 icode = CODE_FOR_avx512f_gatherdiv16sf;
35882 goto gather_gen;
35883 case IX86_BUILTIN_GATHER3DIV8DF:
35884 icode = CODE_FOR_avx512f_gatherdiv8df;
35885 goto gather_gen;
35886 case IX86_BUILTIN_GATHER3SIV16SI:
35887 icode = CODE_FOR_avx512f_gathersiv16si;
35888 goto gather_gen;
35889 case IX86_BUILTIN_GATHER3SIV8DI:
35890 icode = CODE_FOR_avx512f_gathersiv8di;
35891 goto gather_gen;
35892 case IX86_BUILTIN_GATHER3DIV16SI:
35893 icode = CODE_FOR_avx512f_gatherdiv16si;
35894 goto gather_gen;
35895 case IX86_BUILTIN_GATHER3DIV8DI:
35896 icode = CODE_FOR_avx512f_gatherdiv8di;
35897 goto gather_gen;
35898 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35899 icode = CODE_FOR_avx512f_gathersiv8df;
35900 goto gather_gen;
35901 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35902 icode = CODE_FOR_avx512f_gatherdiv16sf;
35903 goto gather_gen;
35904 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35905 icode = CODE_FOR_avx512f_gathersiv8di;
35906 goto gather_gen;
35907 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35908 icode = CODE_FOR_avx512f_gatherdiv16si;
35909 goto gather_gen;
35910 case IX86_BUILTIN_SCATTERSIV16SF:
35911 icode = CODE_FOR_avx512f_scattersiv16sf;
35912 goto scatter_gen;
35913 case IX86_BUILTIN_SCATTERSIV8DF:
35914 icode = CODE_FOR_avx512f_scattersiv8df;
35915 goto scatter_gen;
35916 case IX86_BUILTIN_SCATTERDIV16SF:
35917 icode = CODE_FOR_avx512f_scatterdiv16sf;
35918 goto scatter_gen;
35919 case IX86_BUILTIN_SCATTERDIV8DF:
35920 icode = CODE_FOR_avx512f_scatterdiv8df;
35921 goto scatter_gen;
35922 case IX86_BUILTIN_SCATTERSIV16SI:
35923 icode = CODE_FOR_avx512f_scattersiv16si;
35924 goto scatter_gen;
35925 case IX86_BUILTIN_SCATTERSIV8DI:
35926 icode = CODE_FOR_avx512f_scattersiv8di;
35927 goto scatter_gen;
35928 case IX86_BUILTIN_SCATTERDIV16SI:
35929 icode = CODE_FOR_avx512f_scatterdiv16si;
35930 goto scatter_gen;
35931 case IX86_BUILTIN_SCATTERDIV8DI:
35932 icode = CODE_FOR_avx512f_scatterdiv8di;
35933 goto scatter_gen;
35934
35935 case IX86_BUILTIN_GATHERPFDPD:
35936 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35937 goto vec_prefetch_gen;
35938 case IX86_BUILTIN_GATHERPFDPS:
35939 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35940 goto vec_prefetch_gen;
35941 case IX86_BUILTIN_GATHERPFQPD:
35942 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35943 goto vec_prefetch_gen;
35944 case IX86_BUILTIN_GATHERPFQPS:
35945 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35946 goto vec_prefetch_gen;
35947 case IX86_BUILTIN_SCATTERPFDPD:
35948 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35949 goto vec_prefetch_gen;
35950 case IX86_BUILTIN_SCATTERPFDPS:
35951 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35952 goto vec_prefetch_gen;
35953 case IX86_BUILTIN_SCATTERPFQPD:
35954 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35955 goto vec_prefetch_gen;
35956 case IX86_BUILTIN_SCATTERPFQPS:
35957 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35958 goto vec_prefetch_gen;
35959
35960 gather_gen:
35961 rtx half;
35962 rtx (*gen) (rtx, rtx);
35963
35964 arg0 = CALL_EXPR_ARG (exp, 0);
35965 arg1 = CALL_EXPR_ARG (exp, 1);
35966 arg2 = CALL_EXPR_ARG (exp, 2);
35967 arg3 = CALL_EXPR_ARG (exp, 3);
35968 arg4 = CALL_EXPR_ARG (exp, 4);
35969 op0 = expand_normal (arg0);
35970 op1 = expand_normal (arg1);
35971 op2 = expand_normal (arg2);
35972 op3 = expand_normal (arg3);
35973 op4 = expand_normal (arg4);
35974 /* Note the arg order differs from the insn operand order: operand 0 is the destination, so builtin argument N corresponds to insn operand N + 1.  */
35975 mode0 = insn_data[icode].operand[1].mode;
35976 mode2 = insn_data[icode].operand[3].mode;
35977 mode3 = insn_data[icode].operand[4].mode;
35978 mode4 = insn_data[icode].operand[5].mode;
35979
35980 if (target == NULL_RTX
35981 || GET_MODE (target) != insn_data[icode].operand[0].mode
35982 || !insn_data[icode].operand[0].predicate (target,
35983 GET_MODE (target)))
35984 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35985 else
35986 subtarget = target;
35987
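/* Descriptive note (derived from the cases below): the *ALT* gather
   builtins pass some operands in a wider vector mode than the selected
   gather pattern expects.  The SIV...ALT forms pass a double-width index
   vector, and the DIV...ALT forms pass a double-width source and mask;
   extract the low halves of those oversized operands first.  */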
35988 switch (fcode)
35989 {
35990 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35991 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35992 half = gen_reg_rtx (V8SImode);
35993 if (!nonimmediate_operand (op2, V16SImode))
35994 op2 = copy_to_mode_reg (V16SImode, op2);
35995 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35996 op2 = half;
35997 break;
35998 case IX86_BUILTIN_GATHERALTSIV4DF:
35999 case IX86_BUILTIN_GATHERALTSIV4DI:
36000 half = gen_reg_rtx (V4SImode);
36001 if (!nonimmediate_operand (op2, V8SImode))
36002 op2 = copy_to_mode_reg (V8SImode, op2);
36003 emit_insn (gen_vec_extract_lo_v8si (half, op2));
36004 op2 = half;
36005 break;
36006 case IX86_BUILTIN_GATHER3ALTDIV16SF:
36007 case IX86_BUILTIN_GATHER3ALTDIV16SI:
36008 half = gen_reg_rtx (mode0);
36009 if (mode0 == V8SFmode)
36010 gen = gen_vec_extract_lo_v16sf;
36011 else
36012 gen = gen_vec_extract_lo_v16si;
36013 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36014 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36015 emit_insn (gen (half, op0));
36016 op0 = half;
36017 if (GET_MODE (op3) != VOIDmode)
36018 {
36019 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36020 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36021 emit_insn (gen (half, op3));
36022 op3 = half;
36023 }
36024 break;
36025 case IX86_BUILTIN_GATHERALTDIV8SF:
36026 case IX86_BUILTIN_GATHERALTDIV8SI:
36027 half = gen_reg_rtx (mode0);
36028 if (mode0 == V4SFmode)
36029 gen = gen_vec_extract_lo_v8sf;
36030 else
36031 gen = gen_vec_extract_lo_v8si;
36032 if (!nonimmediate_operand (op0, GET_MODE (op0)))
36033 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
36034 emit_insn (gen (half, op0));
36035 op0 = half;
36036 if (GET_MODE (op3) != VOIDmode)
36037 {
36038 if (!nonimmediate_operand (op3, GET_MODE (op3)))
36039 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
36040 emit_insn (gen (half, op3));
36041 op3 = half;
36042 }
36043 break;
36044 default:
36045 break;
36046 }
36047
36048 /* Force the memory operand to be addressed through a base register
36049 here; we don't want to do this for the memory operands of other
36050 builtin functions. */
36051 op1 = ix86_zero_extend_to_Pmode (op1);
36052
36053 if (!insn_data[icode].operand[1].predicate (op0, mode0))
36054 op0 = copy_to_mode_reg (mode0, op0);
36055 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
36056 op1 = copy_to_mode_reg (Pmode, op1);
36057 if (!insn_data[icode].operand[3].predicate (op2, mode2))
36058 op2 = copy_to_mode_reg (mode2, op2);
36059 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
36060 {
36061 if (!insn_data[icode].operand[4].predicate (op3, mode3))
36062 op3 = copy_to_mode_reg (mode3, op3);
36063 }
36064 else
36065 {
36066 op3 = copy_to_reg (op3);
36067 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
36068 }
36069 if (!insn_data[icode].operand[5].predicate (op4, mode4))
36070 {
36071 error ("the last argument must be scale 1, 2, 4, 8");
36072 return const0_rtx;
36073 }
36074
36075 /* Optimize. If mask is known to have all high bits set,
36076 replace op0 with pc_rtx to signal that the instruction
36077 overwrites the whole destination and doesn't use its
36078 previous contents. */
36079 if (optimize)
36080 {
36081 if (TREE_CODE (arg3) == INTEGER_CST)
36082 {
36083 if (integer_all_onesp (arg3))
36084 op0 = pc_rtx;
36085 }
36086 else if (TREE_CODE (arg3) == VECTOR_CST)
36087 {
36088 unsigned int negative = 0;
36089 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
36090 {
36091 tree cst = VECTOR_CST_ELT (arg3, i);
36092 if (TREE_CODE (cst) == INTEGER_CST
36093 && tree_int_cst_sign_bit (cst))
36094 negative++;
36095 else if (TREE_CODE (cst) == REAL_CST
36096 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
36097 negative++;
36098 }
36099 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
36100 op0 = pc_rtx;
36101 }
36102 else if (TREE_CODE (arg3) == SSA_NAME
36103 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
36104 {
36105 /* Recognize also when mask is like:
36106 __v2df src = _mm_setzero_pd ();
36107 __v2df mask = _mm_cmpeq_pd (src, src);
36108 or
36109 __v8sf src = _mm256_setzero_ps ();
36110 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
36111 as that is a cheaper way to load all ones into
36112 a register than having to load a constant from
36113 memory. */
36114 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
36115 if (is_gimple_call (def_stmt))
36116 {
36117 tree fndecl = gimple_call_fndecl (def_stmt);
36118 if (fndecl
36119 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
36120 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
36121 {
36122 case IX86_BUILTIN_CMPPD:
36123 case IX86_BUILTIN_CMPPS:
36124 case IX86_BUILTIN_CMPPD256:
36125 case IX86_BUILTIN_CMPPS256:
36126 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
36127 break;
36128 /* FALLTHRU */
36129 case IX86_BUILTIN_CMPEQPD:
36130 case IX86_BUILTIN_CMPEQPS:
36131 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
36132 && initializer_zerop (gimple_call_arg (def_stmt,
36133 1)))
36134 op0 = pc_rtx;
36135 break;
36136 default:
36137 break;
36138 }
36139 }
36140 }
36141 }
36142
36143 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
36144 if (! pat)
36145 return const0_rtx;
36146 emit_insn (pat);
36147
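/* For the DImode-indexed gathers handled explicitly below, the pattern's
   destination register is twice as wide as the builtin's result (only
   half of it is filled), so extract the low half of SUBTARGET into the
   user-visible TARGET.  */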
36148 switch (fcode)
36149 {
36150 case IX86_BUILTIN_GATHER3DIV16SF:
36151 if (target == NULL_RTX)
36152 target = gen_reg_rtx (V8SFmode);
36153 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
36154 break;
36155 case IX86_BUILTIN_GATHER3DIV16SI:
36156 if (target == NULL_RTX)
36157 target = gen_reg_rtx (V8SImode);
36158 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
36159 break;
36160 case IX86_BUILTIN_GATHERDIV8SF:
36161 if (target == NULL_RTX)
36162 target = gen_reg_rtx (V4SFmode);
36163 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
36164 break;
36165 case IX86_BUILTIN_GATHERDIV8SI:
36166 if (target == NULL_RTX)
36167 target = gen_reg_rtx (V4SImode);
36168 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
36169 break;
36170 default:
36171 target = subtarget;
36172 break;
36173 }
36174 return target;
36175
36176 scatter_gen:
36177 arg0 = CALL_EXPR_ARG (exp, 0);
36178 arg1 = CALL_EXPR_ARG (exp, 1);
36179 arg2 = CALL_EXPR_ARG (exp, 2);
36180 arg3 = CALL_EXPR_ARG (exp, 3);
36181 arg4 = CALL_EXPR_ARG (exp, 4);
36182 op0 = expand_normal (arg0);
36183 op1 = expand_normal (arg1);
36184 op2 = expand_normal (arg2);
36185 op3 = expand_normal (arg3);
36186 op4 = expand_normal (arg4);
36187 mode1 = insn_data[icode].operand[1].mode;
36188 mode2 = insn_data[icode].operand[2].mode;
36189 mode3 = insn_data[icode].operand[3].mode;
36190 mode4 = insn_data[icode].operand[4].mode;
36191
36192 /* Force the memory operand to be addressed through a base register
36193 here; we don't want to do this for the memory operands of other
36194 builtin functions. */
36195 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
36196
36197 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
36198 op0 = copy_to_mode_reg (Pmode, op0);
36199
36200 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
36201 {
36202 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36203 op1 = copy_to_mode_reg (mode1, op1);
36204 }
36205 else
36206 {
36207 op1 = copy_to_reg (op1);
36208 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
36209 }
36210
36211 if (!insn_data[icode].operand[2].predicate (op2, mode2))
36212 op2 = copy_to_mode_reg (mode2, op2);
36213
36214 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36215 op3 = copy_to_mode_reg (mode3, op3);
36216
36217 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36218 {
36219 error ("the last argument must be scale 1, 2, 4, 8");
36220 return const0_rtx;
36221 }
36222
36223 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36224 if (! pat)
36225 return const0_rtx;
36226
36227 emit_insn (pat);
36228 return 0;
36229
36230 vec_prefetch_gen:
36231 arg0 = CALL_EXPR_ARG (exp, 0);
36232 arg1 = CALL_EXPR_ARG (exp, 1);
36233 arg2 = CALL_EXPR_ARG (exp, 2);
36234 arg3 = CALL_EXPR_ARG (exp, 3);
36235 arg4 = CALL_EXPR_ARG (exp, 4);
36236 op0 = expand_normal (arg0);
36237 op1 = expand_normal (arg1);
36238 op2 = expand_normal (arg2);
36239 op3 = expand_normal (arg3);
36240 op4 = expand_normal (arg4);
36241 mode0 = insn_data[icode].operand[0].mode;
36242 mode1 = insn_data[icode].operand[1].mode;
36243 mode3 = insn_data[icode].operand[3].mode;
36244 mode4 = insn_data[icode].operand[4].mode;
36245
36246 if (GET_MODE (op0) == mode0
36247 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36248 {
36249 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36250 op0 = copy_to_mode_reg (mode0, op0);
36251 }
36252 else if (op0 != constm1_rtx)
36253 {
36254 op0 = copy_to_reg (op0);
36255 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36256 }
36257
36258 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36259 op1 = copy_to_mode_reg (mode1, op1);
36260
36261 /* Force the memory operand to be addressed through a base register
36262 here; we don't want to do this for the memory operands of other
36263 builtin functions. */
36264 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36265
36266 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36267 op2 = copy_to_mode_reg (Pmode, op2);
36268
36269 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36270 {
36271 error ("the forth argument must be scale 1, 2, 4, 8");
36272 return const0_rtx;
36273 }
36274
36275 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36276 {
36277 error ("incorrect hint operand");
36278 return const0_rtx;
36279 }
36280
36281 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36282 if (! pat)
36283 return const0_rtx;
36284
36285 emit_insn (pat);
36286
36287 return 0;
36288
36289 case IX86_BUILTIN_XABORT:
36290 icode = CODE_FOR_xabort;
36291 arg0 = CALL_EXPR_ARG (exp, 0);
36292 op0 = expand_normal (arg0);
36293 mode0 = insn_data[icode].operand[0].mode;
36294 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36295 {
36296 error ("the xabort's argument must be an 8-bit immediate");
36297 return const0_rtx;
36298 }
36299 emit_insn (gen_xabort (op0));
36300 return 0;
36301
36302 default:
36303 break;
36304 }
36305
36306 for (i = 0, d = bdesc_special_args;
36307 i < ARRAY_SIZE (bdesc_special_args);
36308 i++, d++)
36309 if (d->code == fcode)
36310 return ix86_expand_special_args_builtin (d, exp, target);
36311
36312 for (i = 0, d = bdesc_args;
36313 i < ARRAY_SIZE (bdesc_args);
36314 i++, d++)
36315 if (d->code == fcode)
36316 switch (fcode)
36317 {
36318 case IX86_BUILTIN_FABSQ:
36319 case IX86_BUILTIN_COPYSIGNQ:
36320 if (!TARGET_SSE)
36321 /* Emit a normal call if SSE isn't available. */
36322 return expand_call (exp, target, ignore);
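/* FALLTHRU */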
36323 default:
36324 return ix86_expand_args_builtin (d, exp, target);
36325 }
36326
36327 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36328 if (d->code == fcode)
36329 return ix86_expand_sse_comi (d, exp, target);
36330
36331 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36332 if (d->code == fcode)
36333 return ix86_expand_round_builtin (d, exp, target);
36334
36335 for (i = 0, d = bdesc_pcmpestr;
36336 i < ARRAY_SIZE (bdesc_pcmpestr);
36337 i++, d++)
36338 if (d->code == fcode)
36339 return ix86_expand_sse_pcmpestr (d, exp, target);
36340
36341 for (i = 0, d = bdesc_pcmpistr;
36342 i < ARRAY_SIZE (bdesc_pcmpistr);
36343 i++, d++)
36344 if (d->code == fcode)
36345 return ix86_expand_sse_pcmpistr (d, exp, target);
36346
36347 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36348 if (d->code == fcode)
36349 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36350 (enum ix86_builtin_func_type)
36351 d->flag, d->comparison);
36352
36353 gcc_unreachable ();
36354 }
36355
36356 /* Return the target-specific builtin with code CODE if
36357 current_function_decl is allowed to use it, as determined from its ISA
36358 flags. Return NULL_TREE otherwise. */
36359
36360 static tree ix86_get_builtin (enum ix86_builtins code)
36361 {
36362 struct cl_target_option *opts;
36363 tree target_tree = NULL_TREE;
36364
36365 /* Determine the isa flags of current_function_decl. */
36366
36367 if (current_function_decl)
36368 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36369
36370 if (target_tree == NULL)
36371 target_tree = target_option_default_node;
36372
36373 opts = TREE_TARGET_OPTION (target_tree);
36374
36375 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36376 return ix86_builtin_decl (code, true);
36377 else
36378 return NULL_TREE;
36379 }
36380
36381 /* Return a function decl for a vectorized version of the builtin function
36382 FNDECL with result vector type TYPE_OUT and argument vector type TYPE_IN,
36383 or NULL_TREE if it is not available. */
36384
36385 static tree
36386 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36387 tree type_in)
36388 {
36389 enum machine_mode in_mode, out_mode;
36390 int in_n, out_n;
36391 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36392
36393 if (TREE_CODE (type_out) != VECTOR_TYPE
36394 || TREE_CODE (type_in) != VECTOR_TYPE
36395 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36396 return NULL_TREE;
36397
36398 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36399 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36400 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36401 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36402
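/* For example, vectorizing a sqrt () call with V2DF input and output
   returns the decl for IX86_BUILTIN_SQRTPD; the remaining cases follow
   the same pattern for wider vectors and for the float variants.  */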
36403 switch (fn)
36404 {
36405 case BUILT_IN_SQRT:
36406 if (out_mode == DFmode && in_mode == DFmode)
36407 {
36408 if (out_n == 2 && in_n == 2)
36409 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36410 else if (out_n == 4 && in_n == 4)
36411 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36412 else if (out_n == 8 && in_n == 8)
36413 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36414 }
36415 break;
36416
36417 case BUILT_IN_EXP2F:
36418 if (out_mode == SFmode && in_mode == SFmode)
36419 {
36420 if (out_n == 16 && in_n == 16)
36421 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36422 }
36423 break;
36424
36425 case BUILT_IN_SQRTF:
36426 if (out_mode == SFmode && in_mode == SFmode)
36427 {
36428 if (out_n == 4 && in_n == 4)
36429 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36430 else if (out_n == 8 && in_n == 8)
36431 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36432 else if (out_n == 16 && in_n == 16)
36433 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36434 }
36435 break;
36436
36437 case BUILT_IN_IFLOOR:
36438 case BUILT_IN_LFLOOR:
36439 case BUILT_IN_LLFLOOR:
36440 /* The round insn does not trap on denormals. */
36441 if (flag_trapping_math || !TARGET_ROUND)
36442 break;
36443
36444 if (out_mode == SImode && in_mode == DFmode)
36445 {
36446 if (out_n == 4 && in_n == 2)
36447 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36448 else if (out_n == 8 && in_n == 4)
36449 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36450 else if (out_n == 16 && in_n == 8)
36451 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36452 }
36453 break;
36454
36455 case BUILT_IN_IFLOORF:
36456 case BUILT_IN_LFLOORF:
36457 case BUILT_IN_LLFLOORF:
36458 /* The round insn does not trap on denormals. */
36459 if (flag_trapping_math || !TARGET_ROUND)
36460 break;
36461
36462 if (out_mode == SImode && in_mode == SFmode)
36463 {
36464 if (out_n == 4 && in_n == 4)
36465 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36466 else if (out_n == 8 && in_n == 8)
36467 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36468 }
36469 break;
36470
36471 case BUILT_IN_ICEIL:
36472 case BUILT_IN_LCEIL:
36473 case BUILT_IN_LLCEIL:
36474 /* The round insn does not trap on denormals. */
36475 if (flag_trapping_math || !TARGET_ROUND)
36476 break;
36477
36478 if (out_mode == SImode && in_mode == DFmode)
36479 {
36480 if (out_n == 4 && in_n == 2)
36481 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36482 else if (out_n == 8 && in_n == 4)
36483 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36484 else if (out_n == 16 && in_n == 8)
36485 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36486 }
36487 break;
36488
36489 case BUILT_IN_ICEILF:
36490 case BUILT_IN_LCEILF:
36491 case BUILT_IN_LLCEILF:
36492 /* The round insn does not trap on denormals. */
36493 if (flag_trapping_math || !TARGET_ROUND)
36494 break;
36495
36496 if (out_mode == SImode && in_mode == SFmode)
36497 {
36498 if (out_n == 4 && in_n == 4)
36499 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36500 else if (out_n == 8 && in_n == 8)
36501 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36502 }
36503 break;
36504
36505 case BUILT_IN_IRINT:
36506 case BUILT_IN_LRINT:
36507 case BUILT_IN_LLRINT:
36508 if (out_mode == SImode && in_mode == DFmode)
36509 {
36510 if (out_n == 4 && in_n == 2)
36511 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36512 else if (out_n == 8 && in_n == 4)
36513 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36514 }
36515 break;
36516
36517 case BUILT_IN_IRINTF:
36518 case BUILT_IN_LRINTF:
36519 case BUILT_IN_LLRINTF:
36520 if (out_mode == SImode && in_mode == SFmode)
36521 {
36522 if (out_n == 4 && in_n == 4)
36523 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36524 else if (out_n == 8 && in_n == 8)
36525 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36526 }
36527 break;
36528
36529 case BUILT_IN_IROUND:
36530 case BUILT_IN_LROUND:
36531 case BUILT_IN_LLROUND:
36532 /* The round insn does not trap on denormals. */
36533 if (flag_trapping_math || !TARGET_ROUND)
36534 break;
36535
36536 if (out_mode == SImode && in_mode == DFmode)
36537 {
36538 if (out_n == 4 && in_n == 2)
36539 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36540 else if (out_n == 8 && in_n == 4)
36541 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36542 else if (out_n == 16 && in_n == 8)
36543 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36544 }
36545 break;
36546
36547 case BUILT_IN_IROUNDF:
36548 case BUILT_IN_LROUNDF:
36549 case BUILT_IN_LLROUNDF:
36550 /* The round insn does not trap on denormals. */
36551 if (flag_trapping_math || !TARGET_ROUND)
36552 break;
36553
36554 if (out_mode == SImode && in_mode == SFmode)
36555 {
36556 if (out_n == 4 && in_n == 4)
36557 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36558 else if (out_n == 8 && in_n == 8)
36559 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36560 }
36561 break;
36562
36563 case BUILT_IN_COPYSIGN:
36564 if (out_mode == DFmode && in_mode == DFmode)
36565 {
36566 if (out_n == 2 && in_n == 2)
36567 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36568 else if (out_n == 4 && in_n == 4)
36569 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36570 else if (out_n == 8 && in_n == 8)
36571 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36572 }
36573 break;
36574
36575 case BUILT_IN_COPYSIGNF:
36576 if (out_mode == SFmode && in_mode == SFmode)
36577 {
36578 if (out_n == 4 && in_n == 4)
36579 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36580 else if (out_n == 8 && in_n == 8)
36581 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36582 else if (out_n == 16 && in_n == 16)
36583 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36584 }
36585 break;
36586
36587 case BUILT_IN_FLOOR:
36588 /* The round insn does not trap on denormals. */
36589 if (flag_trapping_math || !TARGET_ROUND)
36590 break;
36591
36592 if (out_mode == DFmode && in_mode == DFmode)
36593 {
36594 if (out_n == 2 && in_n == 2)
36595 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36596 else if (out_n == 4 && in_n == 4)
36597 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36598 }
36599 break;
36600
36601 case BUILT_IN_FLOORF:
36602 /* The round insn does not trap on denormals. */
36603 if (flag_trapping_math || !TARGET_ROUND)
36604 break;
36605
36606 if (out_mode == SFmode && in_mode == SFmode)
36607 {
36608 if (out_n == 4 && in_n == 4)
36609 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36610 else if (out_n == 8 && in_n == 8)
36611 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36612 }
36613 break;
36614
36615 case BUILT_IN_CEIL:
36616 /* The round insn does not trap on denormals. */
36617 if (flag_trapping_math || !TARGET_ROUND)
36618 break;
36619
36620 if (out_mode == DFmode && in_mode == DFmode)
36621 {
36622 if (out_n == 2 && in_n == 2)
36623 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36624 else if (out_n == 4 && in_n == 4)
36625 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36626 }
36627 break;
36628
36629 case BUILT_IN_CEILF:
36630 /* The round insn does not trap on denormals. */
36631 if (flag_trapping_math || !TARGET_ROUND)
36632 break;
36633
36634 if (out_mode == SFmode && in_mode == SFmode)
36635 {
36636 if (out_n == 4 && in_n == 4)
36637 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36638 else if (out_n == 8 && in_n == 8)
36639 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36640 }
36641 break;
36642
36643 case BUILT_IN_TRUNC:
36644 /* The round insn does not trap on denormals. */
36645 if (flag_trapping_math || !TARGET_ROUND)
36646 break;
36647
36648 if (out_mode == DFmode && in_mode == DFmode)
36649 {
36650 if (out_n == 2 && in_n == 2)
36651 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36652 else if (out_n == 4 && in_n == 4)
36653 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36654 }
36655 break;
36656
36657 case BUILT_IN_TRUNCF:
36658 /* The round insn does not trap on denormals. */
36659 if (flag_trapping_math || !TARGET_ROUND)
36660 break;
36661
36662 if (out_mode == SFmode && in_mode == SFmode)
36663 {
36664 if (out_n == 4 && in_n == 4)
36665 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36666 else if (out_n == 8 && in_n == 8)
36667 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36668 }
36669 break;
36670
36671 case BUILT_IN_RINT:
36672 /* The round insn does not trap on denormals. */
36673 if (flag_trapping_math || !TARGET_ROUND)
36674 break;
36675
36676 if (out_mode == DFmode && in_mode == DFmode)
36677 {
36678 if (out_n == 2 && in_n == 2)
36679 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36680 else if (out_n == 4 && in_n == 4)
36681 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36682 }
36683 break;
36684
36685 case BUILT_IN_RINTF:
36686 /* The round insn does not trap on denormals. */
36687 if (flag_trapping_math || !TARGET_ROUND)
36688 break;
36689
36690 if (out_mode == SFmode && in_mode == SFmode)
36691 {
36692 if (out_n == 4 && in_n == 4)
36693 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36694 else if (out_n == 8 && in_n == 8)
36695 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36696 }
36697 break;
36698
36699 case BUILT_IN_ROUND:
36700 /* The round insn does not trap on denormals. */
36701 if (flag_trapping_math || !TARGET_ROUND)
36702 break;
36703
36704 if (out_mode == DFmode && in_mode == DFmode)
36705 {
36706 if (out_n == 2 && in_n == 2)
36707 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36708 else if (out_n == 4 && in_n == 4)
36709 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36710 }
36711 break;
36712
36713 case BUILT_IN_ROUNDF:
36714 /* The round insn does not trap on denormals. */
36715 if (flag_trapping_math || !TARGET_ROUND)
36716 break;
36717
36718 if (out_mode == SFmode && in_mode == SFmode)
36719 {
36720 if (out_n == 4 && in_n == 4)
36721 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36722 else if (out_n == 8 && in_n == 8)
36723 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36724 }
36725 break;
36726
36727 case BUILT_IN_FMA:
36728 if (out_mode == DFmode && in_mode == DFmode)
36729 {
36730 if (out_n == 2 && in_n == 2)
36731 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36732 if (out_n == 4 && in_n == 4)
36733 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36734 }
36735 break;
36736
36737 case BUILT_IN_FMAF:
36738 if (out_mode == SFmode && in_mode == SFmode)
36739 {
36740 if (out_n == 4 && in_n == 4)
36741 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36742 if (out_n == 8 && in_n == 8)
36743 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36744 }
36745 break;
36746
36747 default:
36748 break;
36749 }
36750
36751 /* Dispatch to a handler for a vectorization library. */
36752 if (ix86_veclib_handler)
36753 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36754 type_in);
36755
36756 return NULL_TREE;
36757 }
36758
36759 /* Handler for an SVML-style interface to
36760 a library with vectorized intrinsics. */
36761
36762 static tree
36763 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36764 {
36765 char name[20];
36766 tree fntype, new_fndecl, args;
36767 unsigned arity;
36768 const char *bname;
36769 enum machine_mode el_mode, in_mode;
36770 int n, in_n;
36771
36772 /* The SVML is suitable for unsafe math only. */
36773 if (!flag_unsafe_math_optimizations)
36774 return NULL_TREE;
36775
36776 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36777 n = TYPE_VECTOR_SUBPARTS (type_out);
36778 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36779 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36780 if (el_mode != in_mode
36781 || n != in_n)
36782 return NULL_TREE;
36783
36784 switch (fn)
36785 {
36786 case BUILT_IN_EXP:
36787 case BUILT_IN_LOG:
36788 case BUILT_IN_LOG10:
36789 case BUILT_IN_POW:
36790 case BUILT_IN_TANH:
36791 case BUILT_IN_TAN:
36792 case BUILT_IN_ATAN:
36793 case BUILT_IN_ATAN2:
36794 case BUILT_IN_ATANH:
36795 case BUILT_IN_CBRT:
36796 case BUILT_IN_SINH:
36797 case BUILT_IN_SIN:
36798 case BUILT_IN_ASINH:
36799 case BUILT_IN_ASIN:
36800 case BUILT_IN_COSH:
36801 case BUILT_IN_COS:
36802 case BUILT_IN_ACOSH:
36803 case BUILT_IN_ACOS:
36804 if (el_mode != DFmode || n != 2)
36805 return NULL_TREE;
36806 break;
36807
36808 case BUILT_IN_EXPF:
36809 case BUILT_IN_LOGF:
36810 case BUILT_IN_LOG10F:
36811 case BUILT_IN_POWF:
36812 case BUILT_IN_TANHF:
36813 case BUILT_IN_TANF:
36814 case BUILT_IN_ATANF:
36815 case BUILT_IN_ATAN2F:
36816 case BUILT_IN_ATANHF:
36817 case BUILT_IN_CBRTF:
36818 case BUILT_IN_SINHF:
36819 case BUILT_IN_SINF:
36820 case BUILT_IN_ASINHF:
36821 case BUILT_IN_ASINF:
36822 case BUILT_IN_COSHF:
36823 case BUILT_IN_COSF:
36824 case BUILT_IN_ACOSHF:
36825 case BUILT_IN_ACOSF:
36826 if (el_mode != SFmode || n != 4)
36827 return NULL_TREE;
36828 break;
36829
36830 default:
36831 return NULL_TREE;
36832 }
36833
36834 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36835
36836 if (fn == BUILT_IN_LOGF)
36837 strcpy (name, "vmlsLn4");
36838 else if (fn == BUILT_IN_LOG)
36839 strcpy (name, "vmldLn2");
36840 else if (n == 4)
36841 {
36842 sprintf (name, "vmls%s", bname+10);
36843 name[strlen (name)-1] = '4';
36844 }
36845 else
36846 sprintf (name, "vmld%s2", bname+10);
36847
36848 /* Convert to uppercase. */
36849 name[4] &= ~0x20;
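/* For example, BUILT_IN_SINF with n == 4 yields "vmlsSin4", and
   BUILT_IN_SIN with n == 2 yields "vmldSin2".  */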
36850
36851 arity = 0;
36852 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36853 args;
36854 args = TREE_CHAIN (args))
36855 arity++;
36856
36857 if (arity == 1)
36858 fntype = build_function_type_list (type_out, type_in, NULL);
36859 else
36860 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36861
36862 /* Build a function declaration for the vectorized function. */
36863 new_fndecl = build_decl (BUILTINS_LOCATION,
36864 FUNCTION_DECL, get_identifier (name), fntype);
36865 TREE_PUBLIC (new_fndecl) = 1;
36866 DECL_EXTERNAL (new_fndecl) = 1;
36867 DECL_IS_NOVOPS (new_fndecl) = 1;
36868 TREE_READONLY (new_fndecl) = 1;
36869
36870 return new_fndecl;
36871 }
36872
36873 /* Handler for an ACML-style interface to
36874 a library with vectorized intrinsics. */
36875
36876 static tree
36877 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36878 {
36879 char name[20] = "__vr.._";
36880 tree fntype, new_fndecl, args;
36881 unsigned arity;
36882 const char *bname;
36883 enum machine_mode el_mode, in_mode;
36884 int n, in_n;
36885
36886 /* ACML is 64-bit only and is suitable for unsafe math only, as it does
36887 not correctly support parts of IEEE arithmetic (such as denormals) with
36888 the required precision. */
36889 if (!TARGET_64BIT
36890 || !flag_unsafe_math_optimizations)
36891 return NULL_TREE;
36892
36893 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36894 n = TYPE_VECTOR_SUBPARTS (type_out);
36895 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36896 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36897 if (el_mode != in_mode
36898 || n != in_n)
36899 return NULL_TREE;
36900
36901 switch (fn)
36902 {
36903 case BUILT_IN_SIN:
36904 case BUILT_IN_COS:
36905 case BUILT_IN_EXP:
36906 case BUILT_IN_LOG:
36907 case BUILT_IN_LOG2:
36908 case BUILT_IN_LOG10:
36909 name[4] = 'd';
36910 name[5] = '2';
36911 if (el_mode != DFmode
36912 || n != 2)
36913 return NULL_TREE;
36914 break;
36915
36916 case BUILT_IN_SINF:
36917 case BUILT_IN_COSF:
36918 case BUILT_IN_EXPF:
36919 case BUILT_IN_POWF:
36920 case BUILT_IN_LOGF:
36921 case BUILT_IN_LOG2F:
36922 case BUILT_IN_LOG10F:
36923 name[4] = 's';
36924 name[5] = '4';
36925 if (el_mode != SFmode
36926 || n != 4)
36927 return NULL_TREE;
36928 break;
36929
36930 default:
36931 return NULL_TREE;
36932 }
36933
36934 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36935 sprintf (name + 7, "%s", bname+10);
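/* For example, BUILT_IN_SIN yields "__vrd2_sin" and BUILT_IN_SINF
   yields "__vrs4_sinf".  */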
36936
36937 arity = 0;
36938 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36939 args;
36940 args = TREE_CHAIN (args))
36941 arity++;
36942
36943 if (arity == 1)
36944 fntype = build_function_type_list (type_out, type_in, NULL);
36945 else
36946 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36947
36948 /* Build a function declaration for the vectorized function. */
36949 new_fndecl = build_decl (BUILTINS_LOCATION,
36950 FUNCTION_DECL, get_identifier (name), fntype);
36951 TREE_PUBLIC (new_fndecl) = 1;
36952 DECL_EXTERNAL (new_fndecl) = 1;
36953 DECL_IS_NOVOPS (new_fndecl) = 1;
36954 TREE_READONLY (new_fndecl) = 1;
36955
36956 return new_fndecl;
36957 }
36958
36959 /* Return a decl of a function that implements a gather load with memory
36960 vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE, or
36961 NULL_TREE if it is not available. */
36962
36963 static tree
36964 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36965 const_tree index_type, int scale)
36966 {
36967 bool si;
36968 enum ix86_builtins code;
36969
36970 if (! TARGET_AVX2)
36971 return NULL_TREE;
36972
36973 if ((TREE_CODE (index_type) != INTEGER_TYPE
36974 && !POINTER_TYPE_P (index_type))
36975 || (TYPE_MODE (index_type) != SImode
36976 && TYPE_MODE (index_type) != DImode))
36977 return NULL_TREE;
36978
36979 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36980 return NULL_TREE;
36981
36982 /* The v*gather* insns sign-extend the index to pointer mode. */
36983 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36984 && TYPE_UNSIGNED (index_type))
36985 return NULL_TREE;
36986
36987 if (scale <= 0
36988 || scale > 8
36989 || (scale & (scale - 1)) != 0)
36990 return NULL_TREE;
36991
36992 si = TYPE_MODE (index_type) == SImode;
36993 switch (TYPE_MODE (mem_vectype))
36994 {
36995 case V2DFmode:
36996 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36997 break;
36998 case V4DFmode:
36999 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
37000 break;
37001 case V2DImode:
37002 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
37003 break;
37004 case V4DImode:
37005 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
37006 break;
37007 case V4SFmode:
37008 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
37009 break;
37010 case V8SFmode:
37011 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
37012 break;
37013 case V4SImode:
37014 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
37015 break;
37016 case V8SImode:
37017 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
37018 break;
37019 case V8DFmode:
37020 if (TARGET_AVX512F)
37021 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
37022 else
37023 return NULL_TREE;
37024 break;
37025 case V8DImode:
37026 if (TARGET_AVX512F)
37027 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
37028 else
37029 return NULL_TREE;
37030 break;
37031 case V16SFmode:
37032 if (TARGET_AVX512F)
37033 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
37034 else
37035 return NULL_TREE;
37036 break;
37037 case V16SImode:
37038 if (TARGET_AVX512F)
37039 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
37040 else
37041 return NULL_TREE;
37042 break;
37043 default:
37044 return NULL_TREE;
37045 }
37046
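/* For example, a gather of V4DF elements with an SImode index type maps
   to IX86_BUILTIN_GATHERALTSIV4DF, while a DImode index type maps to
   IX86_BUILTIN_GATHERDIV4DF.  */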
37047 return ix86_get_builtin (code);
37048 }
37049
37050 /* Return the decl of a target-specific builtin that implements the
37051 reciprocal of the function FN, or NULL_TREE if not available. */
37052
37053 static tree
37054 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
37055 {
37056 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
37057 && flag_finite_math_only && !flag_trapping_math
37058 && flag_unsafe_math_optimizations))
37059 return NULL_TREE;
37060
37061 if (md_fn)
37062 /* Machine dependent builtins. */
37063 switch (fn)
37064 {
37065 /* Vectorized version of sqrt to rsqrt conversion. */
37066 case IX86_BUILTIN_SQRTPS_NR:
37067 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
37068
37069 case IX86_BUILTIN_SQRTPS_NR256:
37070 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
37071
37072 default:
37073 return NULL_TREE;
37074 }
37075 else
37076 /* Normal builtins. */
37077 switch (fn)
37078 {
37079 /* Sqrt to rsqrt conversion. */
37080 case BUILT_IN_SQRTF:
37081 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
37082
37083 default:
37084 return NULL_TREE;
37085 }
37086 }
37087 \f
37088 /* Helper for avx_vpermilps256_operand et al. This is also used by
37089 the expansion functions to turn the parallel back into a mask.
37090 The return value is 0 for no match and the imm8+1 for a match. */
37091
37092 int
37093 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
37094 {
37095 unsigned i, nelt = GET_MODE_NUNITS (mode);
37096 unsigned mask = 0;
37097 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
37098
37099 if (XVECLEN (par, 0) != (int) nelt)
37100 return 0;
37101
37102 /* Validate that all of the elements are constants, and not totally
37103 out of range. Copy the data into an integral array to make the
37104 subsequent checks easier. */
37105 for (i = 0; i < nelt; ++i)
37106 {
37107 rtx er = XVECEXP (par, 0, i);
37108 unsigned HOST_WIDE_INT ei;
37109
37110 if (!CONST_INT_P (er))
37111 return 0;
37112 ei = INTVAL (er);
37113 if (ei >= nelt)
37114 return 0;
37115 ipar[i] = ei;
37116 }
37117
37118 switch (mode)
37119 {
37120 case V8DFmode:
37121 /* In the 512-bit DFmode case, we can only move elements within
37122 a 128-bit lane. First fill the second part of the mask,
37123 then fallthru. */
37124 for (i = 4; i < 6; ++i)
37125 {
37126 if (ipar[i] < 4 || ipar[i] >= 6)
37127 return 0;
37128 mask |= (ipar[i] - 4) << i;
37129 }
37130 for (i = 6; i < 8; ++i)
37131 {
37132 if (ipar[i] < 6)
37133 return 0;
37134 mask |= (ipar[i] - 6) << i;
37135 }
37136 /* FALLTHRU */
37137
37138 case V4DFmode:
37139 /* In the 256-bit DFmode case, we can only move elements within
37140 a 128-bit lane. */
37141 for (i = 0; i < 2; ++i)
37142 {
37143 if (ipar[i] >= 2)
37144 return 0;
37145 mask |= ipar[i] << i;
37146 }
37147 for (i = 2; i < 4; ++i)
37148 {
37149 if (ipar[i] < 2)
37150 return 0;
37151 mask |= (ipar[i] - 2) << i;
37152 }
37153 break;
37154
37155 case V16SFmode:
37156 /* In the 512-bit SFmode case, the permutation in the upper 256 bits
37157 must mirror the permutation in the lower 256 bits. */
37158 for (i = 0; i < 8; ++i)
37159 if (ipar[i] + 8 != ipar[i + 8])
37160 return 0;
37161 /* FALLTHRU */
37162
37163 case V8SFmode:
37164 /* In the 256-bit SFmode case, we have full freedom of
37165 movement within the low 128-bit lane, but the high 128-bit
37166 lane must mirror the exact same pattern. */
37167 for (i = 0; i < 4; ++i)
37168 if (ipar[i] + 4 != ipar[i + 4])
37169 return 0;
37170 nelt = 4;
37171 /* FALLTHRU */
37172
37173 case V2DFmode:
37174 case V4SFmode:
37175 /* In the 128-bit case, we have full freedom in the placement of
37176 the elements from the source operand. */
37177 for (i = 0; i < nelt; ++i)
37178 mask |= ipar[i] << (i * (nelt / 2));
37179 break;
37180
37181 default:
37182 gcc_unreachable ();
37183 }
37184
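/* For example, for V4SFmode a parallel selecting elements (3 2 1 0)
   produces mask 0x1b, so the function returns 0x1c.  */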
37185 /* Make sure success has a non-zero value by adding one. */
37186 return mask + 1;
37187 }
37188
37189 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
37190 the expansion functions to turn the parallel back into a mask.
37191 The return value is 0 for no match and the imm8+1 for a match. */
37192
37193 int
37194 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
37195 {
37196 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
37197 unsigned mask = 0;
37198 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
37199
37200 if (XVECLEN (par, 0) != (int) nelt)
37201 return 0;
37202
37203 /* Validate that all of the elements are constants, and not totally
37204 out of range. Copy the data into an integral array to make the
37205 subsequent checks easier. */
37206 for (i = 0; i < nelt; ++i)
37207 {
37208 rtx er = XVECEXP (par, 0, i);
37209 unsigned HOST_WIDE_INT ei;
37210
37211 if (!CONST_INT_P (er))
37212 return 0;
37213 ei = INTVAL (er);
37214 if (ei >= 2 * nelt)
37215 return 0;
37216 ipar[i] = ei;
37217 }
37218
37219 /* Validate that the elements within each half of the permute are consecutive. */
37220 for (i = 0; i < nelt2 - 1; ++i)
37221 if (ipar[i] + 1 != ipar[i + 1])
37222 return 0;
37223 for (i = nelt2; i < nelt - 1; ++i)
37224 if (ipar[i] + 1 != ipar[i + 1])
37225 return 0;
37226
37227 /* Reconstruct the mask. */
37228 for (i = 0; i < 2; ++i)
37229 {
37230 unsigned e = ipar[i * nelt2];
37231 if (e % nelt2)
37232 return 0;
37233 e /= nelt2;
37234 mask |= e << (i * 4);
37235 }
37236
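/* For example, for V4DFmode a parallel selecting elements (2 3 4 5)
   reconstructs the imm8 as 0x21, so the function returns 0x22.  */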
37237 /* Make sure success has a non-zero value by adding one. */
37238 return mask + 1;
37239 }
37240 \f
37241 /* Return a register priority for hard register HARD_REGNO. */
37242 static int
37243 ix86_register_priority (int hard_regno)
37244 {
37245 /* ebp and r13 as a base always want a displacement, and r12 as a
37246 base always wants an index. So discourage their use in an
37247 address. */
37248 if (hard_regno == R12_REG || hard_regno == R13_REG)
37249 return 0;
37250 if (hard_regno == BP_REG)
37251 return 1;
37252 /* New x86-64 int registers result in bigger code size. Discourage
37253 them. */
37254 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37255 return 2;
37256 /* New x86-64 SSE registers result in bigger code size. Discourage
37257 them. */
37258 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37259 return 2;
37260 /* Using the AX register results in smaller code, so prefer it. */
37261 if (hard_regno == 0)
37262 return 4;
37263 return 3;
37264 }
37265
37266 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37267
37268 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37269 QImode must go into class Q_REGS.
37270 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
37271 movdf to do mem-to-mem moves through integer regs. */
37272
37273 static reg_class_t
37274 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37275 {
37276 enum machine_mode mode = GET_MODE (x);
37277
37278 /* We're only allowed to return a subclass of CLASS. Many of the
37279 following checks fail for NO_REGS, so eliminate that early. */
37280 if (regclass == NO_REGS)
37281 return NO_REGS;
37282
37283 /* All classes can load zeros. */
37284 if (x == CONST0_RTX (mode))
37285 return regclass;
37286
37287 /* Force constants into memory if we are loading a (nonzero) constant into
37288 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37289 instructions to load from a constant. */
37290 if (CONSTANT_P (x)
37291 && (MAYBE_MMX_CLASS_P (regclass)
37292 || MAYBE_SSE_CLASS_P (regclass)
37293 || MAYBE_MASK_CLASS_P (regclass)))
37294 return NO_REGS;
37295
37296 /* Prefer SSE regs only, if we can use them for math. */
37297 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37298 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37299
37300 /* Floating-point constants need more complex checks. */
37301 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37302 {
37303 /* General regs can load everything. */
37304 if (reg_class_subset_p (regclass, GENERAL_REGS))
37305 return regclass;
37306
37307 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37308 zero above. We only want to wind up preferring 80387 registers if
37309 we plan on doing computation with them. */
37310 if (TARGET_80387
37311 && standard_80387_constant_p (x) > 0)
37312 {
37313 /* Limit class to non-sse. */
37314 if (regclass == FLOAT_SSE_REGS)
37315 return FLOAT_REGS;
37316 if (regclass == FP_TOP_SSE_REGS)
37317 return FP_TOP_REG;
37318 if (regclass == FP_SECOND_SSE_REGS)
37319 return FP_SECOND_REG;
37320 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37321 return regclass;
37322 }
37323
37324 return NO_REGS;
37325 }
37326
37327 /* Generally when we see PLUS here, it's the function invariant
37328 (plus soft-fp const_int). Which can only be computed into general
37329 regs. */
37330 if (GET_CODE (x) == PLUS)
37331 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37332
37333 /* QImode constants are easy to load, but non-constant QImode data
37334 must go into Q_REGS. */
37335 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37336 {
37337 if (reg_class_subset_p (regclass, Q_REGS))
37338 return regclass;
37339 if (reg_class_subset_p (Q_REGS, regclass))
37340 return Q_REGS;
37341 return NO_REGS;
37342 }
37343
37344 return regclass;
37345 }
37346
37347 /* Discourage putting floating-point values in SSE registers unless
37348 SSE math is being used, and likewise for the 387 registers. */
37349 static reg_class_t
37350 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37351 {
37352 enum machine_mode mode = GET_MODE (x);
37353
37354 /* Restrict the output reload class to the register bank that we are doing
37355 math on. If we would like not to return a subset of CLASS, reject this
37356 alternative: if reload cannot do this, it will still use its choice. */
37358 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37359 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37360
37361 if (X87_FLOAT_MODE_P (mode))
37362 {
37363 if (regclass == FP_TOP_SSE_REGS)
37364 return FP_TOP_REG;
37365 else if (regclass == FP_SECOND_SSE_REGS)
37366 return FP_SECOND_REG;
37367 else
37368 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37369 }
37370
37371 return regclass;
37372 }
37373
37374 static reg_class_t
37375 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37376 enum machine_mode mode, secondary_reload_info *sri)
37377 {
37378 /* Double-word spills from general registers to non-offsettable memory
37379 references (zero-extended addresses) require special handling. */
37380 if (TARGET_64BIT
37381 && MEM_P (x)
37382 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37383 && INTEGER_CLASS_P (rclass)
37384 && !offsettable_memref_p (x))
37385 {
37386 sri->icode = (in_p
37387 ? CODE_FOR_reload_noff_load
37388 : CODE_FOR_reload_noff_store);
37389 /* Add the cost of moving address to a temporary. */
37390 sri->extra_cost = 1;
37391
37392 return NO_REGS;
37393 }
37394
37395 /* QImode spills from non-QI registers require an
37396 intermediate register on 32-bit targets. */
37397 if (mode == QImode
37398 && (MAYBE_MASK_CLASS_P (rclass)
37399 || (!TARGET_64BIT && !in_p
37400 && INTEGER_CLASS_P (rclass)
37401 && MAYBE_NON_Q_CLASS_P (rclass))))
37402 {
37403 int regno;
37404
37405 if (REG_P (x))
37406 regno = REGNO (x);
37407 else
37408 regno = -1;
37409
37410 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37411 regno = true_regnum (x);
37412
37413 /* Return Q_REGS if the operand is in memory. */
37414 if (regno == -1)
37415 return Q_REGS;
37416 }
37417
37418 /* This condition handles the corner case where an expression involving
37419 pointers gets vectorized. We're trying to use the address of a
37420 stack slot as a vector initializer.
37421
37422 (set (reg:V2DI 74 [ vect_cst_.2 ])
37423 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37424
37425 Eventually frame gets turned into sp+offset like this:
37426
37427 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37428 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37429 (const_int 392 [0x188]))))
37430
37431 That later gets turned into:
37432
37433 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37434 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37435 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37436
37437 We'll have the following reload recorded:
37438
37439 Reload 0: reload_in (DI) =
37440 (plus:DI (reg/f:DI 7 sp)
37441 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37442 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37443 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37444 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37445 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37446 reload_reg_rtx: (reg:V2DI 22 xmm1)
37447
37448 Which isn't going to work, since SSE instructions can't handle scalar
37449 additions. Returning GENERAL_REGS forces the addition into an integer
37450 register, and reload can handle subsequent reloads without problems. */
37451
37452 if (in_p && GET_CODE (x) == PLUS
37453 && SSE_CLASS_P (rclass)
37454 && SCALAR_INT_MODE_P (mode))
37455 return GENERAL_REGS;
37456
37457 return NO_REGS;
37458 }
37459
37460 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37461
37462 static bool
37463 ix86_class_likely_spilled_p (reg_class_t rclass)
37464 {
37465 switch (rclass)
37466 {
37467 case AREG:
37468 case DREG:
37469 case CREG:
37470 case BREG:
37471 case AD_REGS:
37472 case SIREG:
37473 case DIREG:
37474 case SSE_FIRST_REG:
37475 case FP_TOP_REG:
37476 case FP_SECOND_REG:
37477 return true;
37478
37479 default:
37480 break;
37481 }
37482
37483 return false;
37484 }
37485
37486 /* If we are copying between general and FP registers, we need a memory
37487 location. The same is true for SSE and MMX registers.
37488
37489 To optimize register_move_cost performance, allow inline variant.
37490
37491 The macro can't work reliably when one of the CLASSES is a class containing
37492 registers from multiple units (SSE, MMX, integer). We avoid this by never
37493 combining those units in a single alternative in the machine description.
37494 Ensure that this constraint holds to avoid unexpected surprises.
37495
37496 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37497 enforce these sanity checks. */
37498
37499 static inline bool
37500 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37501 enum machine_mode mode, int strict)
37502 {
37503 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37504 return false;
37505 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37506 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37507 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37508 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37509 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37510 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37511 {
37512 gcc_assert (!strict || lra_in_progress);
37513 return true;
37514 }
37515
37516 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37517 return true;
37518
37519 /* Between mask and general, we have moves no larger than word size. */
37520 if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
37521 && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
37522 return true;
37523
37524 /* ??? This is a lie. We do have moves between mmx/general and between
37525 mmx/sse2. But by saying we need secondary memory we discourage the
37526 register allocator from using the mmx registers unless needed. */
37527 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37528 return true;
37529
37530 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37531 {
37532 /* SSE1 doesn't have any direct moves from other classes. */
37533 if (!TARGET_SSE2)
37534 return true;
37535
37536 /* If the target says that inter-unit moves are more expensive
37537 than moving through memory, then don't generate them. */
37538 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37539 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37540 return true;
37541
37542 /* Between SSE and general, we have moves no larger than word size. */
37543 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37544 return true;
37545 }
37546
37547 return false;
37548 }
37549
37550 bool
37551 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37552 enum machine_mode mode, int strict)
37553 {
37554 return inline_secondary_memory_needed (class1, class2, mode, strict);
37555 }
37556
37557 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37558
37559 On the 80386, this is the size of MODE in words,
37560 except in the FP regs, where a single reg is always enough. */
37561
37562 static unsigned char
37563 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37564 {
37565 if (MAYBE_INTEGER_CLASS_P (rclass))
37566 {
37567 if (mode == XFmode)
37568 return (TARGET_64BIT ? 2 : 3);
37569 else if (mode == XCmode)
37570 return (TARGET_64BIT ? 4 : 6);
37571 else
37572 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37573 }
37574 else
37575 {
37576 if (COMPLEX_MODE_P (mode))
37577 return 2;
37578 else
37579 return 1;
37580 }
37581 }
37582
37583 /* Return true if the registers in CLASS cannot represent the change from
37584 modes FROM to TO. */
37585
37586 bool
37587 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37588 enum reg_class regclass)
37589 {
37590 if (from == to)
37591 return false;
37592
37593 /* x87 registers can't do subreg at all, as all values are reformatted
37594 to extended precision. */
37595 if (MAYBE_FLOAT_CLASS_P (regclass))
37596 return true;
37597
37598 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37599 {
37600 /* Vector registers do not support QI or HImode loads. If we don't
37601 disallow a change to these modes, reload will assume it's ok to
37602 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37603 the vec_dupv4hi pattern. */
37604 if (GET_MODE_SIZE (from) < 4)
37605 return true;
37606 }
37607
37608 return false;
37609 }
37610
37611 /* Return the cost of moving data of mode MODE between a
37612 register and memory. A value of 2 is the default; this cost is
37613 relative to those in `REGISTER_MOVE_COST'.
37614
37615 This function is used extensively by register_move_cost, which is used to
37616 build tables at startup. Make it inline in this case.
37617 When IN is 2, return the maximum of the in and out move costs.
37618
37619 If moving between registers and memory is more expensive than
37620 between two registers, you should define this macro to express the
37621 relative cost.
37622
37623 Also model the increased cost of moving QImode registers in
37624 non-Q_REGS classes.
37625 */
37626 static inline int
37627 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37628 int in)
37629 {
37630 int cost;
37631 if (FLOAT_CLASS_P (regclass))
37632 {
37633 int index;
37634 switch (mode)
37635 {
37636 case SFmode:
37637 index = 0;
37638 break;
37639 case DFmode:
37640 index = 1;
37641 break;
37642 case XFmode:
37643 index = 2;
37644 break;
37645 default:
37646 return 100;
37647 }
37648 if (in == 2)
37649 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37650 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37651 }
37652 if (SSE_CLASS_P (regclass))
37653 {
37654 int index;
37655 switch (GET_MODE_SIZE (mode))
37656 {
37657 case 4:
37658 index = 0;
37659 break;
37660 case 8:
37661 index = 1;
37662 break;
37663 case 16:
37664 index = 2;
37665 break;
37666 default:
37667 return 100;
37668 }
37669 if (in == 2)
37670 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37671 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37672 }
37673 if (MMX_CLASS_P (regclass))
37674 {
37675 int index;
37676 switch (GET_MODE_SIZE (mode))
37677 {
37678 case 4:
37679 index = 0;
37680 break;
37681 case 8:
37682 index = 1;
37683 break;
37684 default:
37685 return 100;
37686 }
37687 if (in == 2)
37688 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37689 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37690 }
37691 switch (GET_MODE_SIZE (mode))
37692 {
37693 case 1:
37694 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37695 {
37696 if (!in)
37697 return ix86_cost->int_store[0];
37698 if (TARGET_PARTIAL_REG_DEPENDENCY
37699 && optimize_function_for_speed_p (cfun))
37700 cost = ix86_cost->movzbl_load;
37701 else
37702 cost = ix86_cost->int_load[0];
37703 if (in == 2)
37704 return MAX (cost, ix86_cost->int_store[0]);
37705 return cost;
37706 }
37707 else
37708 {
37709 if (in == 2)
37710 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37711 if (in)
37712 return ix86_cost->movzbl_load;
37713 else
37714 return ix86_cost->int_store[0] + 4;
37715 }
37716 break;
37717 case 2:
37718 if (in == 2)
37719 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37720 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37721 default:
37722 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37723 if (mode == TFmode)
37724 mode = XFmode;
37725 if (in == 2)
37726 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37727 else if (in)
37728 cost = ix86_cost->int_load[2];
37729 else
37730 cost = ix86_cost->int_store[2];
37731 return (cost * (((int) GET_MODE_SIZE (mode)
37732 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37733 }
37734 }
37735
37736 static int
37737 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37738 bool in)
37739 {
37740 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37741 }
37742
37743
37744 /* Return the cost of moving data from a register in class CLASS1 to
37745 one in class CLASS2.
37746
37747 It is not required that the cost always equal 2 when FROM is the same as TO;
37748 on some machines it is expensive to move between registers if they are not
37749 general registers. */
37750
37751 static int
37752 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37753 reg_class_t class2_i)
37754 {
37755 enum reg_class class1 = (enum reg_class) class1_i;
37756 enum reg_class class2 = (enum reg_class) class2_i;
37757
37758 /* In case we require secondary memory, compute the cost of the store followed
37759 by the load. In order to avoid bad register allocation choices, we need
37760 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
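/* A rough illustration, assuming a 32-bit target where the SSE/GPR copy
   really does need secondary memory: for DFmode between SSE_REGS and
   GENERAL_REGS the base cost is 1 + MAX (sse_load[1], sse_store[1])
   + 2 * MAX (int_load[2], int_store[2]) (the GPR half takes two 32-bit
   moves), before the extra penalties added below.  */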
37761
37762 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37763 {
37764 int cost = 1;
37765
37766 cost += inline_memory_move_cost (mode, class1, 2);
37767 cost += inline_memory_move_cost (mode, class2, 2);
37768
37769 /* When copying from a general-purpose register we may emit multiple
37770 stores followed by a single load, causing a memory-size-mismatch stall.
37771 Count this as an arbitrarily high cost of 20. */
37772 if (targetm.class_max_nregs (class1, mode)
37773 > targetm.class_max_nregs (class2, mode))
37774 cost += 20;
37775
37776 /* In the case of FP/MMX moves, the registers actually overlap, and we
37777 have to switch modes in order to treat them differently. */
37778 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37779 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37780 cost += 20;
37781
37782 return cost;
37783 }
37784
37785 /* Moves between SSE/MMX and integer unit are expensive. */
37786 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37787 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37788
37789 /* ??? By keeping the returned value relatively high, we limit the number
37790 of moves between integer and MMX/SSE registers for all targets.
37791 Additionally, a high value prevents a problem with x86_modes_tieable_p (),
37792 where integer modes in MMX/SSE registers are not tieable
37793 because of missing QImode and HImode moves to, from or between
37794 MMX/SSE registers. */
37795 return MAX (8, ix86_cost->mmxsse_to_integer);
37796
37797 if (MAYBE_FLOAT_CLASS_P (class1))
37798 return ix86_cost->fp_move;
37799 if (MAYBE_SSE_CLASS_P (class1))
37800 return ix86_cost->sse_move;
37801 if (MAYBE_MMX_CLASS_P (class1))
37802 return ix86_cost->mmx_move;
37803 return 2;
37804 }
37805
37806 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37807 MODE. */
37808
37809 bool
37810 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37811 {
37812 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
37813 if (CC_REGNO_P (regno))
37814 return GET_MODE_CLASS (mode) == MODE_CC;
37815 if (GET_MODE_CLASS (mode) == MODE_CC
37816 || GET_MODE_CLASS (mode) == MODE_RANDOM
37817 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37818 return false;
37819 if (STACK_REGNO_P (regno))
37820 return VALID_FP_MODE_P (mode);
37821 if (MASK_REGNO_P (regno))
37822 return (VALID_MASK_REG_MODE (mode)
37823 || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
37824 if (SSE_REGNO_P (regno))
37825 {
37826 /* We implement the move patterns for all vector modes into and
37827 out of SSE registers, even when no operation instructions
37828 are available. */
37829
37830 /* For AVX-512 we allow, regardless of regno:
37831 - XI mode
37832 - any 512-bit wide vector mode
37833 - any scalar mode. */
37834 if (TARGET_AVX512F
37835 && (mode == XImode
37836 || VALID_AVX512F_REG_MODE (mode)
37837 || VALID_AVX512F_SCALAR_MODE (mode)))
37838 return true;
37839
37840 /* TODO check for QI/HI scalars. */
37841 /* AVX512VL allows SSE registers 16+ for 128/256-bit modes. */
37842 if (TARGET_AVX512VL
37843 && (mode == OImode
37844 || mode == TImode
37845 || VALID_AVX256_REG_MODE (mode)
37846 || VALID_AVX512VL_128_REG_MODE (mode)))
37847 return true;
37848
37849 /* xmm16-xmm31 are only available for AVX-512. */
37850 if (EXT_REX_SSE_REGNO_P (regno))
37851 return false;
37852
37853 /* OImode and AVX modes are available only when AVX is enabled. */
37854 return ((TARGET_AVX
37855 && VALID_AVX256_REG_OR_OI_MODE (mode))
37856 || VALID_SSE_REG_MODE (mode)
37857 || VALID_SSE2_REG_MODE (mode)
37858 || VALID_MMX_REG_MODE (mode)
37859 || VALID_MMX_REG_MODE_3DNOW (mode));
37860 }
37861 if (MMX_REGNO_P (regno))
37862 {
37863 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37864 so if the register is available at all, then we can move data of
37865 the given mode into or out of it. */
37866 return (VALID_MMX_REG_MODE (mode)
37867 || VALID_MMX_REG_MODE_3DNOW (mode));
37868 }
37869
37870 if (mode == QImode)
37871 {
37872 /* Take care for QImode values - they can be in non-QI regs,
37873 but then they do cause partial register stalls. */
37874 if (ANY_QI_REGNO_P (regno))
37875 return true;
37876 if (!TARGET_PARTIAL_REG_STALL)
37877 return true;
37878 /* LRA checks if the hard register is OK for the given mode.
37879 QImode values can live in non-QI regs, so we allow all
37880 registers here. */
37881 if (lra_in_progress)
37882 return true;
37883 return !can_create_pseudo_p ();
37884 }
37885 /* We handle both integers and floats in the general purpose registers. */
37886 else if (VALID_INT_MODE_P (mode))
37887 return true;
37888 else if (VALID_FP_MODE_P (mode))
37889 return true;
37890 else if (VALID_DFP_MODE_P (mode))
37891 return true;
37892 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37893 on to use that value in smaller contexts, this can easily force a
37894 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37895 supporting DImode, allow it. */
37896 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37897 return true;
37898
37899 return false;
37900 }
37901
37902 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37903 tieable integer mode. */
37904
37905 static bool
37906 ix86_tieable_integer_mode_p (enum machine_mode mode)
37907 {
37908 switch (mode)
37909 {
37910 case HImode:
37911 case SImode:
37912 return true;
37913
37914 case QImode:
37915 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37916
37917 case DImode:
37918 return TARGET_64BIT;
37919
37920 default:
37921 return false;
37922 }
37923 }
37924
37925 /* Return true if MODE1 is accessible in a register that can hold MODE2
37926 without copying. That is, all register classes that can hold MODE2
37927 can also hold MODE1. */
37928
37929 bool
37930 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37931 {
37932 if (mode1 == mode2)
37933 return true;
37934
37935 if (ix86_tieable_integer_mode_p (mode1)
37936 && ix86_tieable_integer_mode_p (mode2))
37937 return true;
37938
37939 /* MODE2 being XFmode implies fp stack or general regs, which means we
37940 can tie any smaller floating point modes to it. Note that we do not
37941 tie this with TFmode. */
37942 if (mode2 == XFmode)
37943 return mode1 == SFmode || mode1 == DFmode;
37944
37945 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37946 that we can tie it with SFmode. */
37947 if (mode2 == DFmode)
37948 return mode1 == SFmode;
37949
37950 /* If MODE2 is only appropriate for an SSE register, then tie with
37951 any other mode acceptable to SSE registers. */
37952 if (GET_MODE_SIZE (mode2) == 32
37953 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37954 return (GET_MODE_SIZE (mode1) == 32
37955 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37956 if (GET_MODE_SIZE (mode2) == 16
37957 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37958 return (GET_MODE_SIZE (mode1) == 16
37959 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37960
37961 /* If MODE2 is appropriate for an MMX register, then tie
37962 with any other mode acceptable to MMX registers. */
37963 if (GET_MODE_SIZE (mode2) == 8
37964 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37965 return (GET_MODE_SIZE (mode1) == 8
37966 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37967
37968 return false;
37969 }
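/* For illustration: ix86_modes_tieable_p (V4SFmode, V2DFmode) is true, since
   both modes are 16 bytes wide and acceptable to the SSE registers, whereas
   ix86_modes_tieable_p (TFmode, XFmode) is false, as noted above.  */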
37970
37971 /* Return the cost of moving between two registers of mode MODE. */
37972
37973 static int
37974 ix86_set_reg_reg_cost (enum machine_mode mode)
37975 {
37976 unsigned int units = UNITS_PER_WORD;
37977
37978 switch (GET_MODE_CLASS (mode))
37979 {
37980 default:
37981 break;
37982
37983 case MODE_CC:
37984 units = GET_MODE_SIZE (CCmode);
37985 break;
37986
37987 case MODE_FLOAT:
37988 if ((TARGET_SSE && mode == TFmode)
37989 || (TARGET_80387 && mode == XFmode)
37990 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37991 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37992 units = GET_MODE_SIZE (mode);
37993 break;
37994
37995 case MODE_COMPLEX_FLOAT:
37996 if ((TARGET_SSE && mode == TCmode)
37997 || (TARGET_80387 && mode == XCmode)
37998 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37999 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
38000 units = GET_MODE_SIZE (mode);
38001 break;
38002
38003 case MODE_VECTOR_INT:
38004 case MODE_VECTOR_FLOAT:
38005 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
38006 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
38007 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
38008 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
38009 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
38010 units = GET_MODE_SIZE (mode);
38011 }
38012
38013 /* Return the cost of moving between two registers of mode MODE,
38014 assuming that the move will be in pieces of at most UNITS bytes. */
38015 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
38016 }
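/* For illustration: on a 32-bit target (UNITS_PER_WORD == 4) a DImode
   register-register set is costed as COSTS_N_INSNS ((8 + 4 - 1) / 4)
   == COSTS_N_INSNS (2), an SImode set as COSTS_N_INSNS (1), and a V4SFmode
   set with SSE enabled uses UNITS == 16 and so also costs COSTS_N_INSNS (1).  */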
38017
38018 /* Compute a (partial) cost for rtx X. Return true if the complete
38019 cost has been computed, and false if subexpressions should be
38020 scanned. In either case, *TOTAL contains the cost result. */
38021
38022 static bool
38023 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
38024 bool speed)
38025 {
38026 rtx mask;
38027 enum rtx_code code = (enum rtx_code) code_i;
38028 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
38029 enum machine_mode mode = GET_MODE (x);
38030 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
38031
38032 switch (code)
38033 {
38034 case SET:
38035 if (register_operand (SET_DEST (x), VOIDmode)
38036 && reg_or_0_operand (SET_SRC (x), VOIDmode))
38037 {
38038 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
38039 return true;
38040 }
38041 return false;
38042
38043 case CONST_INT:
38044 case CONST:
38045 case LABEL_REF:
38046 case SYMBOL_REF:
38047 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
38048 *total = 3;
38049 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
38050 *total = 2;
38051 else if (flag_pic && SYMBOLIC_CONST (x)
38052 && !(TARGET_64BIT
38053 && (GET_CODE (x) == LABEL_REF
38054 || (GET_CODE (x) == SYMBOL_REF
38055 && SYMBOL_REF_LOCAL_P (x)))))
38056 *total = 1;
38057 else
38058 *total = 0;
38059 return true;
38060
38061 case CONST_DOUBLE:
38062 if (mode == VOIDmode)
38063 {
38064 *total = 0;
38065 return true;
38066 }
38067 switch (standard_80387_constant_p (x))
38068 {
38069 case 1: /* 0.0 */
38070 *total = 1;
38071 return true;
38072 default: /* Other constants */
38073 *total = 2;
38074 return true;
38075 case 0:
38076 case -1:
38077 break;
38078 }
38079 if (SSE_FLOAT_MODE_P (mode))
38080 {
38081 case CONST_VECTOR:
38082 switch (standard_sse_constant_p (x))
38083 {
38084 case 0:
38085 break;
38086 case 1: /* 0: xor eliminates false dependency */
38087 *total = 0;
38088 return true;
38089 default: /* -1: cmp contains false dependency */
38090 *total = 1;
38091 return true;
38092 }
38093 }
38094 /* Fall back to (MEM (SYMBOL_REF)), since that's where
38095 it'll probably end up. Add a penalty for size. */
38096 *total = (COSTS_N_INSNS (1)
38097 + (flag_pic != 0 && !TARGET_64BIT)
38098 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
38099 return true;
38100
38101 case ZERO_EXTEND:
38102 /* The zero extension is often completely free on x86_64, so make
38103 it as cheap as possible. */
38104 if (TARGET_64BIT && mode == DImode
38105 && GET_MODE (XEXP (x, 0)) == SImode)
38106 *total = 1;
38107 else if (TARGET_ZERO_EXTEND_WITH_AND)
38108 *total = cost->add;
38109 else
38110 *total = cost->movzx;
38111 return false;
38112
38113 case SIGN_EXTEND:
38114 *total = cost->movsx;
38115 return false;
38116
38117 case ASHIFT:
38118 if (SCALAR_INT_MODE_P (mode)
38119 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
38120 && CONST_INT_P (XEXP (x, 1)))
38121 {
38122 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38123 if (value == 1)
38124 {
38125 *total = cost->add;
38126 return false;
38127 }
38128 if ((value == 2 || value == 3)
38129 && cost->lea <= cost->shift_const)
38130 {
38131 *total = cost->lea;
38132 return false;
38133 }
38134 }
38135 /* FALLTHRU */
38136
38137 case ROTATE:
38138 case ASHIFTRT:
38139 case LSHIFTRT:
38140 case ROTATERT:
38141 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38142 {
38143 /* ??? Should be SSE vector operation cost. */
38144 /* At least for published AMD latencies, this really is the same
38145 as the latency for a simple fpu operation like fabs. */
38146 /* V*QImode is emulated with 1-11 insns. */
38147 if (mode == V16QImode || mode == V32QImode)
38148 {
38149 int count = 11;
38150 if (TARGET_XOP && mode == V16QImode)
38151 {
38152 /* For XOP we use vpshab, which requires a broadcast of the
38153 value to the variable shift insn. For constants this
38154 means a V16QImode constant in memory; even when we can perform the
38155 shift with one insn, set the cost so as to prefer paddb. */
38156 if (CONSTANT_P (XEXP (x, 1)))
38157 {
38158 *total = (cost->fabs
38159 + rtx_cost (XEXP (x, 0), code, 0, speed)
38160 + (speed ? 2 : COSTS_N_BYTES (16)));
38161 return true;
38162 }
38163 count = 3;
38164 }
38165 else if (TARGET_SSSE3)
38166 count = 7;
38167 *total = cost->fabs * count;
38168 }
38169 else
38170 *total = cost->fabs;
38171 }
38172 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38173 {
38174 if (CONST_INT_P (XEXP (x, 1)))
38175 {
38176 if (INTVAL (XEXP (x, 1)) > 32)
38177 *total = cost->shift_const + COSTS_N_INSNS (2);
38178 else
38179 *total = cost->shift_const * 2;
38180 }
38181 else
38182 {
38183 if (GET_CODE (XEXP (x, 1)) == AND)
38184 *total = cost->shift_var * 2;
38185 else
38186 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
38187 }
38188 }
38189 else
38190 {
38191 if (CONST_INT_P (XEXP (x, 1)))
38192 *total = cost->shift_const;
38193 else if (GET_CODE (XEXP (x, 1)) == SUBREG
38194 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
38195 {
38196 /* Return the cost after shift-and truncation. */
38197 *total = cost->shift_var;
38198 return true;
38199 }
38200 else
38201 *total = cost->shift_var;
38202 }
38203 return false;
38204
38205 case FMA:
38206 {
38207 rtx sub;
38208
38209 gcc_assert (FLOAT_MODE_P (mode));
38210 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
38211
38212 /* ??? SSE scalar/vector cost should be used here. */
38213 /* ??? Bald assumption that fma has the same cost as fmul. */
38214 *total = cost->fmul;
38215 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
38216
38217 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
38218 sub = XEXP (x, 0);
38219 if (GET_CODE (sub) == NEG)
38220 sub = XEXP (sub, 0);
38221 *total += rtx_cost (sub, FMA, 0, speed);
38222
38223 sub = XEXP (x, 2);
38224 if (GET_CODE (sub) == NEG)
38225 sub = XEXP (sub, 0);
38226 *total += rtx_cost (sub, FMA, 2, speed);
38227 return true;
38228 }
38229
38230 case MULT:
38231 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38232 {
38233 /* ??? SSE scalar cost should be used here. */
38234 *total = cost->fmul;
38235 return false;
38236 }
38237 else if (X87_FLOAT_MODE_P (mode))
38238 {
38239 *total = cost->fmul;
38240 return false;
38241 }
38242 else if (FLOAT_MODE_P (mode))
38243 {
38244 /* ??? SSE vector cost should be used here. */
38245 *total = cost->fmul;
38246 return false;
38247 }
38248 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38249 {
38250 /* V*QImode is emulated with 7-13 insns. */
38251 if (mode == V16QImode || mode == V32QImode)
38252 {
38253 int extra = 11;
38254 if (TARGET_XOP && mode == V16QImode)
38255 extra = 5;
38256 else if (TARGET_SSSE3)
38257 extra = 6;
38258 *total = cost->fmul * 2 + cost->fabs * extra;
38259 }
38260 /* V*DImode is emulated with 5-8 insns. */
38261 else if (mode == V2DImode || mode == V4DImode)
38262 {
38263 if (TARGET_XOP && mode == V2DImode)
38264 *total = cost->fmul * 2 + cost->fabs * 3;
38265 else
38266 *total = cost->fmul * 3 + cost->fabs * 5;
38267 }
38268 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38269 insns, including two PMULUDQ. */
38270 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38271 *total = cost->fmul * 2 + cost->fabs * 5;
38272 else
38273 *total = cost->fmul;
38274 return false;
38275 }
38276 else
38277 {
38278 rtx op0 = XEXP (x, 0);
38279 rtx op1 = XEXP (x, 1);
38280 int nbits;
38281 if (CONST_INT_P (XEXP (x, 1)))
38282 {
38283 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
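/* Count the set bits of the multiplier, clearing the lowest set bit on
   each iteration; e.g. a multiply by 10 (binary 1010) yields nbits == 2.  */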
38284 for (nbits = 0; value != 0; value &= value - 1)
38285 nbits++;
38286 }
38287 else
38288 /* This is arbitrary. */
38289 nbits = 7;
38290
38291 /* Compute costs correctly for widening multiplication. */
38292 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38293 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38294 == GET_MODE_SIZE (mode))
38295 {
38296 int is_mulwiden = 0;
38297 enum machine_mode inner_mode = GET_MODE (op0);
38298
38299 if (GET_CODE (op0) == GET_CODE (op1))
38300 is_mulwiden = 1, op1 = XEXP (op1, 0);
38301 else if (CONST_INT_P (op1))
38302 {
38303 if (GET_CODE (op0) == SIGN_EXTEND)
38304 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38305 == INTVAL (op1);
38306 else
38307 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38308 }
38309
38310 if (is_mulwiden)
38311 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38312 }
38313
38314 *total = (cost->mult_init[MODE_INDEX (mode)]
38315 + nbits * cost->mult_bit
38316 + rtx_cost (op0, outer_code, opno, speed)
38317 + rtx_cost (op1, outer_code, opno, speed));
38318
38319 return true;
38320 }
38321
38322 case DIV:
38323 case UDIV:
38324 case MOD:
38325 case UMOD:
38326 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38327 /* ??? SSE cost should be used here. */
38328 *total = cost->fdiv;
38329 else if (X87_FLOAT_MODE_P (mode))
38330 *total = cost->fdiv;
38331 else if (FLOAT_MODE_P (mode))
38332 /* ??? SSE vector cost should be used here. */
38333 *total = cost->fdiv;
38334 else
38335 *total = cost->divide[MODE_INDEX (mode)];
38336 return false;
38337
38338 case PLUS:
38339 if (GET_MODE_CLASS (mode) == MODE_INT
38340 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38341 {
38342 if (GET_CODE (XEXP (x, 0)) == PLUS
38343 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38344 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38345 && CONSTANT_P (XEXP (x, 1)))
38346 {
38347 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38348 if (val == 2 || val == 4 || val == 8)
38349 {
38350 *total = cost->lea;
38351 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38352 outer_code, opno, speed);
38353 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38354 outer_code, opno, speed);
38355 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38356 return true;
38357 }
38358 }
38359 else if (GET_CODE (XEXP (x, 0)) == MULT
38360 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38361 {
38362 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38363 if (val == 2 || val == 4 || val == 8)
38364 {
38365 *total = cost->lea;
38366 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38367 outer_code, opno, speed);
38368 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38369 return true;
38370 }
38371 }
38372 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38373 {
38374 *total = cost->lea;
38375 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38376 outer_code, opno, speed);
38377 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38378 outer_code, opno, speed);
38379 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38380 return true;
38381 }
38382 }
38383 /* FALLTHRU */
38384
38385 case MINUS:
38386 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38387 {
38388 /* ??? SSE cost should be used here. */
38389 *total = cost->fadd;
38390 return false;
38391 }
38392 else if (X87_FLOAT_MODE_P (mode))
38393 {
38394 *total = cost->fadd;
38395 return false;
38396 }
38397 else if (FLOAT_MODE_P (mode))
38398 {
38399 /* ??? SSE vector cost should be used here. */
38400 *total = cost->fadd;
38401 return false;
38402 }
38403 /* FALLTHRU */
38404
38405 case AND:
38406 case IOR:
38407 case XOR:
38408 if (GET_MODE_CLASS (mode) == MODE_INT
38409 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38410 {
38411 *total = (cost->add * 2
38412 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38413 << (GET_MODE (XEXP (x, 0)) != DImode))
38414 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38415 << (GET_MODE (XEXP (x, 1)) != DImode)));
38416 return true;
38417 }
38418 /* FALLTHRU */
38419
38420 case NEG:
38421 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38422 {
38423 /* ??? SSE cost should be used here. */
38424 *total = cost->fchs;
38425 return false;
38426 }
38427 else if (X87_FLOAT_MODE_P (mode))
38428 {
38429 *total = cost->fchs;
38430 return false;
38431 }
38432 else if (FLOAT_MODE_P (mode))
38433 {
38434 /* ??? SSE vector cost should be used here. */
38435 *total = cost->fchs;
38436 return false;
38437 }
38438 /* FALLTHRU */
38439
38440 case NOT:
38441 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38442 {
38443 /* ??? Should be SSE vector operation cost. */
38444 /* At least for published AMD latencies, this really is the same
38445 as the latency for a simple fpu operation like fabs. */
38446 *total = cost->fabs;
38447 }
38448 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38449 *total = cost->add * 2;
38450 else
38451 *total = cost->add;
38452 return false;
38453
38454 case COMPARE:
38455 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38456 && XEXP (XEXP (x, 0), 1) == const1_rtx
38457 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38458 && XEXP (x, 1) == const0_rtx)
38459 {
38460 /* This kind of construct is implemented using test[bwl].
38461 Treat it as if we had an AND. */
38462 *total = (cost->add
38463 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38464 + rtx_cost (const1_rtx, outer_code, opno, speed));
38465 return true;
38466 }
38467 return false;
38468
38469 case FLOAT_EXTEND:
38470 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38471 *total = 0;
38472 return false;
38473
38474 case ABS:
38475 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38476 /* ??? SSE cost should be used here. */
38477 *total = cost->fabs;
38478 else if (X87_FLOAT_MODE_P (mode))
38479 *total = cost->fabs;
38480 else if (FLOAT_MODE_P (mode))
38481 /* ??? SSE vector cost should be used here. */
38482 *total = cost->fabs;
38483 return false;
38484
38485 case SQRT:
38486 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38487 /* ??? SSE cost should be used here. */
38488 *total = cost->fsqrt;
38489 else if (X87_FLOAT_MODE_P (mode))
38490 *total = cost->fsqrt;
38491 else if (FLOAT_MODE_P (mode))
38492 /* ??? SSE vector cost should be used here. */
38493 *total = cost->fsqrt;
38494 return false;
38495
38496 case UNSPEC:
38497 if (XINT (x, 1) == UNSPEC_TP)
38498 *total = 0;
38499 return false;
38500
38501 case VEC_SELECT:
38502 case VEC_CONCAT:
38503 case VEC_DUPLICATE:
38504 /* ??? Assume all of these vector manipulation patterns are
38505 recognizable. In which case they all pretty much have the
38506 same cost. */
38507 *total = cost->fabs;
38508 return true;
38509 case VEC_MERGE:
38510 mask = XEXP (x, 2);
38511 /* This is a masked instruction; assume the same cost as the
38512 non-masked variant. */
38513 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38514 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38515 else
38516 *total = cost->fabs;
38517 return true;
38518
38519 default:
38520 return false;
38521 }
38522 }
38523
38524 #if TARGET_MACHO
38525
38526 static int current_machopic_label_num;
38527
38528 /* Given a symbol name and its associated stub, write out the
38529 definition of the stub. */
38530
38531 void
38532 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38533 {
38534 unsigned int length;
38535 char *binder_name, *symbol_name, lazy_ptr_name[32];
38536 int label = ++current_machopic_label_num;
38537
38538 /* For 64-bit we shouldn't get here. */
38539 gcc_assert (!TARGET_64BIT);
38540
38541 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38542 symb = targetm.strip_name_encoding (symb);
38543
38544 length = strlen (stub);
38545 binder_name = XALLOCAVEC (char, length + 32);
38546 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38547
38548 length = strlen (symb);
38549 symbol_name = XALLOCAVEC (char, length + 32);
38550 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38551
38552 sprintf (lazy_ptr_name, "L%d$lz", label);
38553
38554 if (MACHOPIC_ATT_STUB)
38555 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38556 else if (MACHOPIC_PURE)
38557 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38558 else
38559 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38560
38561 fprintf (file, "%s:\n", stub);
38562 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38563
38564 if (MACHOPIC_ATT_STUB)
38565 {
38566 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38567 }
38568 else if (MACHOPIC_PURE)
38569 {
38570 /* PIC stub. */
38571 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38572 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38573 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38574 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38575 label, lazy_ptr_name, label);
38576 fprintf (file, "\tjmp\t*%%ecx\n");
38577 }
38578 else
38579 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38580
38581 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38582 it needs no stub-binding-helper. */
38583 if (MACHOPIC_ATT_STUB)
38584 return;
38585
38586 fprintf (file, "%s:\n", binder_name);
38587
38588 if (MACHOPIC_PURE)
38589 {
38590 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38591 fprintf (file, "\tpushl\t%%ecx\n");
38592 }
38593 else
38594 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38595
38596 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38597
38598 /* N.B. Keep the correspondence of these
38599 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38600 old-pic/new-pic/non-pic stubs; altering this will break
38601 compatibility with existing dylibs. */
38602 if (MACHOPIC_PURE)
38603 {
38604 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38605 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38606 }
38607 else
38608 /* 16-byte -mdynamic-no-pic stub. */
38609 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38610
38611 fprintf (file, "%s:\n", lazy_ptr_name);
38612 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38613 fprintf (file, ASM_LONG "%s\n", binder_name);
38614 }
38615 #endif /* TARGET_MACHO */
38616
38617 /* Order the registers for register allocator. */
38618
38619 void
38620 x86_order_regs_for_local_alloc (void)
38621 {
38622 int pos = 0;
38623 int i;
38624
38625 /* First allocate the local general purpose registers. */
38626 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38627 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38628 reg_alloc_order [pos++] = i;
38629
38630 /* Global general purpose registers. */
38631 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38632 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38633 reg_alloc_order [pos++] = i;
38634
38635 /* x87 registers come first in case we are doing FP math
38636 using them. */
38637 if (!TARGET_SSE_MATH)
38638 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38639 reg_alloc_order [pos++] = i;
38640
38641 /* SSE registers. */
38642 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38643 reg_alloc_order [pos++] = i;
38644 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38645 reg_alloc_order [pos++] = i;
38646
38647 /* Extended REX SSE registers. */
38648 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38649 reg_alloc_order [pos++] = i;
38650
38651 /* Mask register. */
38652 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38653 reg_alloc_order [pos++] = i;
38654
38655 /* x87 registers. */
38656 if (TARGET_SSE_MATH)
38657 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38658 reg_alloc_order [pos++] = i;
38659
38660 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38661 reg_alloc_order [pos++] = i;
38662
38663 /* Initialize the rest of array as we do not allocate some registers
38664 at all. */
38665 while (pos < FIRST_PSEUDO_REGISTER)
38666 reg_alloc_order [pos++] = 0;
38667 }
38668
38669 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38670 in struct attribute_spec.handler. */
38671 static tree
38672 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38673 tree args,
38674 int,
38675 bool *no_add_attrs)
38676 {
38677 if (TREE_CODE (*node) != FUNCTION_TYPE
38678 && TREE_CODE (*node) != METHOD_TYPE
38679 && TREE_CODE (*node) != FIELD_DECL
38680 && TREE_CODE (*node) != TYPE_DECL)
38681 {
38682 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38683 name);
38684 *no_add_attrs = true;
38685 return NULL_TREE;
38686 }
38687 if (TARGET_64BIT)
38688 {
38689 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38690 name);
38691 *no_add_attrs = true;
38692 return NULL_TREE;
38693 }
38694 if (is_attribute_p ("callee_pop_aggregate_return", name))
38695 {
38696 tree cst;
38697
38698 cst = TREE_VALUE (args);
38699 if (TREE_CODE (cst) != INTEGER_CST)
38700 {
38701 warning (OPT_Wattributes,
38702 "%qE attribute requires an integer constant argument",
38703 name);
38704 *no_add_attrs = true;
38705 }
38706 else if (compare_tree_int (cst, 0) != 0
38707 && compare_tree_int (cst, 1) != 0)
38708 {
38709 warning (OPT_Wattributes,
38710 "argument to %qE attribute is neither zero, nor one",
38711 name);
38712 *no_add_attrs = true;
38713 }
38714
38715 return NULL_TREE;
38716 }
38717
38718 return NULL_TREE;
38719 }
38720
38721 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
38722 struct attribute_spec.handler. */
38723 static tree
38724 ix86_handle_abi_attribute (tree *node, tree name, tree, int,
38725 bool *no_add_attrs)
38726 {
38727 if (TREE_CODE (*node) != FUNCTION_TYPE
38728 && TREE_CODE (*node) != METHOD_TYPE
38729 && TREE_CODE (*node) != FIELD_DECL
38730 && TREE_CODE (*node) != TYPE_DECL)
38731 {
38732 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38733 name);
38734 *no_add_attrs = true;
38735 return NULL_TREE;
38736 }
38737
38738 /* Can combine regparm with all attributes but fastcall. */
38739 if (is_attribute_p ("ms_abi", name))
38740 {
38741 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38742 {
38743 error ("ms_abi and sysv_abi attributes are not compatible");
38744 }
38745
38746 return NULL_TREE;
38747 }
38748 else if (is_attribute_p ("sysv_abi", name))
38749 {
38750 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38751 {
38752 error ("ms_abi and sysv_abi attributes are not compatible");
38753 }
38754
38755 return NULL_TREE;
38756 }
38757
38758 return NULL_TREE;
38759 }
38760
38761 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38762 struct attribute_spec.handler. */
38763 static tree
38764 ix86_handle_struct_attribute (tree *node, tree name, tree, int,
38765 bool *no_add_attrs)
38766 {
38767 tree *type = NULL;
38768 if (DECL_P (*node))
38769 {
38770 if (TREE_CODE (*node) == TYPE_DECL)
38771 type = &TREE_TYPE (*node);
38772 }
38773 else
38774 type = node;
38775
38776 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38777 {
38778 warning (OPT_Wattributes, "%qE attribute ignored",
38779 name);
38780 *no_add_attrs = true;
38781 }
38782
38783 else if ((is_attribute_p ("ms_struct", name)
38784 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38785 || ((is_attribute_p ("gcc_struct", name)
38786 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38787 {
38788 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38789 name);
38790 *no_add_attrs = true;
38791 }
38792
38793 return NULL_TREE;
38794 }
38795
38796 static tree
38797 ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
38798 bool *no_add_attrs)
38799 {
38800 if (TREE_CODE (*node) != FUNCTION_DECL)
38801 {
38802 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38803 name);
38804 *no_add_attrs = true;
38805 }
38806 return NULL_TREE;
38807 }
38808
38809 static bool
38810 ix86_ms_bitfield_layout_p (const_tree record_type)
38811 {
38812 return ((TARGET_MS_BITFIELD_LAYOUT
38813 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38814 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38815 }
38816
38817 /* Returns an expression indicating where the this parameter is
38818 located on entry to the FUNCTION. */
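/* For illustration, on 32-bit targets: with the fastcall convention THIS
   arrives in %ecx (%edx when the return value is an aggregate returned in
   memory), while with the default stack convention it is found at 4(%esp),
   or at 8(%esp) for aggregate returns.  */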
38819
38820 static rtx
38821 x86_this_parameter (tree function)
38822 {
38823 tree type = TREE_TYPE (function);
38824 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38825 int nregs;
38826
38827 if (TARGET_64BIT)
38828 {
38829 const int *parm_regs;
38830
38831 if (ix86_function_type_abi (type) == MS_ABI)
38832 parm_regs = x86_64_ms_abi_int_parameter_registers;
38833 else
38834 parm_regs = x86_64_int_parameter_registers;
38835 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38836 }
38837
38838 nregs = ix86_function_regparm (type, function);
38839
38840 if (nregs > 0 && !stdarg_p (type))
38841 {
38842 int regno;
38843 unsigned int ccvt = ix86_get_callcvt (type);
38844
38845 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38846 regno = aggr ? DX_REG : CX_REG;
38847 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38848 {
38849 regno = CX_REG;
38850 if (aggr)
38851 return gen_rtx_MEM (SImode,
38852 plus_constant (Pmode, stack_pointer_rtx, 4));
38853 }
38854 else
38855 {
38856 regno = AX_REG;
38857 if (aggr)
38858 {
38859 regno = DX_REG;
38860 if (nregs == 1)
38861 return gen_rtx_MEM (SImode,
38862 plus_constant (Pmode,
38863 stack_pointer_rtx, 4));
38864 }
38865 }
38866 return gen_rtx_REG (SImode, regno);
38867 }
38868
38869 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38870 aggr ? 8 : 4));
38871 }
38872
38873 /* Determine whether x86_output_mi_thunk can succeed. */
38874
38875 static bool
38876 x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
38877 const_tree function)
38878 {
38879 /* 64-bit can handle anything. */
38880 if (TARGET_64BIT)
38881 return true;
38882
38883 /* For 32-bit, everything's fine if we have one free register. */
38884 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38885 return true;
38886
38887 /* Need a free register for vcall_offset. */
38888 if (vcall_offset)
38889 return false;
38890
38891 /* Need a free register for GOT references. */
38892 if (flag_pic && !targetm.binds_local_p (function))
38893 return false;
38894
38895 /* Otherwise ok. */
38896 return true;
38897 }
38898
38899 /* Output the assembler code for a thunk function. THUNK_DECL is the
38900 declaration for the thunk function itself, FUNCTION is the decl for
38901 the target function. DELTA is an immediate constant offset to be
38902 added to THIS. If VCALL_OFFSET is nonzero, the word at
38903 *(*this + vcall_offset) should be added to THIS. */
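/* As a rough illustration: a thunk with DELTA == -4 and VCALL_OFFSET == 0
   simply adds -4 to the incoming THIS (in its register or stack slot) and
   tail-calls FUNCTION; a nonzero VCALL_OFFSET additionally loads the vtable
   pointer from *THIS and adds *(vtable + VCALL_OFFSET) to THIS first.  */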
38904
38905 static void
38906 x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
38907 HOST_WIDE_INT vcall_offset, tree function)
38908 {
38909 rtx this_param = x86_this_parameter (function);
38910 rtx this_reg, tmp, fnaddr;
38911 unsigned int tmp_regno;
38912 rtx_insn *insn;
38913
38914 if (TARGET_64BIT)
38915 tmp_regno = R10_REG;
38916 else
38917 {
38918 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38919 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38920 tmp_regno = AX_REG;
38921 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38922 tmp_regno = DX_REG;
38923 else
38924 tmp_regno = CX_REG;
38925 }
38926
38927 emit_note (NOTE_INSN_PROLOGUE_END);
38928
38929 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38930 pull it in now and let DELTA benefit. */
38931 if (REG_P (this_param))
38932 this_reg = this_param;
38933 else if (vcall_offset)
38934 {
38935 /* Put the this parameter into %eax. */
38936 this_reg = gen_rtx_REG (Pmode, AX_REG);
38937 emit_move_insn (this_reg, this_param);
38938 }
38939 else
38940 this_reg = NULL_RTX;
38941
38942 /* Adjust the this parameter by a fixed constant. */
38943 if (delta)
38944 {
38945 rtx delta_rtx = GEN_INT (delta);
38946 rtx delta_dst = this_reg ? this_reg : this_param;
38947
38948 if (TARGET_64BIT)
38949 {
38950 if (!x86_64_general_operand (delta_rtx, Pmode))
38951 {
38952 tmp = gen_rtx_REG (Pmode, tmp_regno);
38953 emit_move_insn (tmp, delta_rtx);
38954 delta_rtx = tmp;
38955 }
38956 }
38957
38958 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38959 }
38960
38961 /* Adjust the this parameter by a value stored in the vtable. */
38962 if (vcall_offset)
38963 {
38964 rtx vcall_addr, vcall_mem, this_mem;
38965
38966 tmp = gen_rtx_REG (Pmode, tmp_regno);
38967
38968 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38969 if (Pmode != ptr_mode)
38970 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38971 emit_move_insn (tmp, this_mem);
38972
38973 /* Adjust the this parameter. */
38974 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38975 if (TARGET_64BIT
38976 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38977 {
38978 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38979 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38980 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38981 }
38982
38983 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38984 if (Pmode != ptr_mode)
38985 emit_insn (gen_addsi_1_zext (this_reg,
38986 gen_rtx_REG (ptr_mode,
38987 REGNO (this_reg)),
38988 vcall_mem));
38989 else
38990 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38991 }
38992
38993 /* If necessary, drop THIS back to its stack slot. */
38994 if (this_reg && this_reg != this_param)
38995 emit_move_insn (this_param, this_reg);
38996
38997 fnaddr = XEXP (DECL_RTL (function), 0);
38998 if (TARGET_64BIT)
38999 {
39000 if (!flag_pic || targetm.binds_local_p (function)
39001 || TARGET_PECOFF)
39002 ;
39003 else
39004 {
39005 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
39006 tmp = gen_rtx_CONST (Pmode, tmp);
39007 fnaddr = gen_const_mem (Pmode, tmp);
39008 }
39009 }
39010 else
39011 {
39012 if (!flag_pic || targetm.binds_local_p (function))
39013 ;
39014 #if TARGET_MACHO
39015 else if (TARGET_MACHO)
39016 {
39017 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
39018 fnaddr = XEXP (fnaddr, 0);
39019 }
39020 #endif /* TARGET_MACHO */
39021 else
39022 {
39023 tmp = gen_rtx_REG (Pmode, CX_REG);
39024 output_set_got (tmp, NULL_RTX);
39025
39026 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
39027 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
39028 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
39029 fnaddr = gen_const_mem (Pmode, fnaddr);
39030 }
39031 }
39032
39033 /* Our sibling call patterns do not allow memories, because we have no
39034 predicate that can distinguish between frame and non-frame memory.
39035 For our purposes here, we can get away with (ab)using a jump pattern,
39036 because we're going to do no optimization. */
39037 if (MEM_P (fnaddr))
39038 {
39039 if (sibcall_insn_operand (fnaddr, word_mode))
39040 {
39041 fnaddr = XEXP (DECL_RTL (function), 0);
39042 tmp = gen_rtx_MEM (QImode, fnaddr);
39043 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39044 tmp = emit_call_insn (tmp);
39045 SIBLING_CALL_P (tmp) = 1;
39046 }
39047 else
39048 emit_jump_insn (gen_indirect_jump (fnaddr));
39049 }
39050 else
39051 {
39052 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
39053 fnaddr = legitimize_pic_address (fnaddr,
39054 gen_rtx_REG (Pmode, tmp_regno));
39055
39056 if (!sibcall_insn_operand (fnaddr, word_mode))
39057 {
39058 tmp = gen_rtx_REG (word_mode, tmp_regno);
39059 if (GET_MODE (fnaddr) != word_mode)
39060 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
39061 emit_move_insn (tmp, fnaddr);
39062 fnaddr = tmp;
39063 }
39064
39065 tmp = gen_rtx_MEM (QImode, fnaddr);
39066 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
39067 tmp = emit_call_insn (tmp);
39068 SIBLING_CALL_P (tmp) = 1;
39069 }
39070 emit_barrier ();
39071
39072 /* Emit just enough of rest_of_compilation to get the insns emitted.
39073 Note that use_thunk calls assemble_start_function et al. */
39074 insn = get_insns ();
39075 shorten_branches (insn);
39076 final_start_function (insn, file, 1);
39077 final (insn, file, 1);
39078 final_end_function ();
39079 }
39080
39081 static void
39082 x86_file_start (void)
39083 {
39084 default_file_start ();
39085 if (TARGET_16BIT)
39086 fputs ("\t.code16gcc\n", asm_out_file);
39087 #if TARGET_MACHO
39088 darwin_file_start ();
39089 #endif
39090 if (X86_FILE_START_VERSION_DIRECTIVE)
39091 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
39092 if (X86_FILE_START_FLTUSED)
39093 fputs ("\t.global\t__fltused\n", asm_out_file);
39094 if (ix86_asm_dialect == ASM_INTEL)
39095 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
39096 }
39097
39098 int
39099 x86_field_alignment (tree field, int computed)
39100 {
39101 enum machine_mode mode;
39102 tree type = TREE_TYPE (field);
39103
39104 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
39105 return computed;
39106 mode = TYPE_MODE (strip_array_types (type));
39107 if (mode == DFmode || mode == DCmode
39108 || GET_MODE_CLASS (mode) == MODE_INT
39109 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
39110 return MIN (32, computed);
39111 return computed;
39112 }
39113
39114 /* Print call to TARGET to FILE. */
39115
39116 static void
39117 x86_print_call_or_nop (FILE *file, const char *target)
39118 {
39119 if (flag_nop_mcount)
39120 fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
39121 else
39122 fprintf (file, "1:\tcall\t%s\n", target);
39123 }
39124
39125 /* Output assembler code to FILE to increment profiler label # LABELNO
39126 for profiling a function entry. */
39127 void
39128 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
39129 {
39130 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
39131 : MCOUNT_NAME);
39132 if (TARGET_64BIT)
39133 {
39134 #ifndef NO_PROFILE_COUNTERS
39135 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
39136 #endif
39137
39138 if (!TARGET_PECOFF && flag_pic)
39139 fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
39140 else
39141 x86_print_call_or_nop (file, mcount_name);
39142 }
39143 else if (flag_pic)
39144 {
39145 #ifndef NO_PROFILE_COUNTERS
39146 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
39147 LPREFIX, labelno);
39148 #endif
39149 fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
39150 }
39151 else
39152 {
39153 #ifndef NO_PROFILE_COUNTERS
39154 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
39155 LPREFIX, labelno);
39156 #endif
39157 x86_print_call_or_nop (file, mcount_name);
39158 }
39159
39160 if (flag_record_mcount)
39161 {
39162 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
39163 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
39164 fprintf (file, "\t.previous\n");
39165 }
39166 }
39167
39168 /* We don't have exact information about the insn sizes, but we may assume
39169 quite safely that we are informed about all 1 byte insns and memory
39170 address sizes. This is enough to eliminate unnecessary padding in
39171 99% of cases. */
39172
39173 static int
39174 min_insn_size (rtx_insn *insn)
39175 {
39176 int l = 0, len;
39177
39178 if (!INSN_P (insn) || !active_insn_p (insn))
39179 return 0;
39180
39181 /* Discard alignments we've emitted, and jump instructions. */
39182 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
39183 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
39184 return 0;
39185
39186 /* Important case - calls are always 5 bytes.
39187 It is common to have many calls in a row. */
39188 if (CALL_P (insn)
39189 && symbolic_reference_mentioned_p (PATTERN (insn))
39190 && !SIBLING_CALL_P (insn))
39191 return 5;
39192 len = get_attr_length (insn);
39193 if (len <= 1)
39194 return 1;
39195
39196 /* For normal instructions we rely on get_attr_length being exact,
39197 with a few exceptions. */
39198 if (!JUMP_P (insn))
39199 {
39200 enum attr_type type = get_attr_type (insn);
39201
39202 switch (type)
39203 {
39204 case TYPE_MULTI:
39205 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
39206 || asm_noperands (PATTERN (insn)) >= 0)
39207 return 0;
39208 break;
39209 case TYPE_OTHER:
39210 case TYPE_FCMP:
39211 break;
39212 default:
39213 /* Otherwise trust get_attr_length. */
39214 return len;
39215 }
39216
39217 l = get_attr_length_address (insn);
39218 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
39219 l = 4;
39220 }
39221 if (l)
39222 return 1+l;
39223 else
39224 return 2;
39225 }
39226
39227 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39228
39229 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
39230 window. */
39231
39232 static void
39233 ix86_avoid_jump_mispredicts (void)
39234 {
39235 rtx_insn *insn, *start = get_insns ();
39236 int nbytes = 0, njumps = 0;
39237 int isjump = 0;
39238
39239 /* Look for all minimal intervals of instructions containing 4 jumps.
39240 The intervals are bounded by START and INSN. NBYTES is the total
39241 size of instructions in the interval including INSN and not including
39242 START. When NBYTES is smaller than 16, it is possible
39243 that the end of START and INSN end up in the same 16-byte page.
39244
39245 The smallest offset in the page at which INSN can start is the case where
39246 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
39247 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
39248
39249 Don't consider an asm goto as a jump: while it can contain a jump, it doesn't
39250 have to, since control transfer to its label(s) can be performed through other
39251 means; also, we estimate the minimum length of all asm stmts as 0. */
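/* A worked example with hypothetical sizes: if the window START..INSN holds
   four 2-byte jumps plus 5 bytes of other insns (NBYTES == 13, INSN itself
   being 2 bytes), INSN can share a 16-byte page with the end of START, so a
   pad of 15 - 13 + 2 == 4 bytes is emitted before INSN.  */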
39252 for (insn = start; insn; insn = NEXT_INSN (insn))
39253 {
39254 int min_size;
39255
39256 if (LABEL_P (insn))
39257 {
39258 int align = label_to_alignment (insn);
39259 int max_skip = label_to_max_skip (insn);
39260
39261 if (max_skip > 15)
39262 max_skip = 15;
39263 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39264 already in the current 16 byte page, because otherwise
39265 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39266 bytes to reach 16 byte boundary. */
39267 if (align <= 0
39268 || (align <= 3 && max_skip != (1 << align) - 1))
39269 max_skip = 0;
39270 if (dump_file)
39271 fprintf (dump_file, "Label %i with max_skip %i\n",
39272 INSN_UID (insn), max_skip);
39273 if (max_skip)
39274 {
39275 while (nbytes + max_skip >= 16)
39276 {
39277 start = NEXT_INSN (start);
39278 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39279 || CALL_P (start))
39280 njumps--, isjump = 1;
39281 else
39282 isjump = 0;
39283 nbytes -= min_insn_size (start);
39284 }
39285 }
39286 continue;
39287 }
39288
39289 min_size = min_insn_size (insn);
39290 nbytes += min_size;
39291 if (dump_file)
39292 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39293 INSN_UID (insn), min_size);
39294 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39295 || CALL_P (insn))
39296 njumps++;
39297 else
39298 continue;
39299
39300 while (njumps > 3)
39301 {
39302 start = NEXT_INSN (start);
39303 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39304 || CALL_P (start))
39305 njumps--, isjump = 1;
39306 else
39307 isjump = 0;
39308 nbytes -= min_insn_size (start);
39309 }
39310 gcc_assert (njumps >= 0);
39311 if (dump_file)
39312 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39313 INSN_UID (start), INSN_UID (insn), nbytes);
39314
39315 if (njumps == 3 && isjump && nbytes < 16)
39316 {
39317 int padsize = 15 - nbytes + min_insn_size (insn);
39318
39319 if (dump_file)
39320 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39321 INSN_UID (insn), padsize);
39322 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39323 }
39324 }
39325 }
39326 #endif
39327
39328 /* AMD Athlon works faster
39329 when RET is not the destination of a conditional jump or directly preceded
39330 by another jump instruction. We avoid the penalty by inserting a NOP just
39331 before the RET instruction in such cases. */
39332 static void
39333 ix86_pad_returns (void)
39334 {
39335 edge e;
39336 edge_iterator ei;
39337
39338 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39339 {
39340 basic_block bb = e->src;
39341 rtx_insn *ret = BB_END (bb);
39342 rtx_insn *prev;
39343 bool replace = false;
39344
39345 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39346 || optimize_bb_for_size_p (bb))
39347 continue;
39348 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39349 if (active_insn_p (prev) || LABEL_P (prev))
39350 break;
39351 if (prev && LABEL_P (prev))
39352 {
39353 edge e;
39354 edge_iterator ei;
39355
39356 FOR_EACH_EDGE (e, ei, bb->preds)
39357 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39358 && !(e->flags & EDGE_FALLTHRU))
39359 {
39360 replace = true;
39361 break;
39362 }
39363 }
39364 if (!replace)
39365 {
39366 prev = prev_active_insn (ret);
39367 if (prev
39368 && ((JUMP_P (prev) && any_condjump_p (prev))
39369 || CALL_P (prev)))
39370 replace = true;
39371 /* Empty functions get a branch mispredict even when
39372 the jump destination is not visible to us. */
39373 if (!prev && !optimize_function_for_size_p (cfun))
39374 replace = true;
39375 }
39376 if (replace)
39377 {
39378 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39379 delete_insn (ret);
39380 }
39381 }
39382 }
39383
39384 /* Count the minimum number of instructions in BB. Return 4 if the
39385 number of instructions >= 4. */
39386
39387 static int
39388 ix86_count_insn_bb (basic_block bb)
39389 {
39390 rtx_insn *insn;
39391 int insn_count = 0;
39392
39393 /* Count number of instructions in this block. Return 4 if the number
39394 of instructions >= 4. */
39395 FOR_BB_INSNS (bb, insn)
39396 {
39397 /* This only happens in exit blocks. */
39398 if (JUMP_P (insn)
39399 && ANY_RETURN_P (PATTERN (insn)))
39400 break;
39401
39402 if (NONDEBUG_INSN_P (insn)
39403 && GET_CODE (PATTERN (insn)) != USE
39404 && GET_CODE (PATTERN (insn)) != CLOBBER)
39405 {
39406 insn_count++;
39407 if (insn_count >= 4)
39408 return insn_count;
39409 }
39410 }
39411
39412 return insn_count;
39413 }
39414
39415
39416 /* Count the minimum number of instructions in code path in BB.
39417 Return 4 if the number of instructions >= 4. */
39418
39419 static int
39420 ix86_count_insn (basic_block bb)
39421 {
39422 edge e;
39423 edge_iterator ei;
39424 int min_prev_count;
39425
39426 /* Only bother counting instructions along paths with no
39427 more than 2 basic blocks between entry and exit. Given
39428 that BB has an edge to exit, determine if a predecessor
39429 of BB has an edge from entry. If so, compute the number
39430 of instructions in the predecessor block. If there
39431 happen to be multiple such blocks, compute the minimum. */
39432 min_prev_count = 4;
39433 FOR_EACH_EDGE (e, ei, bb->preds)
39434 {
39435 edge prev_e;
39436 edge_iterator prev_ei;
39437
39438 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39439 {
39440 min_prev_count = 0;
39441 break;
39442 }
39443 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39444 {
39445 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39446 {
39447 int count = ix86_count_insn_bb (e->src);
39448 if (count < min_prev_count)
39449 min_prev_count = count;
39450 break;
39451 }
39452 }
39453 }
39454
39455 if (min_prev_count < 4)
39456 min_prev_count += ix86_count_insn_bb (bb);
39457
39458 return min_prev_count;
39459 }
39460
39461 /* Pad short function to 4 instructions. */
39462
39463 static void
39464 ix86_pad_short_function (void)
39465 {
39466 edge e;
39467 edge_iterator ei;
39468
39469 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39470 {
39471 rtx_insn *ret = BB_END (e->src);
39472 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39473 {
39474 int insn_count = ix86_count_insn (e->src);
39475
39476 /* Pad short function. */
39477 if (insn_count < 4)
39478 {
39479 rtx_insn *insn = ret;
39480
39481 /* Find epilogue. */
39482 while (insn
39483 && (!NOTE_P (insn)
39484 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39485 insn = PREV_INSN (insn);
39486
39487 if (!insn)
39488 insn = ret;
39489
39490 /* Two NOPs count as one instruction. */
39491 insn_count = 2 * (4 - insn_count);
39492 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39493 }
39494 }
39495 }
39496 }
39497
39498 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39499 the epilogue, the Windows system unwinder will apply epilogue logic and
39500 produce incorrect offsets. This can be avoided by adding a nop between
39501 the last insn that can throw and the first insn of the epilogue. */
39502
39503 static void
39504 ix86_seh_fixup_eh_fallthru (void)
39505 {
39506 edge e;
39507 edge_iterator ei;
39508
39509 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39510 {
39511 rtx_insn *insn, *next;
39512
39513 /* Find the beginning of the epilogue. */
39514 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39515 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39516 break;
39517 if (insn == NULL)
39518 continue;
39519
39520 /* We only care about preceding insns that can throw. */
39521 insn = prev_active_insn (insn);
39522 if (insn == NULL || !can_throw_internal (insn))
39523 continue;
39524
39525 /* Do not separate calls from their debug information. */
39526 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39527 if (NOTE_P (next)
39528 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39529 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39530 insn = next;
39531 else
39532 break;
39533
39534 emit_insn_after (gen_nops (const1_rtx), insn);
39535 }
39536 }
39537
39538 /* Implement machine specific optimizations. We implement padding of returns
39539 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39540 static void
39541 ix86_reorg (void)
39542 {
39543 /* We are freeing block_for_insn in the toplev to keep compatibility
39544 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39545 compute_bb_for_insn ();
39546
39547 if (TARGET_SEH && current_function_has_exception_handlers ())
39548 ix86_seh_fixup_eh_fallthru ();
39549
39550 if (optimize && optimize_function_for_speed_p (cfun))
39551 {
39552 if (TARGET_PAD_SHORT_FUNCTION)
39553 ix86_pad_short_function ();
39554 else if (TARGET_PAD_RETURNS)
39555 ix86_pad_returns ();
39556 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39557 if (TARGET_FOUR_JUMP_LIMIT)
39558 ix86_avoid_jump_mispredicts ();
39559 #endif
39560 }
39561 }
39562
39563 /* Return nonzero when a QImode register that must be represented via a REX
39564 prefix is used. */
39565 bool
39566 x86_extended_QIreg_mentioned_p (rtx_insn *insn)
39567 {
39568 int i;
39569 extract_insn_cached (insn);
39570 for (i = 0; i < recog_data.n_operands; i++)
39571 if (GENERAL_REG_P (recog_data.operand[i])
39572 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39573 return true;
39574 return false;
39575 }
39576
39577 /* Return nonzero when P points to a register encoded via a REX prefix.
39578 Called via for_each_rtx. */
39579 static int
39580 extended_reg_mentioned_1 (rtx *p, void *)
39581 {
39582 unsigned int regno;
39583 if (!REG_P (*p))
39584 return 0;
39585 regno = REGNO (*p);
39586 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39587 }
39588
39589 /* Return true when INSN mentions register that must be encoded using REX
39590 prefix. */
39591 bool
39592 x86_extended_reg_mentioned_p (rtx insn)
39593 {
39594 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39595 extended_reg_mentioned_1, NULL);
39596 }
39597
39598 /* If profitable, negate (without causing overflow) the integer constant
39599 of mode MODE at location LOC. Return true in this case. */
39600 bool
39601 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39602 {
39603 HOST_WIDE_INT val;
39604
39605 if (!CONST_INT_P (*loc))
39606 return false;
39607
39608 switch (mode)
39609 {
39610 case DImode:
39611 /* DImode x86_64 constants must fit in 32 bits. */
39612 gcc_assert (x86_64_immediate_operand (*loc, mode));
39613
39614 mode = SImode;
39615 break;
39616
39617 case SImode:
39618 case HImode:
39619 case QImode:
39620 break;
39621
39622 default:
39623 gcc_unreachable ();
39624 }
39625
39626 /* Avoid overflows. */
39627 if (mode_signbit_p (mode, *loc))
39628 return false;
39629
39630 val = INTVAL (*loc);
39631
39632 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
39633 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
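/* (An 8-bit immediate is sign-extended, so -128 fits in one byte while
   +128 needs a full 32-bit immediate; hence negating 128, but never -128,
   saves encoding space.)  */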
39634 if ((val < 0 && val != -128)
39635 || val == 128)
39636 {
39637 *loc = GEN_INT (-val);
39638 return true;
39639 }
39640
39641 return false;
39642 }
39643
39644 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39645 optabs would emit if we didn't have TFmode patterns. */
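/* For inputs with the sign bit set (values too large for a signed
   conversion), the input is halved with its low bit folded back in
   (i0 = (in >> 1) | (in & 1)), converted as a signed value, and the
   result is doubled (f0 + f0); keeping the low bit preserves correct
   rounding of the final result.  */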
39646
39647 void
39648 x86_emit_floatuns (rtx operands[2])
39649 {
39650 rtx_code_label *neglab, *donelab;
39651 rtx i0, i1, f0, in, out;
39652 enum machine_mode mode, inmode;
39653
39654 inmode = GET_MODE (operands[1]);
39655 gcc_assert (inmode == SImode || inmode == DImode);
39656
39657 out = operands[0];
39658 in = force_reg (inmode, operands[1]);
39659 mode = GET_MODE (out);
39660 neglab = gen_label_rtx ();
39661 donelab = gen_label_rtx ();
39662 f0 = gen_reg_rtx (mode);
39663
39664 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39665
39666 expand_float (out, in, 0);
39667
39668 emit_jump_insn (gen_jump (donelab));
39669 emit_barrier ();
39670
39671 emit_label (neglab);
39672
39673 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39674 1, OPTAB_DIRECT);
39675 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39676 1, OPTAB_DIRECT);
39677 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39678
39679 expand_float (f0, i0, 0);
39680
39681 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39682
39683 emit_label (donelab);
39684 }
39685 \f
39686 /* AVX512F does support 64-byte integer vector operations,
39687 thus the longest vector we are faced with is V64QImode. */
39688 #define MAX_VECT_LEN 64
39689
39690 struct expand_vec_perm_d
39691 {
39692 rtx target, op0, op1;
39693 unsigned char perm[MAX_VECT_LEN];
39694 enum machine_mode vmode;
39695 unsigned char nelt;
39696 bool one_operand_p;
39697 bool testing_p;
39698 };
39699
39700 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39701 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39702 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39703 static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
39704
39705 /* Get a vector mode of the same size as the original but with elements
39706 twice as wide. This is only guaranteed to apply to integral vectors. */
39707
39708 static inline enum machine_mode
39709 get_mode_wider_vector (enum machine_mode o)
39710 {
39711 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39712 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39713 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39714 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39715 return n;
39716 }
39717
39718 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39719 fill target with val via vec_duplicate. */
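/* The vec_duplicate of VAL is emitted and recognized as-is first (VAL may
   be an operand the pattern accepts directly); if recog fails, VAL is
   forced into a register and the already-emitted insn is re-recognized in
   place.  */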
39720
39721 static bool
39722 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39723 {
39724 bool ok;
39725 rtx_insn *insn;
39726 rtx dup;
39727
39728 /* First attempt to recognize VAL as-is. */
39729 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39730 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39731 if (recog_memoized (insn) < 0)
39732 {
39733 rtx_insn *seq;
39734 /* If that fails, force VAL into a register. */
39735
39736 start_sequence ();
39737 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39738 seq = get_insns ();
39739 end_sequence ();
39740 if (seq)
39741 emit_insn_before (seq, insn);
39742
39743 ok = recog_memoized (insn) >= 0;
39744 gcc_assert (ok);
39745 }
39746 return true;
39747 }
39748
39749 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39750 with all elements equal to VAR. Return true if successful. */
39751
39752 static bool
39753 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39754 rtx target, rtx val)
39755 {
39756 bool ok;
39757
39758 switch (mode)
39759 {
39760 case V2SImode:
39761 case V2SFmode:
39762 if (!mmx_ok)
39763 return false;
39764 /* FALLTHRU */
39765
39766 case V4DFmode:
39767 case V4DImode:
39768 case V8SFmode:
39769 case V8SImode:
39770 case V2DFmode:
39771 case V2DImode:
39772 case V4SFmode:
39773 case V4SImode:
39774 case V16SImode:
39775 case V8DImode:
39776 case V16SFmode:
39777 case V8DFmode:
39778 return ix86_vector_duplicate_value (mode, target, val);
39779
39780 case V4HImode:
39781 if (!mmx_ok)
39782 return false;
39783 if (TARGET_SSE || TARGET_3DNOW_A)
39784 {
39785 rtx x;
39786
39787 val = gen_lowpart (SImode, val);
39788 x = gen_rtx_TRUNCATE (HImode, val);
39789 x = gen_rtx_VEC_DUPLICATE (mode, x);
39790 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39791 return true;
39792 }
39793 goto widen;
39794
39795 case V8QImode:
39796 if (!mmx_ok)
39797 return false;
39798 goto widen;
39799
39800 case V8HImode:
39801 if (TARGET_SSE2)
39802 {
39803 struct expand_vec_perm_d dperm;
39804 rtx tmp1, tmp2;
39805
39806 permute:
39807 memset (&dperm, 0, sizeof (dperm));
39808 dperm.target = target;
39809 dperm.vmode = mode;
39810 dperm.nelt = GET_MODE_NUNITS (mode);
39811 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39812 dperm.one_operand_p = true;
39813
39814 /* Extend to SImode using a paradoxical SUBREG. */
39815 tmp1 = gen_reg_rtx (SImode);
39816 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39817
39818 /* Insert the SImode value as low element of a V4SImode vector. */
39819 tmp2 = gen_reg_rtx (V4SImode);
39820 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39821 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39822
39823 ok = (expand_vec_perm_1 (&dperm)
39824 || expand_vec_perm_broadcast_1 (&dperm));
39825 gcc_assert (ok);
39826 return ok;
39827 }
39828 goto widen;
39829
39830 case V16QImode:
39831 if (TARGET_SSE2)
39832 goto permute;
39833 goto widen;
39834
39835 widen:
39836 /* Replicate the value once into the next wider mode and recurse. */
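/* E.g. for V16QImode a QImode value 0xab is widened to the HImode value
   0xabab (val | (val << 8)), broadcast as V8HImode, and the result
   reinterpreted as the original V16QImode vector.  */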
39837 {
39838 enum machine_mode smode, wsmode, wvmode;
39839 rtx x;
39840
39841 smode = GET_MODE_INNER (mode);
39842 wvmode = get_mode_wider_vector (mode);
39843 wsmode = GET_MODE_INNER (wvmode);
39844
39845 val = convert_modes (wsmode, smode, val, true);
39846 x = expand_simple_binop (wsmode, ASHIFT, val,
39847 GEN_INT (GET_MODE_BITSIZE (smode)),
39848 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39849 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39850
39851 x = gen_reg_rtx (wvmode);
39852 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39853 gcc_assert (ok);
39854 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39855 return ok;
39856 }
39857
39858 case V16HImode:
39859 case V32QImode:
39860 {
39861 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39862 rtx x = gen_reg_rtx (hvmode);
39863
39864 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39865 gcc_assert (ok);
39866
39867 x = gen_rtx_VEC_CONCAT (mode, x, x);
39868 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39869 }
39870 return true;
39871
39872 default:
39873 return false;
39874 }
39875 }
39876
39877 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39878 whose ONE_VAR element is VAR, and other elements are zero. Return true
39879 if successful. */
39880
39881 static bool
39882 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39883 rtx target, rtx var, int one_var)
39884 {
39885 enum machine_mode vsimode;
39886 rtx new_target;
39887 rtx x, tmp;
39888 bool use_vector_set = false;
39889
39890 switch (mode)
39891 {
39892 case V2DImode:
39893 /* For SSE4.1, we normally use vector set. But if the second
39894 element is zero and inter-unit moves are OK, we use movq
39895 instead. */
39896 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39897 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39898 && one_var == 0));
39899 break;
39900 case V16QImode:
39901 case V4SImode:
39902 case V4SFmode:
39903 use_vector_set = TARGET_SSE4_1;
39904 break;
39905 case V8HImode:
39906 use_vector_set = TARGET_SSE2;
39907 break;
39908 case V4HImode:
39909 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39910 break;
39911 case V32QImode:
39912 case V16HImode:
39913 case V8SImode:
39914 case V8SFmode:
39915 case V4DFmode:
39916 use_vector_set = TARGET_AVX;
39917 break;
39918 case V4DImode:
39919 /* Use ix86_expand_vector_set in 64bit mode only. */
39920 use_vector_set = TARGET_AVX && TARGET_64BIT;
39921 break;
39922 default:
39923 break;
39924 }
39925
39926 if (use_vector_set)
39927 {
39928 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39929 var = force_reg (GET_MODE_INNER (mode), var);
39930 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39931 return true;
39932 }
39933
39934 switch (mode)
39935 {
39936 case V2SFmode:
39937 case V2SImode:
39938 if (!mmx_ok)
39939 return false;
39940 /* FALLTHRU */
39941
39942 case V2DFmode:
39943 case V2DImode:
39944 if (one_var != 0)
39945 return false;
39946 var = force_reg (GET_MODE_INNER (mode), var);
39947 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39948 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39949 return true;
39950
39951 case V4SFmode:
39952 case V4SImode:
39953 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39954 new_target = gen_reg_rtx (mode);
39955 else
39956 new_target = target;
39957 var = force_reg (GET_MODE_INNER (mode), var);
39958 x = gen_rtx_VEC_DUPLICATE (mode, var);
39959 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39960 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39961 if (one_var != 0)
39962 {
39963 /* We need to shuffle the value to the correct position, so
39964 create a new pseudo to store the intermediate result. */
39965
39966 /* With SSE2, we can use the integer shuffle insns. */
39967 if (mode != V4SFmode && TARGET_SSE2)
39968 {
39969 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39970 const1_rtx,
39971 GEN_INT (one_var == 1 ? 0 : 1),
39972 GEN_INT (one_var == 2 ? 0 : 1),
39973 GEN_INT (one_var == 3 ? 0 : 1)));
39974 if (target != new_target)
39975 emit_move_insn (target, new_target);
39976 return true;
39977 }
39978
39979 /* Otherwise convert the intermediate result to V4SFmode and
39980 use the SSE1 shuffle instructions. */
39981 if (mode != V4SFmode)
39982 {
39983 tmp = gen_reg_rtx (V4SFmode);
39984 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39985 }
39986 else
39987 tmp = new_target;
39988
39989 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39990 const1_rtx,
39991 GEN_INT (one_var == 1 ? 0 : 1),
39992 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39993 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39994
39995 if (mode != V4SFmode)
39996 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39997 else if (tmp != target)
39998 emit_move_insn (target, tmp);
39999 }
40000 else if (target != new_target)
40001 emit_move_insn (target, new_target);
40002 return true;
40003
40004 case V8HImode:
40005 case V16QImode:
40006 vsimode = V4SImode;
40007 goto widen;
40008 case V4HImode:
40009 case V8QImode:
40010 if (!mmx_ok)
40011 return false;
40012 vsimode = V2SImode;
40013 goto widen;
40014 widen:
40015 if (one_var != 0)
40016 return false;
40017
40018 /* Zero extend the variable element to SImode and recurse. */
40019 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
40020
40021 x = gen_reg_rtx (vsimode);
40022 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
40023 var, one_var))
40024 gcc_unreachable ();
40025
40026 emit_move_insn (target, gen_lowpart (mode, x));
40027 return true;
40028
40029 default:
40030 return false;
40031 }
40032 }
40033
40034 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
40035 consisting of the values in VALS. It is known that all elements
40036 except ONE_VAR are constants. Return true if successful. */
40037
40038 static bool
40039 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
40040 rtx target, rtx vals, int one_var)
40041 {
40042 rtx var = XVECEXP (vals, 0, one_var);
40043 enum machine_mode wmode;
40044 rtx const_vec, x;
40045
40046 const_vec = copy_rtx (vals);
40047 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
40048 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
40049
40050 switch (mode)
40051 {
40052 case V2DFmode:
40053 case V2DImode:
40054 case V2SFmode:
40055 case V2SImode:
40056 /* For the two element vectors, it's just as easy to use
40057 the general case. */
40058 return false;
40059
40060 case V4DImode:
40061 /* Use ix86_expand_vector_set in 64bit mode only. */
40062 if (!TARGET_64BIT)
40063 return false;
40064 case V4DFmode:
40065 case V8SFmode:
40066 case V8SImode:
40067 case V16HImode:
40068 case V32QImode:
40069 case V4SFmode:
40070 case V4SImode:
40071 case V8HImode:
40072 case V4HImode:
40073 break;
40074
40075 case V16QImode:
40076 if (TARGET_SSE4_1)
40077 break;
40078 wmode = V8HImode;
40079 goto widen;
40080 case V8QImode:
40081 wmode = V4HImode;
40082 goto widen;
40083 widen:
40084 /* There's no way to set one QImode entry easily. Combine
40085 the variable value with its adjacent constant value, and
40086 promote to an HImode set. */
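/* The paired constant is element ONE_VAR ^ 1; the variable byte goes into
   the high or low half of the combined HImode value depending on whether
   ONE_VAR is odd or even, and the pair is inserted at HImode position
   ONE_VAR >> 1.  */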
40087 x = XVECEXP (vals, 0, one_var ^ 1);
40088 if (one_var & 1)
40089 {
40090 var = convert_modes (HImode, QImode, var, true);
40091 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
40092 NULL_RTX, 1, OPTAB_LIB_WIDEN);
40093 x = GEN_INT (INTVAL (x) & 0xff);
40094 }
40095 else
40096 {
40097 var = convert_modes (HImode, QImode, var, true);
40098 x = gen_int_mode (INTVAL (x) << 8, HImode);
40099 }
40100 if (x != const0_rtx)
40101 var = expand_simple_binop (HImode, IOR, var, x, var,
40102 1, OPTAB_LIB_WIDEN);
40103
40104 x = gen_reg_rtx (wmode);
40105 emit_move_insn (x, gen_lowpart (wmode, const_vec));
40106 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
40107
40108 emit_move_insn (target, gen_lowpart (mode, x));
40109 return true;
40110
40111 default:
40112 return false;
40113 }
40114
40115 emit_move_insn (target, const_vec);
40116 ix86_expand_vector_set (mmx_ok, target, var, one_var);
40117 return true;
40118 }
40119
40120 /* A subroutine of ix86_expand_vector_init_general. Use vector
40121 concatenate to handle the most general case: all values variable,
40122 and none identical. */
40123
40124 static void
40125 ix86_expand_vector_init_concat (enum machine_mode mode,
40126 rtx target, rtx *ops, int n)
40127 {
40128 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
40129 rtx first[16], second[8], third[4];
40130 rtvec v;
40131 int i, j;
40132
40133 switch (n)
40134 {
40135 case 2:
40136 switch (mode)
40137 {
40138 case V16SImode:
40139 cmode = V8SImode;
40140 break;
40141 case V16SFmode:
40142 cmode = V8SFmode;
40143 break;
40144 case V8DImode:
40145 cmode = V4DImode;
40146 break;
40147 case V8DFmode:
40148 cmode = V4DFmode;
40149 break;
40150 case V8SImode:
40151 cmode = V4SImode;
40152 break;
40153 case V8SFmode:
40154 cmode = V4SFmode;
40155 break;
40156 case V4DImode:
40157 cmode = V2DImode;
40158 break;
40159 case V4DFmode:
40160 cmode = V2DFmode;
40161 break;
40162 case V4SImode:
40163 cmode = V2SImode;
40164 break;
40165 case V4SFmode:
40166 cmode = V2SFmode;
40167 break;
40168 case V2DImode:
40169 cmode = DImode;
40170 break;
40171 case V2SImode:
40172 cmode = SImode;
40173 break;
40174 case V2DFmode:
40175 cmode = DFmode;
40176 break;
40177 case V2SFmode:
40178 cmode = SFmode;
40179 break;
40180 default:
40181 gcc_unreachable ();
40182 }
40183
40184 if (!register_operand (ops[1], cmode))
40185 ops[1] = force_reg (cmode, ops[1]);
40186 if (!register_operand (ops[0], cmode))
40187 ops[0] = force_reg (cmode, ops[0]);
40188 emit_insn (gen_rtx_SET (VOIDmode, target,
40189 gen_rtx_VEC_CONCAT (mode, ops[0],
40190 ops[1])));
40191 break;
40192
40193 case 4:
40194 switch (mode)
40195 {
40196 case V4DImode:
40197 cmode = V2DImode;
40198 break;
40199 case V4DFmode:
40200 cmode = V2DFmode;
40201 break;
40202 case V4SImode:
40203 cmode = V2SImode;
40204 break;
40205 case V4SFmode:
40206 cmode = V2SFmode;
40207 break;
40208 default:
40209 gcc_unreachable ();
40210 }
40211 goto half;
40212
40213 case 8:
40214 switch (mode)
40215 {
40216 case V8DImode:
40217 cmode = V2DImode;
40218 hmode = V4DImode;
40219 break;
40220 case V8DFmode:
40221 cmode = V2DFmode;
40222 hmode = V4DFmode;
40223 break;
40224 case V8SImode:
40225 cmode = V2SImode;
40226 hmode = V4SImode;
40227 break;
40228 case V8SFmode:
40229 cmode = V2SFmode;
40230 hmode = V4SFmode;
40231 break;
40232 default:
40233 gcc_unreachable ();
40234 }
40235 goto half;
40236
40237 case 16:
40238 switch (mode)
40239 {
40240 case V16SImode:
40241 cmode = V2SImode;
40242 hmode = V4SImode;
40243 gmode = V8SImode;
40244 break;
40245 case V16SFmode:
40246 cmode = V2SFmode;
40247 hmode = V4SFmode;
40248 gmode = V8SFmode;
40249 break;
40250 default:
40251 gcc_unreachable ();
40252 }
40253 goto half;
40254
40255 half:
40256 /* FIXME: We process inputs backward to help RA. PR 36222. */
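/* Pairs of the N inputs are concatenated into CMODE vectors, then pairs of
   those into HMODE (and GMODE for N == 16) vectors, halving N at each
   level until a single concatenation fills TARGET.  */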
40257 i = n - 1;
40258 j = (n >> 1) - 1;
40259 for (; i > 0; i -= 2, j--)
40260 {
40261 first[j] = gen_reg_rtx (cmode);
40262 v = gen_rtvec (2, ops[i - 1], ops[i]);
40263 ix86_expand_vector_init (false, first[j],
40264 gen_rtx_PARALLEL (cmode, v));
40265 }
40266
40267 n >>= 1;
40268 if (n > 4)
40269 {
40270 gcc_assert (hmode != VOIDmode);
40271 gcc_assert (gmode != VOIDmode);
40272 for (i = j = 0; i < n; i += 2, j++)
40273 {
40274 second[j] = gen_reg_rtx (hmode);
40275 ix86_expand_vector_init_concat (hmode, second [j],
40276 &first [i], 2);
40277 }
40278 n >>= 1;
40279 for (i = j = 0; i < n; i += 2, j++)
40280 {
40281 third[j] = gen_reg_rtx (gmode);
40282 ix86_expand_vector_init_concat (gmode, third[j],
40283 &second[i], 2);
40284 }
40285 n >>= 1;
40286 ix86_expand_vector_init_concat (mode, target, third, n);
40287 }
40288 else if (n > 2)
40289 {
40290 gcc_assert (hmode != VOIDmode);
40291 for (i = j = 0; i < n; i += 2, j++)
40292 {
40293 second[j] = gen_reg_rtx (hmode);
40294 ix86_expand_vector_init_concat (hmode, second [j],
40295 &first [i], 2);
40296 }
40297 n >>= 1;
40298 ix86_expand_vector_init_concat (mode, target, second, n);
40299 }
40300 else
40301 ix86_expand_vector_init_concat (mode, target, first, n);
40302 break;
40303
40304 default:
40305 gcc_unreachable ();
40306 }
40307 }
40308
40309 /* A subroutine of ix86_expand_vector_init_general. Use vector
40310 interleave to handle the most general case: all values variable,
40311 and none identical. */
40312
40313 static void
40314 ix86_expand_vector_init_interleave (enum machine_mode mode,
40315 rtx target, rtx *ops, int n)
40316 {
40317 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40318 int i, j;
40319 rtx op0, op1;
40320 rtx (*gen_load_even) (rtx, rtx, rtx);
40321 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40322 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40323
40324 switch (mode)
40325 {
40326 case V8HImode:
40327 gen_load_even = gen_vec_setv8hi;
40328 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40329 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40330 inner_mode = HImode;
40331 first_imode = V4SImode;
40332 second_imode = V2DImode;
40333 third_imode = VOIDmode;
40334 break;
40335 case V16QImode:
40336 gen_load_even = gen_vec_setv16qi;
40337 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40338 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40339 inner_mode = QImode;
40340 first_imode = V8HImode;
40341 second_imode = V4SImode;
40342 third_imode = V2DImode;
40343 break;
40344 default:
40345 gcc_unreachable ();
40346 }
40347
40348 for (i = 0; i < n; i++)
40349 {
40350 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40351 op0 = gen_reg_rtx (SImode);
40352 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40353
40354 /* Insert the SImode value as low element of V4SImode vector. */
40355 op1 = gen_reg_rtx (V4SImode);
40356 op0 = gen_rtx_VEC_MERGE (V4SImode,
40357 gen_rtx_VEC_DUPLICATE (V4SImode,
40358 op0),
40359 CONST0_RTX (V4SImode),
40360 const1_rtx);
40361 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40362
40363 /* Cast the V4SImode vector back to a vector in the original mode. */
40364 op0 = gen_reg_rtx (mode);
40365 emit_move_insn (op0, gen_lowpart (mode, op1));
40366
40367 /* Load even elements into the second position. */
40368 emit_insn (gen_load_even (op0,
40369 force_reg (inner_mode,
40370 ops [i + i + 1]),
40371 const1_rtx));
40372
40373 /* Cast vector to FIRST_IMODE vector. */
40374 ops[i] = gen_reg_rtx (first_imode);
40375 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40376 }
40377
40378 /* Interleave low FIRST_IMODE vectors. */
40379 for (i = j = 0; i < n; i += 2, j++)
40380 {
40381 op0 = gen_reg_rtx (first_imode);
40382 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40383
40384 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40385 ops[j] = gen_reg_rtx (second_imode);
40386 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40387 }
40388
40389 /* Interleave low SECOND_IMODE vectors. */
40390 switch (second_imode)
40391 {
40392 case V4SImode:
40393 for (i = j = 0; i < n / 2; i += 2, j++)
40394 {
40395 op0 = gen_reg_rtx (second_imode);
40396 emit_insn (gen_interleave_second_low (op0, ops[i],
40397 ops[i + 1]));
40398
40399 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40400 vector. */
40401 ops[j] = gen_reg_rtx (third_imode);
40402 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40403 }
40404 second_imode = V2DImode;
40405 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40406 /* FALLTHRU */
40407
40408 case V2DImode:
40409 op0 = gen_reg_rtx (second_imode);
40410 emit_insn (gen_interleave_second_low (op0, ops[0],
40411 ops[1]));
40412
40413 /* Cast the SECOND_IMODE vector back to a vector in the original
40414 mode. */
40415 emit_insn (gen_rtx_SET (VOIDmode, target,
40416 gen_lowpart (mode, op0)));
40417 break;
40418
40419 default:
40420 gcc_unreachable ();
40421 }
40422 }
40423
40424 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40425 all values variable, and none identical. */
40426
40427 static void
40428 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40429 rtx target, rtx vals)
40430 {
40431 rtx ops[64], op0, op1;
40432 enum machine_mode half_mode = VOIDmode;
40433 int n, i;
40434
40435 switch (mode)
40436 {
40437 case V2SFmode:
40438 case V2SImode:
40439 if (!mmx_ok && !TARGET_SSE)
40440 break;
40441 /* FALLTHRU */
40442
40443 case V16SImode:
40444 case V16SFmode:
40445 case V8DFmode:
40446 case V8DImode:
40447 case V8SFmode:
40448 case V8SImode:
40449 case V4DFmode:
40450 case V4DImode:
40451 case V4SFmode:
40452 case V4SImode:
40453 case V2DFmode:
40454 case V2DImode:
40455 n = GET_MODE_NUNITS (mode);
40456 for (i = 0; i < n; i++)
40457 ops[i] = XVECEXP (vals, 0, i);
40458 ix86_expand_vector_init_concat (mode, target, ops, n);
40459 return;
40460
40461 case V32QImode:
40462 half_mode = V16QImode;
40463 goto half;
40464
40465 case V16HImode:
40466 half_mode = V8HImode;
40467 goto half;
40468
40469 half:
40470 n = GET_MODE_NUNITS (mode);
40471 for (i = 0; i < n; i++)
40472 ops[i] = XVECEXP (vals, 0, i);
40473 op0 = gen_reg_rtx (half_mode);
40474 op1 = gen_reg_rtx (half_mode);
40475 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40476 n >> 2);
40477 ix86_expand_vector_init_interleave (half_mode, op1,
40478 &ops [n >> 1], n >> 2);
40479 emit_insn (gen_rtx_SET (VOIDmode, target,
40480 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40481 return;
40482
40483 case V16QImode:
40484 if (!TARGET_SSE4_1)
40485 break;
40486 /* FALLTHRU */
40487
40488 case V8HImode:
40489 if (!TARGET_SSE2)
40490 break;
40491
40492 /* Don't use ix86_expand_vector_init_interleave if we can't
40493 move from GPR to SSE register directly. */
40494 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40495 break;
40496
40497 n = GET_MODE_NUNITS (mode);
40498 for (i = 0; i < n; i++)
40499 ops[i] = XVECEXP (vals, 0, i);
40500 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40501 return;
40502
40503 case V4HImode:
40504 case V8QImode:
40505 break;
40506
40507 default:
40508 gcc_unreachable ();
40509 }
40510
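/* Fallback: pack the vector elements into word_mode integers using shifts
   and ORs, then assemble TARGET from those words.  */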
40511 {
40512 int i, j, n_elts, n_words, n_elt_per_word;
40513 enum machine_mode inner_mode;
40514 rtx words[4], shift;
40515
40516 inner_mode = GET_MODE_INNER (mode);
40517 n_elts = GET_MODE_NUNITS (mode);
40518 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40519 n_elt_per_word = n_elts / n_words;
40520 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40521
40522 for (i = 0; i < n_words; ++i)
40523 {
40524 rtx word = NULL_RTX;
40525
40526 for (j = 0; j < n_elt_per_word; ++j)
40527 {
40528 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40529 elt = convert_modes (word_mode, inner_mode, elt, true);
40530
40531 if (j == 0)
40532 word = elt;
40533 else
40534 {
40535 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40536 word, 1, OPTAB_LIB_WIDEN);
40537 word = expand_simple_binop (word_mode, IOR, word, elt,
40538 word, 1, OPTAB_LIB_WIDEN);
40539 }
40540 }
40541
40542 words[i] = word;
40543 }
40544
40545 if (n_words == 1)
40546 emit_move_insn (target, gen_lowpart (mode, words[0]));
40547 else if (n_words == 2)
40548 {
40549 rtx tmp = gen_reg_rtx (mode);
40550 emit_clobber (tmp);
40551 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40552 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40553 emit_move_insn (target, tmp);
40554 }
40555 else if (n_words == 4)
40556 {
40557 rtx tmp = gen_reg_rtx (V4SImode);
40558 gcc_assert (word_mode == SImode);
40559 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40560 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40561 emit_move_insn (target, gen_lowpart (mode, tmp));
40562 }
40563 else
40564 gcc_unreachable ();
40565 }
40566 }
40567
40568 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40569 instructions unless MMX_OK is true. */
40570
40571 void
40572 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40573 {
40574 enum machine_mode mode = GET_MODE (target);
40575 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40576 int n_elts = GET_MODE_NUNITS (mode);
40577 int n_var = 0, one_var = -1;
40578 bool all_same = true, all_const_zero = true;
40579 int i;
40580 rtx x;
40581
40582 for (i = 0; i < n_elts; ++i)
40583 {
40584 x = XVECEXP (vals, 0, i);
40585 if (!(CONST_INT_P (x)
40586 || GET_CODE (x) == CONST_DOUBLE
40587 || GET_CODE (x) == CONST_FIXED))
40588 n_var++, one_var = i;
40589 else if (x != CONST0_RTX (inner_mode))
40590 all_const_zero = false;
40591 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40592 all_same = false;
40593 }
40594
40595 /* Constants are best loaded from the constant pool. */
40596 if (n_var == 0)
40597 {
40598 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40599 return;
40600 }
40601
40602 /* If all values are identical, broadcast the value. */
40603 if (all_same
40604 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40605 XVECEXP (vals, 0, 0)))
40606 return;
40607
40608 /* Values where only one field is non-constant are best loaded from
40609 the pool and overwritten via move later. */
40610 if (n_var == 1)
40611 {
40612 if (all_const_zero
40613 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40614 XVECEXP (vals, 0, one_var),
40615 one_var))
40616 return;
40617
40618 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40619 return;
40620 }
40621
40622 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40623 }
40624
40625 void
40626 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40627 {
40628 enum machine_mode mode = GET_MODE (target);
40629 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40630 enum machine_mode half_mode;
40631 bool use_vec_merge = false;
40632 rtx tmp;
40633 static rtx (*gen_extract[6][2]) (rtx, rtx)
40634 = {
40635 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40636 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40637 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40638 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40639 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40640 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40641 };
40642 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40643 = {
40644 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40645 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40646 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40647 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40648 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40649 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40650 };
40651 int i, j, n;
40652
40653 switch (mode)
40654 {
40655 case V2SFmode:
40656 case V2SImode:
40657 if (mmx_ok)
40658 {
40659 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40660 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40661 if (elt == 0)
40662 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40663 else
40664 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40665 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40666 return;
40667 }
40668 break;
40669
40670 case V2DImode:
40671 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40672 if (use_vec_merge)
40673 break;
40674
40675 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40676 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40677 if (elt == 0)
40678 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40679 else
40680 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40681 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40682 return;
40683
40684 case V2DFmode:
40685 {
40686 rtx op0, op1;
40687
40688 /* For the two element vectors, we implement a VEC_CONCAT with
40689 the extraction of the other element. */
40690
40691 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40692 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40693
40694 if (elt == 0)
40695 op0 = val, op1 = tmp;
40696 else
40697 op0 = tmp, op1 = val;
40698
40699 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40700 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40701 }
40702 return;
40703
40704 case V4SFmode:
40705 use_vec_merge = TARGET_SSE4_1;
40706 if (use_vec_merge)
40707 break;
40708
40709 switch (elt)
40710 {
40711 case 0:
40712 use_vec_merge = true;
40713 break;
40714
40715 case 1:
40716 /* tmp = target = A B C D */
40717 tmp = copy_to_reg (target);
40718 /* target = A A B B */
40719 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40720 /* target = X A B B */
40721 ix86_expand_vector_set (false, target, val, 0);
40722 /* target = A X C D */
40723 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40724 const1_rtx, const0_rtx,
40725 GEN_INT (2+4), GEN_INT (3+4)));
40726 return;
40727
40728 case 2:
40729 /* tmp = target = A B C D */
40730 tmp = copy_to_reg (target);
40731 /* tmp = X B C D */
40732 ix86_expand_vector_set (false, tmp, val, 0);
40733 /* target = A B X D */
40734 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40735 const0_rtx, const1_rtx,
40736 GEN_INT (0+4), GEN_INT (3+4)));
40737 return;
40738
40739 case 3:
40740 /* tmp = target = A B C D */
40741 tmp = copy_to_reg (target);
40742 /* tmp = X B C D */
40743 ix86_expand_vector_set (false, tmp, val, 0);
40744 /* target = A B C X */
40745 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40746 const0_rtx, const1_rtx,
40747 GEN_INT (2+4), GEN_INT (0+4)));
40748 return;
40749
40750 default:
40751 gcc_unreachable ();
40752 }
40753 break;
40754
40755 case V4SImode:
40756 use_vec_merge = TARGET_SSE4_1;
40757 if (use_vec_merge)
40758 break;
40759
40760 /* Element 0 handled by vec_merge below. */
40761 if (elt == 0)
40762 {
40763 use_vec_merge = true;
40764 break;
40765 }
40766
40767 if (TARGET_SSE2)
40768 {
40769 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40770 store into element 0, then shuffle them back. */
40771
40772 rtx order[4];
40773
40774 order[0] = GEN_INT (elt);
40775 order[1] = const1_rtx;
40776 order[2] = const2_rtx;
40777 order[3] = GEN_INT (3);
40778 order[elt] = const0_rtx;
40779
40780 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40781 order[1], order[2], order[3]));
40782
40783 ix86_expand_vector_set (false, target, val, 0);
40784
40785 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40786 order[1], order[2], order[3]));
40787 }
40788 else
40789 {
40790 /* For SSE1, we have to reuse the V4SF code. */
40791 rtx t = gen_reg_rtx (V4SFmode);
40792 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40793 emit_move_insn (target, gen_lowpart (mode, t));
40794 }
40795 return;
40796
40797 case V8HImode:
40798 use_vec_merge = TARGET_SSE2;
40799 break;
40800 case V4HImode:
40801 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40802 break;
40803
40804 case V16QImode:
40805 use_vec_merge = TARGET_SSE4_1;
40806 break;
40807
40808 case V8QImode:
40809 break;
40810
40811 case V32QImode:
40812 half_mode = V16QImode;
40813 j = 0;
40814 n = 16;
40815 goto half;
40816
40817 case V16HImode:
40818 half_mode = V8HImode;
40819 j = 1;
40820 n = 8;
40821 goto half;
40822
40823 case V8SImode:
40824 half_mode = V4SImode;
40825 j = 2;
40826 n = 4;
40827 goto half;
40828
40829 case V4DImode:
40830 half_mode = V2DImode;
40831 j = 3;
40832 n = 2;
40833 goto half;
40834
40835 case V8SFmode:
40836 half_mode = V4SFmode;
40837 j = 4;
40838 n = 4;
40839 goto half;
40840
40841 case V4DFmode:
40842 half_mode = V2DFmode;
40843 j = 5;
40844 n = 2;
40845 goto half;
40846
40847 half:
40848 /* Compute offset. */
40849 i = elt / n;
40850 elt %= n;
40851
40852 gcc_assert (i <= 1);
40853
40854 /* Extract the half. */
40855 tmp = gen_reg_rtx (half_mode);
40856 emit_insn (gen_extract[j][i] (tmp, target));
40857
40858 /* Put val in tmp at elt. */
40859 ix86_expand_vector_set (false, tmp, val, elt);
40860
40861 /* Put it back. */
40862 emit_insn (gen_insert[j][i] (target, target, tmp));
40863 return;
40864
40865 default:
40866 break;
40867 }
40868
40869 if (use_vec_merge)
40870 {
40871 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40872 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40873 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40874 }
40875 else
40876 {
40877 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40878
40879 emit_move_insn (mem, target);
40880
40881 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40882 emit_move_insn (tmp, val);
40883
40884 emit_move_insn (target, mem);
40885 }
40886 }
40887
40888 void
40889 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40890 {
40891 enum machine_mode mode = GET_MODE (vec);
40892 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40893 bool use_vec_extr = false;
40894 rtx tmp;
40895
40896 switch (mode)
40897 {
40898 case V2SImode:
40899 case V2SFmode:
40900 if (!mmx_ok)
40901 break;
40902 /* FALLTHRU */
40903
40904 case V2DFmode:
40905 case V2DImode:
40906 use_vec_extr = true;
40907 break;
40908
40909 case V4SFmode:
40910 use_vec_extr = TARGET_SSE4_1;
40911 if (use_vec_extr)
40912 break;
40913
40914 switch (elt)
40915 {
40916 case 0:
40917 tmp = vec;
40918 break;
40919
40920 case 1:
40921 case 3:
40922 tmp = gen_reg_rtx (mode);
40923 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40924 GEN_INT (elt), GEN_INT (elt),
40925 GEN_INT (elt+4), GEN_INT (elt+4)));
40926 break;
40927
40928 case 2:
40929 tmp = gen_reg_rtx (mode);
40930 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40931 break;
40932
40933 default:
40934 gcc_unreachable ();
40935 }
40936 vec = tmp;
40937 use_vec_extr = true;
40938 elt = 0;
40939 break;
40940
40941 case V4SImode:
40942 use_vec_extr = TARGET_SSE4_1;
40943 if (use_vec_extr)
40944 break;
40945
40946 if (TARGET_SSE2)
40947 {
40948 switch (elt)
40949 {
40950 case 0:
40951 tmp = vec;
40952 break;
40953
40954 case 1:
40955 case 3:
40956 tmp = gen_reg_rtx (mode);
40957 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40958 GEN_INT (elt), GEN_INT (elt),
40959 GEN_INT (elt), GEN_INT (elt)));
40960 break;
40961
40962 case 2:
40963 tmp = gen_reg_rtx (mode);
40964 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40965 break;
40966
40967 default:
40968 gcc_unreachable ();
40969 }
40970 vec = tmp;
40971 use_vec_extr = true;
40972 elt = 0;
40973 }
40974 else
40975 {
40976 /* For SSE1, we have to reuse the V4SF code. */
40977 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40978 gen_lowpart (V4SFmode, vec), elt);
40979 return;
40980 }
40981 break;
40982
40983 case V8HImode:
40984 use_vec_extr = TARGET_SSE2;
40985 break;
40986 case V4HImode:
40987 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40988 break;
40989
40990 case V16QImode:
40991 use_vec_extr = TARGET_SSE4_1;
40992 break;
40993
40994 case V8SFmode:
40995 if (TARGET_AVX)
40996 {
40997 tmp = gen_reg_rtx (V4SFmode);
40998 if (elt < 4)
40999 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
41000 else
41001 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
41002 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41003 return;
41004 }
41005 break;
41006
41007 case V4DFmode:
41008 if (TARGET_AVX)
41009 {
41010 tmp = gen_reg_rtx (V2DFmode);
41011 if (elt < 2)
41012 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
41013 else
41014 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
41015 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41016 return;
41017 }
41018 break;
41019
41020 case V32QImode:
41021 if (TARGET_AVX)
41022 {
41023 tmp = gen_reg_rtx (V16QImode);
41024 if (elt < 16)
41025 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
41026 else
41027 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
41028 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41029 return;
41030 }
41031 break;
41032
41033 case V16HImode:
41034 if (TARGET_AVX)
41035 {
41036 tmp = gen_reg_rtx (V8HImode);
41037 if (elt < 8)
41038 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
41039 else
41040 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
41041 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41042 return;
41043 }
41044 break;
41045
41046 case V8SImode:
41047 if (TARGET_AVX)
41048 {
41049 tmp = gen_reg_rtx (V4SImode);
41050 if (elt < 4)
41051 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
41052 else
41053 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
41054 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41055 return;
41056 }
41057 break;
41058
41059 case V4DImode:
41060 if (TARGET_AVX)
41061 {
41062 tmp = gen_reg_rtx (V2DImode);
41063 if (elt < 2)
41064 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
41065 else
41066 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
41067 ix86_expand_vector_extract (false, target, tmp, elt & 1);
41068 return;
41069 }
41070 break;
41071
41072 case V32HImode:
41073 if (TARGET_AVX512BW)
41074 {
41075 tmp = gen_reg_rtx (V16HImode);
41076 if (elt < 16)
41077 emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
41078 else
41079 emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
41080 ix86_expand_vector_extract (false, target, tmp, elt & 15);
41081 return;
41082 }
41083 break;
41084
41085 case V64QImode:
41086 if (TARGET_AVX512BW)
41087 {
41088 tmp = gen_reg_rtx (V32QImode);
41089 if (elt < 32)
41090 emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
41091 else
41092 emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
41093 ix86_expand_vector_extract (false, target, tmp, elt & 31);
41094 return;
41095 }
41096 break;
41097
41098 case V16SFmode:
41099 tmp = gen_reg_rtx (V8SFmode);
41100 if (elt < 8)
41101 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
41102 else
41103 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
41104 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41105 return;
41106
41107 case V8DFmode:
41108 tmp = gen_reg_rtx (V4DFmode);
41109 if (elt < 4)
41110 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
41111 else
41112 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
41113 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41114 return;
41115
41116 case V16SImode:
41117 tmp = gen_reg_rtx (V8SImode);
41118 if (elt < 8)
41119 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
41120 else
41121 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
41122 ix86_expand_vector_extract (false, target, tmp, elt & 7);
41123 return;
41124
41125 case V8DImode:
41126 tmp = gen_reg_rtx (V4DImode);
41127 if (elt < 4)
41128 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
41129 else
41130 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
41131 ix86_expand_vector_extract (false, target, tmp, elt & 3);
41132 return;
41133
41134 case V8QImode:
41135 /* ??? Could extract the appropriate HImode element and shift. */
41136 default:
41137 break;
41138 }
41139
41140 if (use_vec_extr)
41141 {
41142 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
41143 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
41144
41145 /* Let the rtl optimizers know about the zero extension performed. */
41146 if (inner_mode == QImode || inner_mode == HImode)
41147 {
41148 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
41149 target = gen_lowpart (SImode, target);
41150 }
41151
41152 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
41153 }
41154 else
41155 {
41156 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
41157
41158 emit_move_insn (mem, vec);
41159
41160 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
41161 emit_move_insn (target, tmp);
41162 }
41163 }
41164
41165 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
41166 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
41167 The upper bits of DEST are undefined, though they shouldn't cause
41168 exceptions (some bits from src or all zeros are ok). */
41169
41170 static void
41171 emit_reduc_half (rtx dest, rtx src, int i)
41172 {
41173 rtx tem, d = dest;
41174 switch (GET_MODE (src))
41175 {
41176 case V4SFmode:
41177 if (i == 128)
41178 tem = gen_sse_movhlps (dest, src, src);
41179 else
41180 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
41181 GEN_INT (1 + 4), GEN_INT (1 + 4));
41182 break;
41183 case V2DFmode:
41184 tem = gen_vec_interleave_highv2df (dest, src, src);
41185 break;
41186 case V16QImode:
41187 case V8HImode:
41188 case V4SImode:
41189 case V2DImode:
41190 d = gen_reg_rtx (V1TImode);
41191 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
41192 GEN_INT (i / 2));
41193 break;
41194 case V8SFmode:
41195 if (i == 256)
41196 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
41197 else
41198 tem = gen_avx_shufps256 (dest, src, src,
41199 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
41200 break;
41201 case V4DFmode:
41202 if (i == 256)
41203 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
41204 else
41205 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
41206 break;
41207 case V32QImode:
41208 case V16HImode:
41209 case V8SImode:
41210 case V4DImode:
41211 if (i == 256)
41212 {
41213 if (GET_MODE (dest) != V4DImode)
41214 d = gen_reg_rtx (V4DImode);
41215 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
41216 gen_lowpart (V4DImode, src),
41217 const1_rtx);
41218 }
41219 else
41220 {
41221 d = gen_reg_rtx (V2TImode);
41222 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
41223 GEN_INT (i / 2));
41224 }
41225 break;
41226 case V64QImode:
41227 case V32HImode:
41228 case V16SImode:
41229 case V16SFmode:
41230 case V8DImode:
41231 case V8DFmode:
41232 if (i > 128)
41233 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
41234 gen_lowpart (V16SImode, src),
41235 gen_lowpart (V16SImode, src),
41236 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
41237 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
41238 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
41239 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
41240 GEN_INT (0xC), GEN_INT (0xD),
41241 GEN_INT (0xE), GEN_INT (0xF),
41242 GEN_INT (0x10), GEN_INT (0x11),
41243 GEN_INT (0x12), GEN_INT (0x13),
41244 GEN_INT (0x14), GEN_INT (0x15),
41245 GEN_INT (0x16), GEN_INT (0x17));
41246 else
41247 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
41248 gen_lowpart (V16SImode, src),
41249 GEN_INT (i == 128 ? 0x2 : 0x1),
41250 GEN_INT (0x3),
41251 GEN_INT (0x3),
41252 GEN_INT (0x3),
41253 GEN_INT (i == 128 ? 0x6 : 0x5),
41254 GEN_INT (0x7),
41255 GEN_INT (0x7),
41256 GEN_INT (0x7),
41257 GEN_INT (i == 128 ? 0xA : 0x9),
41258 GEN_INT (0xB),
41259 GEN_INT (0xB),
41260 GEN_INT (0xB),
41261 GEN_INT (i == 128 ? 0xE : 0xD),
41262 GEN_INT (0xF),
41263 GEN_INT (0xF),
41264 GEN_INT (0xF));
41265 break;
41266 default:
41267 gcc_unreachable ();
41268 }
41269 emit_insn (tem);
41270 if (d != dest)
41271 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
41272 }
41273
41274 /* Expand a vector reduction. FN is the binary pattern to reduce;
41275 DEST is the destination; IN is the input vector. */
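/* The loop below repeatedly folds the upper half of the vector onto the
   lower half; e.g. for a V8SFmode input it runs with i = 256, 128 and 64,
   after which element 0 of DEST holds the full reduction.  */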
41276
41277 void
41278 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
41279 {
41280 rtx half, dst, vec = in;
41281 enum machine_mode mode = GET_MODE (in);
41282 int i;
41283
41284 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
41285 if (TARGET_SSE4_1
41286 && mode == V8HImode
41287 && fn == gen_uminv8hi3)
41288 {
41289 emit_insn (gen_sse4_1_phminposuw (dest, in));
41290 return;
41291 }
41292
41293 for (i = GET_MODE_BITSIZE (mode);
41294 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41295 i >>= 1)
41296 {
41297 half = gen_reg_rtx (mode);
41298 emit_reduc_half (half, vec, i);
41299 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41300 dst = dest;
41301 else
41302 dst = gen_reg_rtx (mode);
41303 emit_insn (fn (dst, half, vec));
41304 vec = dst;
41305 }
41306 }
41307 \f
41308 /* Target hook for scalar_mode_supported_p. */
41309 static bool
41310 ix86_scalar_mode_supported_p (enum machine_mode mode)
41311 {
41312 if (DECIMAL_FLOAT_MODE_P (mode))
41313 return default_decimal_float_supported_p ();
41314 else if (mode == TFmode)
41315 return true;
41316 else
41317 return default_scalar_mode_supported_p (mode);
41318 }
41319
41320 /* Implements target hook vector_mode_supported_p. */
41321 static bool
41322 ix86_vector_mode_supported_p (enum machine_mode mode)
41323 {
41324 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41325 return true;
41326 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41327 return true;
41328 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41329 return true;
41330 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41331 return true;
41332 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41333 return true;
41334 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41335 return true;
41336 return false;
41337 }
41338
41339 /* Implement target hook libgcc_floating_mode_supported_p. */
41340 static bool
41341 ix86_libgcc_floating_mode_supported_p (enum machine_mode mode)
41342 {
41343 switch (mode)
41344 {
41345 case SFmode:
41346 case DFmode:
41347 case XFmode:
41348 return true;
41349
41350 case TFmode:
41351 #ifdef IX86_NO_LIBGCC_TFMODE
41352 return false;
41353 #elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
41354 return TARGET_LONG_DOUBLE_128;
41355 #else
41356 return true;
41357 #endif
41358
41359 default:
41360 return false;
41361 }
41362 }
41363
41364 /* Target hook for c_mode_for_suffix. */
41365 static enum machine_mode
41366 ix86_c_mode_for_suffix (char suffix)
41367 {
41368 if (suffix == 'q')
41369 return TFmode;
41370 if (suffix == 'w')
41371 return XFmode;
41372
41373 return VOIDmode;
41374 }
41375
41376 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41377
41378 We do this in the new i386 backend to maintain source compatibility
41379 with the old cc0-based compiler. */
41380
41381 static tree
41382 ix86_md_asm_clobbers (tree, tree, tree clobbers)
41383 {
41384 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41385 clobbers);
41386 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41387 clobbers);
41388 return clobbers;
41389 }
41390
41391 /* Implements target vector targetm.asm.encode_section_info. */
41392
41393 static void ATTRIBUTE_UNUSED
41394 ix86_encode_section_info (tree decl, rtx rtl, int first)
41395 {
41396 default_encode_section_info (decl, rtl, first);
41397
41398 if (TREE_CODE (decl) == VAR_DECL
41399 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41400 && ix86_in_large_data_p (decl))
41401 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41402 }
41403
41404 /* Worker function for REVERSE_CONDITION. */
41405
41406 enum rtx_code
41407 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41408 {
41409 return (mode != CCFPmode && mode != CCFPUmode
41410 ? reverse_condition (code)
41411 : reverse_condition_maybe_unordered (code));
41412 }
41413
41414 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41415 to OPERANDS[0]. */
41416
41417 const char *
41418 output_387_reg_move (rtx insn, rtx *operands)
41419 {
41420 if (REG_P (operands[0]))
41421 {
41422 if (REG_P (operands[1])
41423 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41424 {
41425 if (REGNO (operands[0]) == FIRST_STACK_REG)
41426 return output_387_ffreep (operands, 0);
41427 return "fstp\t%y0";
41428 }
41429 if (STACK_TOP_P (operands[0]))
41430 return "fld%Z1\t%y1";
41431 return "fst\t%y0";
41432 }
41433 else if (MEM_P (operands[0]))
41434 {
41435 gcc_assert (REG_P (operands[1]));
41436 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41437 return "fstp%Z0\t%y0";
41438 else
41439 {
41440 /* There is no non-popping store to memory for XFmode.
41441 So if we need one, follow the store with a load. */
41442 if (GET_MODE (operands[0]) == XFmode)
41443 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41444 else
41445 return "fst%Z0\t%y0";
41446 }
41447 }
41448 else
41449 gcc_unreachable();
41450 }
41451
41452 /* Output code to perform a conditional jump to LABEL if the C2 flag in
41453 the FP status register is set. */
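/* FNSTSW leaves the FPU status word in a scalar register. With SAHF the
   condition bits are copied into EFLAGS (C2 maps to PF, hence the
   UNORDERED test); otherwise C2 is tested directly as bit 0x04 of the
   status word's high byte.  */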
41454
41455 void
41456 ix86_emit_fp_unordered_jump (rtx label)
41457 {
41458 rtx reg = gen_reg_rtx (HImode);
41459 rtx temp;
41460
41461 emit_insn (gen_x86_fnstsw_1 (reg));
41462
41463 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41464 {
41465 emit_insn (gen_x86_sahf_1 (reg));
41466
41467 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41468 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41469 }
41470 else
41471 {
41472 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41473
41474 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41475 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41476 }
41477
41478 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41479 gen_rtx_LABEL_REF (VOIDmode, label),
41480 pc_rtx);
41481 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41482
41483 emit_jump_insn (temp);
41484 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41485 }
41486
41487 /* Output code to perform a log1p XFmode calculation. */
41488
41489 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41490 {
41491 rtx_code_label *label1 = gen_label_rtx ();
41492 rtx_code_label *label2 = gen_label_rtx ();
41493
41494 rtx tmp = gen_reg_rtx (XFmode);
41495 rtx tmp2 = gen_reg_rtx (XFmode);
41496 rtx test;
41497
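/* The threshold below is 1 - sqrt(2)/2 ~= 0.2928932; fyl2xp1 is only
   specified for arguments of smaller magnitude, so larger values take the
   branch that computes log2(1 + op1) with fyl2x instead.  */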
41498 emit_insn (gen_absxf2 (tmp, op1));
41499 test = gen_rtx_GE (VOIDmode, tmp,
41500 CONST_DOUBLE_FROM_REAL_VALUE (
41501 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41502 XFmode));
41503 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41504
41505 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41506 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41507 emit_jump (label2);
41508
41509 emit_label (label1);
41510 emit_move_insn (tmp, CONST1_RTX (XFmode));
41511 emit_insn (gen_addxf3 (tmp, op1, tmp));
41512 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41513 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41514
41515 emit_label (label2);
41516 }
41517
41518 /* Emit code for round calculation. */
41519 void ix86_emit_i387_round (rtx op0, rtx op1)
41520 {
41521 enum machine_mode inmode = GET_MODE (op1);
41522 enum machine_mode outmode = GET_MODE (op0);
41523 rtx e1, e2, res, tmp, tmp1, half;
41524 rtx scratch = gen_reg_rtx (HImode);
41525 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41526 rtx_code_label *jump_label = gen_label_rtx ();
41527 rtx insn;
41528 rtx (*gen_abs) (rtx, rtx);
41529 rtx (*gen_neg) (rtx, rtx);
41530
41531 switch (inmode)
41532 {
41533 case SFmode:
41534 gen_abs = gen_abssf2;
41535 break;
41536 case DFmode:
41537 gen_abs = gen_absdf2;
41538 break;
41539 case XFmode:
41540 gen_abs = gen_absxf2;
41541 break;
41542 default:
41543 gcc_unreachable ();
41544 }
41545
41546 switch (outmode)
41547 {
41548 case SFmode:
41549 gen_neg = gen_negsf2;
41550 break;
41551 case DFmode:
41552 gen_neg = gen_negdf2;
41553 break;
41554 case XFmode:
41555 gen_neg = gen_negxf2;
41556 break;
41557 case HImode:
41558 gen_neg = gen_neghi2;
41559 break;
41560 case SImode:
41561 gen_neg = gen_negsi2;
41562 break;
41563 case DImode:
41564 gen_neg = gen_negdi2;
41565 break;
41566 default:
41567 gcc_unreachable ();
41568 }
41569
41570 e1 = gen_reg_rtx (inmode);
41571 e2 = gen_reg_rtx (inmode);
41572 res = gen_reg_rtx (outmode);
41573
41574 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41575
41576 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
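/* Note this rounds halfway cases away from zero, e.g. round(2.5) = 3 and
   round(-2.5) = -3.  */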
41577
41578 /* scratch = fxam(op1) */
41579 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41580 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41581 UNSPEC_FXAM)));
41582 /* e1 = fabs(op1) */
41583 emit_insn (gen_abs (e1, op1));
41584
41585 /* e2 = e1 + 0.5 */
41586 half = force_reg (inmode, half);
41587 emit_insn (gen_rtx_SET (VOIDmode, e2,
41588 gen_rtx_PLUS (inmode, e1, half)));
41589
41590 /* res = floor(e2) */
41591 if (inmode != XFmode)
41592 {
41593 tmp1 = gen_reg_rtx (XFmode);
41594
41595 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41596 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41597 }
41598 else
41599 tmp1 = e2;
41600
41601 switch (outmode)
41602 {
41603 case SFmode:
41604 case DFmode:
41605 {
41606 rtx tmp0 = gen_reg_rtx (XFmode);
41607
41608 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41609
41610 emit_insn (gen_rtx_SET (VOIDmode, res,
41611 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41612 UNSPEC_TRUNC_NOOP)));
41613 }
41614 break;
41615 case XFmode:
41616 emit_insn (gen_frndintxf2_floor (res, tmp1));
41617 break;
41618 case HImode:
41619 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41620 break;
41621 case SImode:
41622 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41623 break;
41624 case DImode:
41625 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41626 break;
41627 default:
41628 gcc_unreachable ();
41629 }
41630
41631 /* flags = signbit(a) */
41632 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41633
41634 /* if (flags) then res = -res */
41635 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41636 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41637 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41638 pc_rtx);
41639 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41640 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41641 JUMP_LABEL (insn) = jump_label;
41642
41643 emit_insn (gen_neg (res, res));
41644
41645 emit_label (jump_label);
41646 LABEL_NUSES (jump_label) = 1;
41647
41648 emit_move_insn (op0, res);
41649 }
41650
41651 /* Output code to perform a Newton-Raphson approximation of a single precision
41652 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41653
41654 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41655 {
41656 rtx x0, x1, e0, e1;
41657
41658 x0 = gen_reg_rtx (mode);
41659 e0 = gen_reg_rtx (mode);
41660 e1 = gen_reg_rtx (mode);
41661 x1 = gen_reg_rtx (mode);
41662
41663 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
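/* This is one Newton-Raphson refinement step for x ~= 1/b:
   x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), which needs no
   division and roughly doubles the number of correct bits in x0.  */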
41664
41665 b = force_reg (mode, b);
41666
41667 /* x0 = rcp(b) estimate */
41668 if (mode == V16SFmode || mode == V8DFmode)
41669 emit_insn (gen_rtx_SET (VOIDmode, x0,
41670 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41671 UNSPEC_RCP14)));
41672 else
41673 emit_insn (gen_rtx_SET (VOIDmode, x0,
41674 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41675 UNSPEC_RCP)));
41676
41677 /* e0 = x0 * b */
41678 emit_insn (gen_rtx_SET (VOIDmode, e0,
41679 gen_rtx_MULT (mode, x0, b)));
41680
41681 /* e0 = x0 * e0 */
41682 emit_insn (gen_rtx_SET (VOIDmode, e0,
41683 gen_rtx_MULT (mode, x0, e0)));
41684
41685 /* e1 = x0 + x0 */
41686 emit_insn (gen_rtx_SET (VOIDmode, e1,
41687 gen_rtx_PLUS (mode, x0, x0)));
41688
41689 /* x1 = e1 - e0 */
41690 emit_insn (gen_rtx_SET (VOIDmode, x1,
41691 gen_rtx_MINUS (mode, e1, e0)));
41692
41693 /* res = a * x1 */
41694 emit_insn (gen_rtx_SET (VOIDmode, res,
41695 gen_rtx_MULT (mode, a, x1)));
41696 }
41697
41698 /* Output code to perform a Newton-Raphson approximation of a
41699 single precision floating point [reciprocal] square root. */
41700
41701 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41702 bool recip)
41703 {
41704 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41705 REAL_VALUE_TYPE r;
41706 int unspec;
41707
41708 x0 = gen_reg_rtx (mode);
41709 e0 = gen_reg_rtx (mode);
41710 e1 = gen_reg_rtx (mode);
41711 e2 = gen_reg_rtx (mode);
41712 e3 = gen_reg_rtx (mode);
41713
41714 real_from_integer (&r, VOIDmode, -3, SIGNED);
41715 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41716
41717 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41718 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41719 unspec = UNSPEC_RSQRT;
41720
41721 if (VECTOR_MODE_P (mode))
41722 {
41723 mthree = ix86_build_const_vector (mode, true, mthree);
41724 mhalf = ix86_build_const_vector (mode, true, mhalf);
41725 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41726 if (GET_MODE_SIZE (mode) == 64)
41727 unspec = UNSPEC_RSQRT14;
41728 }
41729
41730 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41731 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
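/* Both lines are one Newton-Raphson step for f(x) = 1/(x*x) - a:
x1 = x0 * (1.5 - 0.5 * a * x0 * x0) = -0.5 * x0 * (a * x0 * x0 - 3.0),
with x0 = rsqrtss(a); the sqrt form multiplies by a once more, since
sqrt(a) = a * rsqrt(a).  */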
41732
41733 a = force_reg (mode, a);
41734
41735 /* x0 = rsqrt(a) estimate */
41736 emit_insn (gen_rtx_SET (VOIDmode, x0,
41737 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41738 unspec)));
41739
41740 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN (0 * inf) for sqrt(0.0). */
41741 if (!recip)
41742 {
41743 rtx zero, mask;
41744
41745 zero = gen_reg_rtx (mode);
41746 mask = gen_reg_rtx (mode);
41747
41748 zero = force_reg (mode, CONST0_RTX (mode));
41749
41750 /* Handle masked compare. */
41751 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41752 {
41753 mask = gen_reg_rtx (HImode);
41754 /* Imm value 0x4 corresponds to not-equal comparison. */
41755 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41756 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41757 }
41758 else
41759 {
41760 emit_insn (gen_rtx_SET (VOIDmode, mask,
41761 gen_rtx_NE (mode, zero, a)));
41762
41763 emit_insn (gen_rtx_SET (VOIDmode, x0,
41764 gen_rtx_AND (mode, x0, mask)));
41765 }
41766 }
41767
41768 /* e0 = x0 * a */
41769 emit_insn (gen_rtx_SET (VOIDmode, e0,
41770 gen_rtx_MULT (mode, x0, a)));
41771 /* e1 = e0 * x0 */
41772 emit_insn (gen_rtx_SET (VOIDmode, e1,
41773 gen_rtx_MULT (mode, e0, x0)));
41774
41775 /* e2 = e1 - 3. */
41776 mthree = force_reg (mode, mthree);
41777 emit_insn (gen_rtx_SET (VOIDmode, e2,
41778 gen_rtx_PLUS (mode, e1, mthree)));
41779
41780 mhalf = force_reg (mode, mhalf);
41781 if (recip)
41782 /* e3 = -.5 * x0 */
41783 emit_insn (gen_rtx_SET (VOIDmode, e3,
41784 gen_rtx_MULT (mode, x0, mhalf)));
41785 else
41786 /* e3 = -.5 * e0 */
41787 emit_insn (gen_rtx_SET (VOIDmode, e3,
41788 gen_rtx_MULT (mode, e0, mhalf)));
41789 /* ret = e2 * e3 */
41790 emit_insn (gen_rtx_SET (VOIDmode, res,
41791 gen_rtx_MULT (mode, e2, e3)));
41792 }
41793
41794 #ifdef TARGET_SOLARIS
41795 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41796
41797 static void
41798 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41799 tree decl)
41800 {
41801 /* With Binutils 2.15, the "@unwind" marker must be specified on
41802 every occurrence of the ".eh_frame" section, not just the first
41803 one. */
41804 if (TARGET_64BIT
41805 && strcmp (name, ".eh_frame") == 0)
41806 {
41807 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41808 flags & SECTION_WRITE ? "aw" : "a");
41809 return;
41810 }
41811
41812 #ifndef USE_GAS
41813 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41814 {
41815 solaris_elf_asm_comdat_section (name, flags, decl);
41816 return;
41817 }
41818 #endif
41819
41820 default_elf_asm_named_section (name, flags, decl);
41821 }
41822 #endif /* TARGET_SOLARIS */
41823
41824 /* Return the mangling of TYPE if it is an extended fundamental type. */
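/* For example, under the Itanium C++ ABI mangling used here, a function
f(__float128) mangles as _Z1fg and f(long double) / f(__float80) as _Z1fe.  */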
41825
41826 static const char *
41827 ix86_mangle_type (const_tree type)
41828 {
41829 type = TYPE_MAIN_VARIANT (type);
41830
41831 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41832 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41833 return NULL;
41834
41835 switch (TYPE_MODE (type))
41836 {
41837 case TFmode:
41838 /* __float128 is "g". */
41839 return "g";
41840 case XFmode:
41841 /* "long double" or __float80 is "e". */
41842 return "e";
41843 default:
41844 return NULL;
41845 }
41846 }
41847
41848 /* For 32-bit code we can save PIC register setup by using
41849 __stack_chk_fail_local hidden function instead of calling
41850 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
41851 register, so it is better to call __stack_chk_fail directly. */
41852
41853 static tree ATTRIBUTE_UNUSED
41854 ix86_stack_protect_fail (void)
41855 {
41856 return TARGET_64BIT
41857 ? default_external_stack_protect_fail ()
41858 : default_hidden_stack_protect_fail ();
41859 }
41860
41861 /* Select a format to encode pointers in exception handling data. CODE
41862 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41863 true if the symbol may be affected by dynamic relocations.
41864
41865 ??? All x86 object file formats are capable of representing this.
41866 After all, the relocation needed is the same as for the call insn.
41867 Whether or not a particular assembler allows us to enter such, I
41868 guess we'll have to see. */
41869 int
41870 asm_preferred_eh_data_format (int code, int global)
41871 {
41872 if (flag_pic)
41873 {
41874 int type = DW_EH_PE_sdata8;
41875 if (!TARGET_64BIT
41876 || ix86_cmodel == CM_SMALL_PIC
41877 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41878 type = DW_EH_PE_sdata4;
41879 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41880 }
41881 if (ix86_cmodel == CM_SMALL
41882 || (ix86_cmodel == CM_MEDIUM && code))
41883 return DW_EH_PE_udata4;
41884 return DW_EH_PE_absptr;
41885 }
41886 \f
41887 /* Expand copysign: copy the sign bit of SIGN onto the positive value
41888 ABS_VALUE and store the result in RESULT. If MASK is non-null, it shall
41889 be a mask with all bits except the sign bit set (the sign bit masked out). */
41890 static void
41891 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41892 {
41893 enum machine_mode mode = GET_MODE (sign);
41894 rtx sgn = gen_reg_rtx (mode);
41895 if (mask == NULL_RTX)
41896 {
41897 enum machine_mode vmode;
41898
41899 if (mode == SFmode)
41900 vmode = V4SFmode;
41901 else if (mode == DFmode)
41902 vmode = V2DFmode;
41903 else
41904 vmode = mode;
41905
41906 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41907 if (!VECTOR_MODE_P (mode))
41908 {
41909 /* We need to generate a scalar mode mask in this case. */
41910 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41911 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41912 mask = gen_reg_rtx (mode);
41913 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41914 }
41915 }
41916 else
41917 mask = gen_rtx_NOT (mode, mask);
41918 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41919 gen_rtx_AND (mode, mask, sign)));
41920 emit_insn (gen_rtx_SET (VOIDmode, result,
41921 gen_rtx_IOR (mode, abs_value, sgn)));
41922 }
41923
41924 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41925 mask for masking out the sign-bit is stored in *SMASK, if that is
41926 non-null. */
41927 static rtx
41928 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41929 {
41930 enum machine_mode vmode, mode = GET_MODE (op0);
41931 rtx xa, mask;
41932
41933 xa = gen_reg_rtx (mode);
41934 if (mode == SFmode)
41935 vmode = V4SFmode;
41936 else if (mode == DFmode)
41937 vmode = V2DFmode;
41938 else
41939 vmode = mode;
41940 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41941 if (!VECTOR_MODE_P (mode))
41942 {
41943 /* We need to generate a scalar mode mask in this case. */
41944 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41945 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41946 mask = gen_reg_rtx (mode);
41947 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41948 }
41949 emit_insn (gen_rtx_SET (VOIDmode, xa,
41950 gen_rtx_AND (mode, op0, mask)));
41951
41952 if (smask)
41953 *smask = mask;
41954
41955 return xa;
41956 }
41957
41958 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41959 swapping the operands if SWAP_OPERANDS is true. The expanded
41960 code is a forward jump to a newly created label in case the
41961 comparison is true. The generated label rtx is returned. */
41962 static rtx_code_label *
41963 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41964 bool swap_operands)
41965 {
41966 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41967 rtx_code_label *label;
41968 rtx tmp;
41969
41970 if (swap_operands)
41971 {
41972 tmp = op0;
41973 op0 = op1;
41974 op1 = tmp;
41975 }
41976
41977 label = gen_label_rtx ();
41978 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41979 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41980 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41981 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41982 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41983 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41984 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41985 JUMP_LABEL (tmp) = label;
41986
41987 return label;
41988 }
41989
41990 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41991 using comparison code CODE. Operands are swapped for the comparison if
41992 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41993 static rtx
41994 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41995 bool swap_operands)
41996 {
41997 rtx (*insn)(rtx, rtx, rtx, rtx);
41998 enum machine_mode mode = GET_MODE (op0);
41999 rtx mask = gen_reg_rtx (mode);
42000
42001 if (swap_operands)
42002 {
42003 rtx tmp = op0;
42004 op0 = op1;
42005 op1 = tmp;
42006 }
42007
42008 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
42009
42010 emit_insn (insn (mask, op0, op1,
42011 gen_rtx_fmt_ee (code, mode, op0, op1)));
42012 return mask;
42013 }
42014
42015 /* Generate and return a rtx of mode MODE for 2**n where n is the number
42016 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
42017 static rtx
42018 ix86_gen_TWO52 (enum machine_mode mode)
42019 {
42020 REAL_VALUE_TYPE TWO52r;
42021 rtx TWO52;
42022
42023 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
42024 TWO52 = const_double_from_real_value (TWO52r, mode);
42025 TWO52 = force_reg (mode, TWO52);
42026
42027 return TWO52;
42028 }
42029
42030 /* Expand SSE sequence for computing lround from OP1 storing
42031 into OP0. */
42032 void
42033 ix86_expand_lround (rtx op0, rtx op1)
42034 {
42035 /* C code for the stuff we're doing below:
42036 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
42037 return (long)tmp;
42038 */
42039 enum machine_mode mode = GET_MODE (op1);
42040 const struct real_format *fmt;
42041 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42042 rtx adj;
42043
42044 /* load nextafter (0.5, 0.0) */
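/* Using the largest representable value below 0.5, i.e. 0.5 - 2**(-p-1)
for a precision of p bits, keeps op1 + adj from rounding up across an
integer boundary when op1 is just below n + 0.5, while exact n + 0.5
inputs still reach n + 1 before the truncating fix conversion below.  */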
42045 fmt = REAL_MODE_FORMAT (mode);
42046 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42047 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42048
42049 /* adj = copysign (0.5, op1) */
42050 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
42051 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
42052
42053 /* adj = op1 + adj */
42054 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
42055
42056 /* op0 = (imode)adj */
42057 expand_fix (op0, adj, 0);
42058 }
42059
42060 /* Expand SSE2 sequence for computing lfloor or lceil (according to
42061 DO_FLOOR) from OP1 storing into OP0. */
42062 void
42063 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
42064 {
42065 /* C code for the stuff we're doing below (for do_floor):
42066 xi = (long)op1;
42067 xi -= (double)xi > op1 ? 1 : 0;
42068 return xi;
42069 */
42070 enum machine_mode fmode = GET_MODE (op1);
42071 enum machine_mode imode = GET_MODE (op0);
42072 rtx ireg, freg, tmp;
42073 rtx_code_label *label;
42074
42075 /* reg = (long)op1 */
42076 ireg = gen_reg_rtx (imode);
42077 expand_fix (ireg, op1, 0);
42078
42079 /* freg = (double)reg */
42080 freg = gen_reg_rtx (fmode);
42081 expand_float (freg, ireg, 0);
42082
42083 /* For floor: ireg = (freg > op1) ? ireg - 1 : ireg; for ceil: ireg = (freg < op1) ? ireg + 1 : ireg */
42084 label = ix86_expand_sse_compare_and_jump (UNLE,
42085 freg, op1, !do_floor);
42086 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
42087 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
42088 emit_move_insn (ireg, tmp);
42089
42090 emit_label (label);
42091 LABEL_NUSES (label) = 1;
42092
42093 emit_move_insn (op0, ireg);
42094 }
42095
42096 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
42097 result in OPERAND0. */
42098 void
42099 ix86_expand_rint (rtx operand0, rtx operand1)
42100 {
42101 /* C code for the stuff we're doing below:
42102 xa = fabs (operand1);
42103 if (!isless (xa, 2**52))
42104 return operand1;
42105 xa = xa + 2**52 - 2**52;
42106 return copysign (xa, operand1);
42107 */
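/* The 2**52 trick (2**23 for SFmode): for 0 <= xa < 2**52 the spacing of
representable values at and above 2**52 is exactly 1.0, so xa + 2**52
rounds xa to an integer in the current rounding mode, and subtracting
2**52 again recovers that integer exactly, which is precisely rint semantics.  */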
42108 enum machine_mode mode = GET_MODE (operand0);
42109 rtx res, xa, TWO52, mask;
42110 rtx_code_label *label;
42111
42112 res = gen_reg_rtx (mode);
42113 emit_move_insn (res, operand1);
42114
42115 /* xa = abs (operand1) */
42116 xa = ix86_expand_sse_fabs (res, &mask);
42117
42118 /* if (!isless (xa, TWO52)) goto label; */
42119 TWO52 = ix86_gen_TWO52 (mode);
42120 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42121
42122 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42123 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42124
42125 ix86_sse_copysign_to_positive (res, xa, res, mask);
42126
42127 emit_label (label);
42128 LABEL_NUSES (label) = 1;
42129
42130 emit_move_insn (operand0, res);
42131 }
42132
42133 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42134 into OPERAND0. */
42135 void
42136 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
42137 {
42138 /* C code for the stuff we expand below.
42139 double xa = fabs (x), x2;
42140 if (!isless (xa, TWO52))
42141 return x;
42142 xa = xa + TWO52 - TWO52;
42143 x2 = copysign (xa, x);
42144 Compensate. Floor:
42145 if (x2 > x)
42146 x2 -= 1;
42147 Compensate. Ceil:
42148 if (x2 < x)
42149 x2 -= -1;
42150 return x2;
42151 */
42152 enum machine_mode mode = GET_MODE (operand0);
42153 rtx xa, TWO52, tmp, one, res, mask;
42154 rtx_code_label *label;
42155
42156 TWO52 = ix86_gen_TWO52 (mode);
42157
42158 /* Temporary for holding the result, initialized to the input
42159 operand to ease control flow. */
42160 res = gen_reg_rtx (mode);
42161 emit_move_insn (res, operand1);
42162
42163 /* xa = abs (operand1) */
42164 xa = ix86_expand_sse_fabs (res, &mask);
42165
42166 /* if (!isless (xa, TWO52)) goto label; */
42167 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42168
42169 /* xa = xa + TWO52 - TWO52; */
42170 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42171 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
42172
42173 /* xa = copysign (xa, operand1) */
42174 ix86_sse_copysign_to_positive (xa, xa, res, mask);
42175
42176 /* generate 1.0 or -1.0 */
42177 one = force_reg (mode,
42178 const_double_from_real_value (do_floor
42179 ? dconst1 : dconstm1, mode));
42180
42181 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0); for ceil, xa = xa - (xa < operand1 ? -1 : 0) */
42182 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42183 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42184 gen_rtx_AND (mode, one, tmp)));
42185 /* We always need to subtract here to preserve signed zero. */
42186 tmp = expand_simple_binop (mode, MINUS,
42187 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42188 emit_move_insn (res, tmp);
42189
42190 emit_label (label);
42191 LABEL_NUSES (label) = 1;
42192
42193 emit_move_insn (operand0, res);
42194 }
42195
42196 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
42197 into OPERAND0. */
42198 void
42199 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
42200 {
42201 /* C code for the stuff we expand below.
42202 double xa = fabs (x), x2;
42203 if (!isless (xa, TWO52))
42204 return x;
42205 x2 = (double)(long)x;
42206 Compensate. Floor:
42207 if (x2 > x)
42208 x2 -= 1;
42209 Compensate. Ceil:
42210 if (x2 < x)
42211 x2 += 1;
42212 if (HONOR_SIGNED_ZEROS (mode))
42213 return copysign (x2, x);
42214 return x2;
42215 */
42216 enum machine_mode mode = GET_MODE (operand0);
42217 rtx xa, xi, TWO52, tmp, one, res, mask;
42218 rtx_code_label *label;
42219
42220 TWO52 = ix86_gen_TWO52 (mode);
42221
42222 /* Temporary for holding the result, initialized to the input
42223 operand to ease control flow. */
42224 res = gen_reg_rtx (mode);
42225 emit_move_insn (res, operand1);
42226
42227 /* xa = abs (operand1) */
42228 xa = ix86_expand_sse_fabs (res, &mask);
42229
42230 /* if (!isless (xa, TWO52)) goto label; */
42231 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42232
42233 /* xa = (double)(long)x */
42234 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42235 expand_fix (xi, res, 0);
42236 expand_float (xa, xi, 0);
42237
42238 /* generate 1.0 */
42239 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42240
42241 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0); for ceil, xa = xa + (xa < operand1 ? 1 : 0) */
42242 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
42243 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42244 gen_rtx_AND (mode, one, tmp)));
42245 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
42246 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42247 emit_move_insn (res, tmp);
42248
42249 if (HONOR_SIGNED_ZEROS (mode))
42250 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42251
42252 emit_label (label);
42253 LABEL_NUSES (label) = 1;
42254
42255 emit_move_insn (operand0, res);
42256 }
42257
42258 /* Expand SSE sequence for computing round from OPERAND1 storing
42259 into OPERAND0. Sequence that works without relying on DImode truncation
42260 via cvttsd2siq that is only available on 64bit targets. */
42261 void
42262 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
42263 {
42264 /* C code for the stuff we expand below.
42265 double xa = fabs (x), xa2, x2;
42266 if (!isless (xa, TWO52))
42267 return x;
42268 Using the absolute value and copying back sign makes
42269 -0.0 -> -0.0 correct.
42270 xa2 = xa + TWO52 - TWO52;
42271 Compensate.
42272 dxa = xa2 - xa;
42273 if (dxa <= -0.5)
42274 xa2 += 1;
42275 else if (dxa > 0.5)
42276 xa2 -= 1;
42277 x2 = copysign (xa2, x);
42278 return x2;
42279 */
42280 enum machine_mode mode = GET_MODE (operand0);
42281 rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
42282 rtx_code_label *label;
42283
42284 TWO52 = ix86_gen_TWO52 (mode);
42285
42286 /* Temporary for holding the result, initialized to the input
42287 operand to ease control flow. */
42288 res = gen_reg_rtx (mode);
42289 emit_move_insn (res, operand1);
42290
42291 /* xa = abs (operand1) */
42292 xa = ix86_expand_sse_fabs (res, &mask);
42293
42294 /* if (!isless (xa, TWO52)) goto label; */
42295 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42296
42297 /* xa2 = xa + TWO52 - TWO52; */
42298 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42299 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
42300
42301 /* dxa = xa2 - xa; */
42302 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
42303
42304 /* generate 0.5, 1.0 and -0.5 */
42305 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
42306 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
42307 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
42308 0, OPTAB_DIRECT);
42309
42310 /* Compensate. */
42311 tmp = gen_reg_rtx (mode);
42312 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
42313 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
42314 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42315 gen_rtx_AND (mode, one, tmp)));
42316 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42317 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42318 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42319 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42320 gen_rtx_AND (mode, one, tmp)));
42321 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42322
42323 /* res = copysign (xa2, operand1) */
42324 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42325
42326 emit_label (label);
42327 LABEL_NUSES (label) = 1;
42328
42329 emit_move_insn (operand0, res);
42330 }
42331
42332 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42333 into OPERAND0. */
42334 void
42335 ix86_expand_trunc (rtx operand0, rtx operand1)
42336 {
42337 /* C code for SSE variant we expand below.
42338 double xa = fabs (x), x2;
42339 if (!isless (xa, TWO52))
42340 return x;
42341 x2 = (double)(long)x;
42342 if (HONOR_SIGNED_ZEROS (mode))
42343 return copysign (x2, x);
42344 return x2;
42345 */
42346 enum machine_mode mode = GET_MODE (operand0);
42347 rtx xa, xi, TWO52, res, mask;
42348 rtx_code_label *label;
42349
42350 TWO52 = ix86_gen_TWO52 (mode);
42351
42352 /* Temporary for holding the result, initialized to the input
42353 operand to ease control flow. */
42354 res = gen_reg_rtx (mode);
42355 emit_move_insn (res, operand1);
42356
42357 /* xa = abs (operand1) */
42358 xa = ix86_expand_sse_fabs (res, &mask);
42359
42360 /* if (!isless (xa, TWO52)) goto label; */
42361 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42362
42363 /* x = (double)(long)x */
42364 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42365 expand_fix (xi, res, 0);
42366 expand_float (res, xi, 0);
42367
42368 if (HONOR_SIGNED_ZEROS (mode))
42369 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42370
42371 emit_label (label);
42372 LABEL_NUSES (label) = 1;
42373
42374 emit_move_insn (operand0, res);
42375 }
42376
42377 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
42378 OPERAND0, without relying on DImode truncation via the 64-bit-only cvttsd2siq. */
42379 void
42380 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42381 {
42382 enum machine_mode mode = GET_MODE (operand0);
42383 rtx xa, mask, TWO52, one, res, smask, tmp;
42384 rtx_code_label *label;
42385
42386 /* C code for SSE variant we expand below.
42387 double xa = fabs (x), x2;
42388 if (!isless (xa, TWO52))
42389 return x;
42390 xa2 = xa + TWO52 - TWO52;
42391 Compensate:
42392 if (xa2 > xa)
42393 xa2 -= 1.0;
42394 x2 = copysign (xa2, x);
42395 return x2;
42396 */
42397
42398 TWO52 = ix86_gen_TWO52 (mode);
42399
42400 /* Temporary for holding the result, initialized to the input
42401 operand to ease control flow. */
42402 res = gen_reg_rtx (mode);
42403 emit_move_insn (res, operand1);
42404
42405 /* xa = abs (operand1) */
42406 xa = ix86_expand_sse_fabs (res, &smask);
42407
42408 /* if (!isless (xa, TWO52)) goto label; */
42409 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42410
42411 /* res = xa + TWO52 - TWO52; */
42412 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42413 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42414 emit_move_insn (res, tmp);
42415
42416 /* generate 1.0 */
42417 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42418
42419 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42420 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42421 emit_insn (gen_rtx_SET (VOIDmode, mask,
42422 gen_rtx_AND (mode, mask, one)));
42423 tmp = expand_simple_binop (mode, MINUS,
42424 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42425 emit_move_insn (res, tmp);
42426
42427 /* res = copysign (res, operand1) */
42428 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42429
42430 emit_label (label);
42431 LABEL_NUSES (label) = 1;
42432
42433 emit_move_insn (operand0, res);
42434 }
42435
42436 /* Expand SSE sequence for computing round from OPERAND1 storing
42437 into OPERAND0. */
42438 void
42439 ix86_expand_round (rtx operand0, rtx operand1)
42440 {
42441 /* C code for the stuff we're doing below:
42442 double xa = fabs (x);
42443 if (!isless (xa, TWO52))
42444 return x;
42445 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42446 return copysign (xa, x);
42447 */
42448 enum machine_mode mode = GET_MODE (operand0);
42449 rtx res, TWO52, xa, xi, half, mask;
42450 rtx_code_label *label;
42451 const struct real_format *fmt;
42452 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42453
42454 /* Temporary for holding the result, initialized to the input
42455 operand to ease control flow. */
42456 res = gen_reg_rtx (mode);
42457 emit_move_insn (res, operand1);
42458
42459 TWO52 = ix86_gen_TWO52 (mode);
42460 xa = ix86_expand_sse_fabs (res, &mask);
42461 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42462
42463 /* load nextafter (0.5, 0.0) */
42464 fmt = REAL_MODE_FORMAT (mode);
42465 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42466 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42467
42468 /* xa = xa + 0.5 */
42469 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42470 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42471
42472 /* xa = (double)(int64_t)xa */
42473 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42474 expand_fix (xi, xa, 0);
42475 expand_float (xa, xi, 0);
42476
42477 /* res = copysign (xa, operand1) */
42478 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42479
42480 emit_label (label);
42481 LABEL_NUSES (label) = 1;
42482
42483 emit_move_insn (operand0, res);
42484 }
42485
42486 /* Expand SSE sequence for computing round
42487 from OP1 storing into OP0 using sse4 round insn. */
42488 void
42489 ix86_expand_round_sse4 (rtx op0, rtx op1)
42490 {
42491 enum machine_mode mode = GET_MODE (op0);
42492 rtx e1, e2, res, half;
42493 const struct real_format *fmt;
42494 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42495 rtx (*gen_copysign) (rtx, rtx, rtx);
42496 rtx (*gen_round) (rtx, rtx, rtx);
42497
42498 switch (mode)
42499 {
42500 case SFmode:
42501 gen_copysign = gen_copysignsf3;
42502 gen_round = gen_sse4_1_roundsf2;
42503 break;
42504 case DFmode:
42505 gen_copysign = gen_copysigndf3;
42506 gen_round = gen_sse4_1_rounddf2;
42507 break;
42508 default:
42509 gcc_unreachable ();
42510 }
42511
42512 /* round (a) = trunc (a + copysign (0.5, a)) */
42513
42514 /* load nextafter (0.5, 0.0) */
42515 fmt = REAL_MODE_FORMAT (mode);
42516 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42517 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42518 half = const_double_from_real_value (pred_half, mode);
42519
42520 /* e1 = copysign (0.5, op1) */
42521 e1 = gen_reg_rtx (mode);
42522 emit_insn (gen_copysign (e1, half, op1));
42523
42524 /* e2 = op1 + e1 */
42525 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42526
42527 /* res = trunc (e2) */
42528 res = gen_reg_rtx (mode);
42529 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42530
42531 emit_move_insn (op0, res);
42532 }
42533 \f
42534
42535 /* Table of valid machine attributes. */
42536 static const struct attribute_spec ix86_attribute_table[] =
42537 {
42538 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42539 affects_type_identity } */
42540 /* Stdcall attribute says callee is responsible for popping arguments
42541 if they are not variable. */
42542 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42543 true },
42544 /* Fastcall attribute says callee is responsible for popping arguments
42545 if they are not variable. */
42546 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42547 true },
42548 /* Thiscall attribute says callee is responsible for popping arguments
42549 if they are not variable. */
42550 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42551 true },
42552 /* Cdecl attribute says the callee is a normal C declaration */
42553 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42554 true },
42555 /* Regparm attribute specifies how many integer arguments are to be
42556 passed in registers. */
42557 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42558 true },
42559 /* Sseregparm attribute says we are using x86_64 calling conventions
42560 for FP arguments. */
42561 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42562 true },
42563 /* The transactional memory builtins are implicitly regparm or fastcall
42564 depending on the ABI. Override the generic do-nothing attribute that
42565 these builtins were declared with. */
42566 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42567 true },
42568 /* force_align_arg_pointer says this function realigns the stack at entry. */
42569 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42570 false, true, true, ix86_handle_cconv_attribute, false },
42571 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42572 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42573 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42574 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42575 false },
42576 #endif
42577 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42578 false },
42579 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42580 false },
42581 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42582 SUBTARGET_ATTRIBUTE_TABLE,
42583 #endif
42584 /* ms_abi and sysv_abi calling convention function attributes. */
42585 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42586 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42587 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42588 false },
42589 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42590 ix86_handle_callee_pop_aggregate_return, true },
42591 /* End element. */
42592 { NULL, 0, 0, false, false, false, NULL, false }
42593 };
42594
42595 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42596 static int
42597 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42598 tree vectype, int)
42599 {
42600 unsigned elements;
42601
42602 switch (type_of_cost)
42603 {
42604 case scalar_stmt:
42605 return ix86_cost->scalar_stmt_cost;
42606
42607 case scalar_load:
42608 return ix86_cost->scalar_load_cost;
42609
42610 case scalar_store:
42611 return ix86_cost->scalar_store_cost;
42612
42613 case vector_stmt:
42614 return ix86_cost->vec_stmt_cost;
42615
42616 case vector_load:
42617 return ix86_cost->vec_align_load_cost;
42618
42619 case vector_store:
42620 return ix86_cost->vec_store_cost;
42621
42622 case vec_to_scalar:
42623 return ix86_cost->vec_to_scalar_cost;
42624
42625 case scalar_to_vec:
42626 return ix86_cost->scalar_to_vec_cost;
42627
42628 case unaligned_load:
42629 case unaligned_store:
42630 return ix86_cost->vec_unalign_load_cost;
42631
42632 case cond_branch_taken:
42633 return ix86_cost->cond_taken_branch_cost;
42634
42635 case cond_branch_not_taken:
42636 return ix86_cost->cond_not_taken_branch_cost;
42637
42638 case vec_perm:
42639 case vec_promote_demote:
42640 return ix86_cost->vec_stmt_cost;
42641
42642 case vec_construct:
42643 elements = TYPE_VECTOR_SUBPARTS (vectype);
42644 return elements / 2 + 1;
42645
42646 default:
42647 gcc_unreachable ();
42648 }
42649 }
42650
42651 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42652 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42653 insn every time. */
42654
42655 static GTY(()) rtx_insn *vselect_insn;
42656
42657 /* Initialize vselect_insn. */
42658
42659 static void
42660 init_vselect_insn (void)
42661 {
42662 unsigned i;
42663 rtx x;
42664
42665 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42666 for (i = 0; i < MAX_VECT_LEN; ++i)
42667 XVECEXP (x, 0, i) = const0_rtx;
42668 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42669 const0_rtx), x);
42670 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42671 start_sequence ();
42672 vselect_insn = emit_insn (x);
42673 end_sequence ();
42674 }
42675
42676 /* Construct (set target (vec_select op0 (parallel perm))) and
42677 return true if that's a valid instruction in the active ISA. */
42678
42679 static bool
42680 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42681 unsigned nelt, bool testing_p)
42682 {
42683 unsigned int i;
42684 rtx x, save_vconcat;
42685 int icode;
42686
42687 if (vselect_insn == NULL_RTX)
42688 init_vselect_insn ();
42689
42690 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42691 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42692 for (i = 0; i < nelt; ++i)
42693 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42694 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42695 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42696 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42697 SET_DEST (PATTERN (vselect_insn)) = target;
42698 icode = recog_memoized (vselect_insn);
42699
42700 if (icode >= 0 && !testing_p)
42701 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42702
42703 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42704 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42705 INSN_CODE (vselect_insn) = -1;
42706
42707 return icode >= 0;
42708 }
42709
42710 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42711
42712 static bool
42713 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42714 const unsigned char *perm, unsigned nelt,
42715 bool testing_p)
42716 {
42717 enum machine_mode v2mode;
42718 rtx x;
42719 bool ok;
42720
42721 if (vselect_insn == NULL_RTX)
42722 init_vselect_insn ();
42723
42724 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42725 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42726 PUT_MODE (x, v2mode);
42727 XEXP (x, 0) = op0;
42728 XEXP (x, 1) = op1;
42729 ok = expand_vselect (target, x, perm, nelt, testing_p);
42730 XEXP (x, 0) = const0_rtx;
42731 XEXP (x, 1) = const0_rtx;
42732 return ok;
42733 }
42734
42735 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42736 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42737
42738 static bool
42739 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42740 {
42741 enum machine_mode vmode = d->vmode;
42742 unsigned i, mask, nelt = d->nelt;
42743 rtx target, op0, op1, x;
42744 rtx rperm[32], vperm;
42745
42746 if (d->one_operand_p)
42747 return false;
42748 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42749 ;
42750 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42751 ;
42752 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42753 ;
42754 else
42755 return false;
42756
42757 /* This is a blend, not a permute. Elements must stay in their
42758 respective lanes. */
42759 for (i = 0; i < nelt; ++i)
42760 {
42761 unsigned e = d->perm[i];
42762 if (!(e == i || e == i + nelt))
42763 return false;
42764 }
42765
42766 if (d->testing_p)
42767 return true;
42768
42769 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42770 decision should be extracted elsewhere, so that we only try that
42771 sequence once all budget==3 options have been tried. */
42772 target = d->target;
42773 op0 = d->op0;
42774 op1 = d->op1;
42775 mask = 0;
42776
42777 switch (vmode)
42778 {
42779 case V4DFmode:
42780 case V8SFmode:
42781 case V2DFmode:
42782 case V4SFmode:
42783 case V8HImode:
42784 case V8SImode:
42785 for (i = 0; i < nelt; ++i)
42786 mask |= (d->perm[i] >= nelt) << i;
42787 break;
42788
42789 case V2DImode:
42790 for (i = 0; i < 2; ++i)
42791 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
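/* Note: each V2DI element is blended as four V8HI elements, so a selected
DI element contributes four consecutive set bits (0xf) to the pblendw
immediate built above; the V4SI case below likewise uses two bits (0x3)
per SI element.  */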
42792 vmode = V8HImode;
42793 goto do_subreg;
42794
42795 case V4SImode:
42796 for (i = 0; i < 4; ++i)
42797 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42798 vmode = V8HImode;
42799 goto do_subreg;
42800
42801 case V16QImode:
42802 /* See if bytes move in pairs so we can use pblendw with
42803 an immediate argument, rather than pblendvb with a vector
42804 argument. */
42805 for (i = 0; i < 16; i += 2)
42806 if (d->perm[i] + 1 != d->perm[i + 1])
42807 {
42808 use_pblendvb:
42809 for (i = 0; i < nelt; ++i)
42810 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42811
42812 finish_pblendvb:
42813 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42814 vperm = force_reg (vmode, vperm);
42815
42816 if (GET_MODE_SIZE (vmode) == 16)
42817 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42818 else
42819 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42820 if (target != d->target)
42821 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42822 return true;
42823 }
42824
42825 for (i = 0; i < 8; ++i)
42826 mask |= (d->perm[i * 2] >= 16) << i;
42827 vmode = V8HImode;
42828 /* FALLTHRU */
42829
42830 do_subreg:
42831 target = gen_reg_rtx (vmode);
42832 op0 = gen_lowpart (vmode, op0);
42833 op1 = gen_lowpart (vmode, op1);
42834 break;
42835
42836 case V32QImode:
42837 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42838 for (i = 0; i < 32; i += 2)
42839 if (d->perm[i] + 1 != d->perm[i + 1])
42840 goto use_pblendvb;
42841 /* See if bytes move in quadruplets. If yes, vpblendd
42842 with immediate can be used. */
42843 for (i = 0; i < 32; i += 4)
42844 if (d->perm[i] + 2 != d->perm[i + 2])
42845 break;
42846 if (i < 32)
42847 {
42848 /* See if bytes move the same in both lanes. If yes,
42849 vpblendw with immediate can be used. */
42850 for (i = 0; i < 16; i += 2)
42851 if (d->perm[i] + 16 != d->perm[i + 16])
42852 goto use_pblendvb;
42853
42854 /* Use vpblendw. */
42855 for (i = 0; i < 16; ++i)
42856 mask |= (d->perm[i * 2] >= 32) << i;
42857 vmode = V16HImode;
42858 goto do_subreg;
42859 }
42860
42861 /* Use vpblendd. */
42862 for (i = 0; i < 8; ++i)
42863 mask |= (d->perm[i * 4] >= 32) << i;
42864 vmode = V8SImode;
42865 goto do_subreg;
42866
42867 case V16HImode:
42868 /* See if words move in pairs. If yes, vpblendd can be used. */
42869 for (i = 0; i < 16; i += 2)
42870 if (d->perm[i] + 1 != d->perm[i + 1])
42871 break;
42872 if (i < 16)
42873 {
42874 /* See if words move the same in both lanes. If not,
42875 vpblendvb must be used. */
42876 for (i = 0; i < 8; i++)
42877 if (d->perm[i] + 8 != d->perm[i + 8])
42878 {
42879 /* Use vpblendvb. */
42880 for (i = 0; i < 32; ++i)
42881 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42882
42883 vmode = V32QImode;
42884 nelt = 32;
42885 target = gen_reg_rtx (vmode);
42886 op0 = gen_lowpart (vmode, op0);
42887 op1 = gen_lowpart (vmode, op1);
42888 goto finish_pblendvb;
42889 }
42890
42891 /* Use vpblendw. */
42892 for (i = 0; i < 16; ++i)
42893 mask |= (d->perm[i] >= 16) << i;
42894 break;
42895 }
42896
42897 /* Use vpblendd. */
42898 for (i = 0; i < 8; ++i)
42899 mask |= (d->perm[i * 2] >= 16) << i;
42900 vmode = V8SImode;
42901 goto do_subreg;
42902
42903 case V4DImode:
42904 /* Use vpblendd. */
42905 for (i = 0; i < 4; ++i)
42906 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42907 vmode = V8SImode;
42908 goto do_subreg;
42909
42910 default:
42911 gcc_unreachable ();
42912 }
42913
42914 /* This matches five different patterns with the different modes. */
42915 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42916 x = gen_rtx_SET (VOIDmode, target, x);
42917 emit_insn (x);
42918 if (target != d->target)
42919 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42920
42921 return true;
42922 }
42923
42924 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42925 in terms of the variable form of vpermilps.
42926
42927 Note that we will have already failed the immediate input vpermilps,
42928 which requires that the high and low part shuffle be identical; the
42929 variable form doesn't require that. */
42930
42931 static bool
42932 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42933 {
42934 rtx rperm[8], vperm;
42935 unsigned i;
42936
42937 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42938 return false;
42939
42940 /* We can only permute within the 128-bit lane. */
42941 for (i = 0; i < 8; ++i)
42942 {
42943 unsigned e = d->perm[i];
42944 if (i < 4 ? e >= 4 : e < 4)
42945 return false;
42946 }
42947
42948 if (d->testing_p)
42949 return true;
42950
42951 for (i = 0; i < 8; ++i)
42952 {
42953 unsigned e = d->perm[i];
42954
42955 /* Within each 128-bit lane, the elements of op0 are numbered
42956 from 0 and the elements of op1 are numbered from 4. */
42957 if (e >= 8 + 4)
42958 e -= 8;
42959 else if (e >= 4)
42960 e -= 4;
42961
42962 rperm[i] = GEN_INT (e);
42963 }
42964
42965 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42966 vperm = force_reg (V8SImode, vperm);
42967 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42968
42969 return true;
42970 }
42971
42972 /* Return true if permutation D can be performed as VMODE permutation
42973 instead. */
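/* For example, a V16QImode permutation whose indices start
{ 4, 5, 6, 7, 0, 1, 2, 3, ... } can be done as a V4SImode permutation
starting { 1, 0, ... }: every aligned group of four byte indices is
consecutive and starts on a multiple of four.  */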
42974
42975 static bool
42976 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42977 {
42978 unsigned int i, j, chunk;
42979
42980 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42981 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42982 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42983 return false;
42984
42985 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42986 return true;
42987
42988 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42989 for (i = 0; i < d->nelt; i += chunk)
42990 if (d->perm[i] & (chunk - 1))
42991 return false;
42992 else
42993 for (j = 1; j < chunk; ++j)
42994 if (d->perm[i] + j != d->perm[i + j])
42995 return false;
42996
42997 return true;
42998 }
42999
43000 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43001 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
43002
43003 static bool
43004 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
43005 {
43006 unsigned i, nelt, eltsz, mask;
43007 unsigned char perm[32];
43008 enum machine_mode vmode = V16QImode;
43009 rtx rperm[32], vperm, target, op0, op1;
43010
43011 nelt = d->nelt;
43012
43013 if (!d->one_operand_p)
43014 {
43015 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
43016 {
43017 if (TARGET_AVX2
43018 && valid_perm_using_mode_p (V2TImode, d))
43019 {
43020 if (d->testing_p)
43021 return true;
43022
43023 /* Use vperm2i128 insn. The pattern uses
43024 V4DImode instead of V2TImode. */
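/* The vperm2i128 immediate selects one of the four source 128-bit lanes
(op0 low, op0 high, op1 low, op1 high) for each half of the result:
bits 0-1 for the low half and bits 4-5 for the high half, which is what
the two divisions by (nelt / 2) below compute.  */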
43025 target = d->target;
43026 if (d->vmode != V4DImode)
43027 target = gen_reg_rtx (V4DImode);
43028 op0 = gen_lowpart (V4DImode, d->op0);
43029 op1 = gen_lowpart (V4DImode, d->op1);
43030 rperm[0]
43031 = GEN_INT ((d->perm[0] / (nelt / 2))
43032 | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
43033 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
43034 if (target != d->target)
43035 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43036 return true;
43037 }
43038 return false;
43039 }
43040 }
43041 else
43042 {
43043 if (GET_MODE_SIZE (d->vmode) == 16)
43044 {
43045 if (!TARGET_SSSE3)
43046 return false;
43047 }
43048 else if (GET_MODE_SIZE (d->vmode) == 32)
43049 {
43050 if (!TARGET_AVX2)
43051 return false;
43052
43053 /* V4DImode should be already handled through
43054 expand_vselect by vpermq instruction. */
43055 gcc_assert (d->vmode != V4DImode);
43056
43057 vmode = V32QImode;
43058 if (d->vmode == V8SImode
43059 || d->vmode == V16HImode
43060 || d->vmode == V32QImode)
43061 {
43062 /* First see if vpermq can be used for
43063 V8SImode/V16HImode/V32QImode. */
43064 if (valid_perm_using_mode_p (V4DImode, d))
43065 {
43066 for (i = 0; i < 4; i++)
43067 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
43068 if (d->testing_p)
43069 return true;
43070 target = gen_reg_rtx (V4DImode);
43071 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
43072 perm, 4, false))
43073 {
43074 emit_move_insn (d->target,
43075 gen_lowpart (d->vmode, target));
43076 return true;
43077 }
43078 return false;
43079 }
43080
43081 /* Next see if vpermd can be used. */
43082 if (valid_perm_using_mode_p (V8SImode, d))
43083 vmode = V8SImode;
43084 }
43085 /* Or if vpermps can be used. */
43086 else if (d->vmode == V8SFmode)
43087 vmode = V8SImode;
43088
43089 if (vmode == V32QImode)
43090 {
43091 /* vpshufb only works intra lanes; it is not
43092 possible to shuffle bytes in between the lanes. */
43093 for (i = 0; i < nelt; ++i)
43094 if ((d->perm[i] ^ i) & (nelt / 2))
43095 return false;
43096 }
43097 }
43098 else
43099 return false;
43100 }
43101
43102 if (d->testing_p)
43103 return true;
43104
43105 if (vmode == V8SImode)
43106 for (i = 0; i < 8; ++i)
43107 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
43108 else
43109 {
43110 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43111 if (!d->one_operand_p)
43112 mask = 2 * nelt - 1;
43113 else if (vmode == V16QImode)
43114 mask = nelt - 1;
43115 else
43116 mask = nelt / 2 - 1;
43117
43118 for (i = 0; i < nelt; ++i)
43119 {
43120 unsigned j, e = d->perm[i] & mask;
43121 for (j = 0; j < eltsz; ++j)
43122 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
43123 }
43124 }
43125
43126 vperm = gen_rtx_CONST_VECTOR (vmode,
43127 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
43128 vperm = force_reg (vmode, vperm);
43129
43130 target = d->target;
43131 if (d->vmode != vmode)
43132 target = gen_reg_rtx (vmode);
43133 op0 = gen_lowpart (vmode, d->op0);
43134 if (d->one_operand_p)
43135 {
43136 if (vmode == V16QImode)
43137 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
43138 else if (vmode == V32QImode)
43139 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
43140 else if (vmode == V8SFmode)
43141 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
43142 else
43143 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
43144 }
43145 else
43146 {
43147 op1 = gen_lowpart (vmode, d->op1);
43148 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
43149 }
43150 if (target != d->target)
43151 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
43152
43153 return true;
43154 }
43155
43156 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
43157 in a single instruction. */
43158
43159 static bool
43160 expand_vec_perm_1 (struct expand_vec_perm_d *d)
43161 {
43162 unsigned i, nelt = d->nelt;
43163 unsigned char perm2[MAX_VECT_LEN];
43164
43165 /* Check plain VEC_SELECT first, because AVX has instructions that could
43166 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
43167 input where SEL+CONCAT may not. */
43168 if (d->one_operand_p)
43169 {
43170 int mask = nelt - 1;
43171 bool identity_perm = true;
43172 bool broadcast_perm = true;
43173
43174 for (i = 0; i < nelt; i++)
43175 {
43176 perm2[i] = d->perm[i] & mask;
43177 if (perm2[i] != i)
43178 identity_perm = false;
43179 if (perm2[i])
43180 broadcast_perm = false;
43181 }
43182
43183 if (identity_perm)
43184 {
43185 if (!d->testing_p)
43186 emit_move_insn (d->target, d->op0);
43187 return true;
43188 }
43189 else if (broadcast_perm && TARGET_AVX2)
43190 {
43191 /* Use vpbroadcast{b,w,d}. */
43192 rtx (*gen) (rtx, rtx) = NULL;
43193 switch (d->vmode)
43194 {
43195 case V32QImode:
43196 gen = gen_avx2_pbroadcastv32qi_1;
43197 break;
43198 case V16HImode:
43199 gen = gen_avx2_pbroadcastv16hi_1;
43200 break;
43201 case V8SImode:
43202 gen = gen_avx2_pbroadcastv8si_1;
43203 break;
43204 case V16QImode:
43205 gen = gen_avx2_pbroadcastv16qi;
43206 break;
43207 case V8HImode:
43208 gen = gen_avx2_pbroadcastv8hi;
43209 break;
43210 case V8SFmode:
43211 gen = gen_avx2_vec_dupv8sf_1;
43212 break;
43213 /* For other modes, prefer the other shuffles this function creates. */
43214 default: break;
43215 }
43216 if (gen != NULL)
43217 {
43218 if (!d->testing_p)
43219 emit_insn (gen (d->target, d->op0));
43220 return true;
43221 }
43222 }
43223
43224 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
43225 return true;
43226
43227 /* There are plenty of patterns in sse.md that are written for
43228 SEL+CONCAT and are not replicated for a single op. Perhaps
43229 that should be changed, to avoid the nastiness here. */
43230
43231 /* Recognize interleave style patterns, which means incrementing
43232 every other permutation operand. */
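/* E.g. for V8HImode the one-operand interleave-low permutation
{ 0, 0, 1, 1, 2, 2, 3, 3 } is rewritten here as the two-operand
{ 0, 8, 1, 9, 2, 10, 3, 11 } on (vec_concat op0 op0), matching the
punpcklwd-style SEL+CONCAT patterns in sse.md.  */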
43233 for (i = 0; i < nelt; i += 2)
43234 {
43235 perm2[i] = d->perm[i] & mask;
43236 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
43237 }
43238 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43239 d->testing_p))
43240 return true;
43241
43242 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
43243 if (nelt >= 4)
43244 {
43245 for (i = 0; i < nelt; i += 4)
43246 {
43247 perm2[i + 0] = d->perm[i + 0] & mask;
43248 perm2[i + 1] = d->perm[i + 1] & mask;
43249 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
43250 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
43251 }
43252
43253 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
43254 d->testing_p))
43255 return true;
43256 }
43257 }
43258
43259 /* Finally, try the fully general two operand permute. */
43260 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
43261 d->testing_p))
43262 return true;
43263
43264 /* Recognize interleave style patterns with reversed operands. */
43265 if (!d->one_operand_p)
43266 {
43267 for (i = 0; i < nelt; ++i)
43268 {
43269 unsigned e = d->perm[i];
43270 if (e >= nelt)
43271 e -= nelt;
43272 else
43273 e += nelt;
43274 perm2[i] = e;
43275 }
43276
43277 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
43278 d->testing_p))
43279 return true;
43280 }
43281
43282 /* Try the SSE4.1 blend variable merge instructions. */
43283 if (expand_vec_perm_blend (d))
43284 return true;
43285
43286 /* Try one of the AVX vpermil variable permutations. */
43287 if (expand_vec_perm_vpermil (d))
43288 return true;
43289
43290 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
43291 vpshufb, vpermd, vpermps or vpermq variable permutation. */
43292 if (expand_vec_perm_pshufb (d))
43293 return true;
43294
43295 /* Try the AVX2 vpalignr instruction. */
43296 if (expand_vec_perm_palignr (d, true))
43297 return true;
43298
43299 /* Try the AVX512F vpermi2 instructions. */
43300 if (TARGET_AVX512F)
43301 {
43302 rtx vec[64];
43303 enum machine_mode mode = d->vmode;
43304 if (mode == V8DFmode)
43305 mode = V8DImode;
43306 else if (mode == V16SFmode)
43307 mode = V16SImode;
43308 for (i = 0; i < nelt; ++i)
43309 vec[i] = GEN_INT (d->perm[i]);
43310 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
43311 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
43312 return true;
43313 }
43314
43315 return false;
43316 }
43317
43318 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
43319 in terms of a pair of pshuflw + pshufhw instructions. */
43320
43321 static bool
43322 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
43323 {
43324 unsigned char perm2[MAX_VECT_LEN];
43325 unsigned i;
43326 bool ok;
43327
43328 if (d->vmode != V8HImode || !d->one_operand_p)
43329 return false;
43330
43331 /* The two permutations only operate in 64-bit lanes. */
43332 for (i = 0; i < 4; ++i)
43333 if (d->perm[i] >= 4)
43334 return false;
43335 for (i = 4; i < 8; ++i)
43336 if (d->perm[i] < 4)
43337 return false;
43338
43339 if (d->testing_p)
43340 return true;
43341
43342 /* Emit the pshuflw. */
43343 memcpy (perm2, d->perm, 4);
43344 for (i = 4; i < 8; ++i)
43345 perm2[i] = i;
43346 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43347 gcc_assert (ok);
43348
43349 /* Emit the pshufhw. */
43350 memcpy (perm2 + 4, d->perm + 4, 4);
43351 for (i = 0; i < 4; ++i)
43352 perm2[i] = i;
43353 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43354 gcc_assert (ok);
43355
43356 return true;
43357 }
43358
43359 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43360 the permutation using the SSSE3 palignr instruction. This succeeds
43361 when all of the elements in PERM fit within one vector and we merely
43362 need to shift them down so that a single vector permutation has a
43363 chance to succeed. If SINGLE_INSN_ONLY_P, succeed if only
43364 the vpalignr instruction itself can perform the requested permutation. */
43365
43366 static bool
43367 expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
43368 {
43369 unsigned i, nelt = d->nelt;
43370 unsigned min, max, minswap, maxswap;
43371 bool in_order, ok, swap = false;
43372 rtx shift, target;
43373 struct expand_vec_perm_d dcopy;
43374
43375 /* Even with AVX, palignr only operates on 128-bit vectors;
43376 in AVX2, palignr operates on each of the two 128-bit lanes separately. */
43377 if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43378 && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
43379 return false;
43380
43381 min = 2 * nelt;
43382 max = 0;
43383 minswap = 2 * nelt;
43384 maxswap = 0;
43385 for (i = 0; i < nelt; ++i)
43386 {
43387 unsigned e = d->perm[i];
43388 unsigned eswap = d->perm[i] ^ nelt;
43389 if (GET_MODE_SIZE (d->vmode) == 32)
43390 {
43391 e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
43392 eswap = e ^ (nelt / 2);
43393 }
43394 if (e < min)
43395 min = e;
43396 if (e > max)
43397 max = e;
43398 if (eswap < minswap)
43399 minswap = eswap;
43400 if (eswap > maxswap)
43401 maxswap = eswap;
43402 }
43403 if (min == 0
43404 || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
43405 {
43406 if (d->one_operand_p
43407 || minswap == 0
43408 || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
43409 ? nelt / 2 : nelt))
43410 return false;
43411 swap = true;
43412 min = minswap;
43413 max = maxswap;
43414 }
43415
43416 /* Given that we have SSSE3, we know we'll be able to implement the
43417 single operand permutation after the palignr with pshufb for
43418 128-bit vectors. If SINGLE_INSN_ONLY_P, in_order has to be computed
43419 first. */
43420 if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
43421 return true;
43422
43423 dcopy = *d;
43424 if (swap)
43425 {
43426 dcopy.op0 = d->op1;
43427 dcopy.op1 = d->op0;
43428 for (i = 0; i < nelt; ++i)
43429 dcopy.perm[i] ^= nelt;
43430 }
43431
43432 in_order = true;
43433 for (i = 0; i < nelt; ++i)
43434 {
43435 unsigned e = dcopy.perm[i];
43436 if (GET_MODE_SIZE (d->vmode) == 32
43437 && e >= nelt
43438 && (e & (nelt / 2 - 1)) < min)
43439 e = e - min - (nelt / 2);
43440 else
43441 e = e - min;
43442 if (e != i)
43443 in_order = false;
43444 dcopy.perm[i] = e;
43445 }
43446 dcopy.one_operand_p = true;
43447
43448 if (single_insn_only_p && !in_order)
43449 return false;
43450
43451 /* For AVX2, test whether we can permute the result in one instruction. */
43452 if (d->testing_p)
43453 {
43454 if (in_order)
43455 return true;
43456 dcopy.op1 = dcopy.op0;
43457 return expand_vec_perm_1 (&dcopy);
43458 }
43459
43460 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43461 if (GET_MODE_SIZE (d->vmode) == 16)
43462 {
43463 target = gen_reg_rtx (TImode);
43464 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
43465 gen_lowpart (TImode, dcopy.op0), shift));
43466 }
43467 else
43468 {
43469 target = gen_reg_rtx (V2TImode);
43470 emit_insn (gen_avx2_palignrv2ti (target,
43471 gen_lowpart (V2TImode, dcopy.op1),
43472 gen_lowpart (V2TImode, dcopy.op0),
43473 shift));
43474 }
43475
43476 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43477
43478 /* Test for the degenerate case where the alignment by itself
43479 produces the desired permutation. */
43480 if (in_order)
43481 {
43482 emit_move_insn (d->target, dcopy.op0);
43483 return true;
43484 }
43485
43486 ok = expand_vec_perm_1 (&dcopy);
43487 gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
43488
43489 return ok;
43490 }
43491
43492 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43493 the permutation using the SSE4_1 pblendv instruction. Potentially
43494 reduces permutation from 2 pshufb and or to 1 pshufb and pblendv. */
43495
43496 static bool
43497 expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
43498 {
43499 unsigned i, which, nelt = d->nelt;
43500 struct expand_vec_perm_d dcopy, dcopy1;
43501 enum machine_mode vmode = d->vmode;
43502 bool ok;
43503
43504 /* Use the same checks as in expand_vec_perm_blend. */
43505 if (d->one_operand_p)
43506 return false;
43507 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
43508 ;
43509 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
43510 ;
43511 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
43512 ;
43513 else
43514 return false;
43515
43516 /* Figure out which permutation elements are not already in their
43517 final positions, and which operand they come from. */
43518 for (i = 0, which = 0; i < nelt; ++i)
43519 {
43520 unsigned e = d->perm[i];
43521 if (e != i)
43522 which |= (e < nelt ? 1 : 2);
43523 }
43524 /* We can pblend the out-of-place elements only when they all come
43525 from the same operand, i.e. all of the displaced indices lie in one
43526 half of the permutation range.
43527 {0 1 8 3 4 5 9 7} is ok: 8 and 9 are not in their final
43528 positions, but both are >= 8 (second operand).
43529 {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are not in their final
43530 positions, and 8 >= 8 but 2 < 8. */
43531 if (which != 1 && which != 2)
43532 return false;
43533 if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
43534 return true;
43535
43536 /* First apply a one-operand permutation to the elements that are
43537 not already in their final positions. */
43538 dcopy = *d;
43539 if (which == 2)
43540 dcopy.op0 = dcopy.op1 = d->op1;
43541 else
43542 dcopy.op0 = dcopy.op1 = d->op0;
43543 dcopy.one_operand_p = true;
43544
43545 for (i = 0; i < nelt; ++i)
43546 dcopy.perm[i] = d->perm[i] & (nelt - 1);
43547
43548 ok = expand_vec_perm_1 (&dcopy);
43549 if (GET_MODE_SIZE (vmode) != 16 && !ok)
43550 return false;
43551 else
43552 gcc_assert (ok);
43553 if (d->testing_p)
43554 return true;
43555
43556 /* Next we put permuted elements into their positions. */
43557 dcopy1 = *d;
43558 if (which == 2)
43559 dcopy1.op1 = dcopy.target;
43560 else
43561 dcopy1.op0 = dcopy.target;
43562
43563 for (i = 0; i < nelt; ++i)
43564 dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
43565
43566 ok = expand_vec_perm_blend (&dcopy1);
43567 gcc_assert (ok);
43568
43569 return true;
43570 }
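
/* Illustrative sketch, not part of the original file: with SSE4.1
   (-msse4.1), a shuffle whose displaced elements all come from the second
   operand, like the one below, is a candidate for the path above: a
   one-operand shuffle of B followed by a blend with A.  Names are
   invented for the example.  */
typedef int v4si_ex __attribute__ ((vector_size (16)));

v4si_ex
example_pblendv_candidate (v4si_ex a, v4si_ex b)
{
  /* Elements 1 and 3 are not in place and both index into B (>= 4).  */
  v4si_ex mask = { 0, 7, 2, 5 };
  return __builtin_shuffle (a, b, mask);
}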
43571
43572 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43573
43574 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43575 a two vector permutation into a single vector permutation by using
43576 an interleave operation to merge the vectors. */
43577
43578 static bool
43579 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43580 {
43581 struct expand_vec_perm_d dremap, dfinal;
43582 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43583 unsigned HOST_WIDE_INT contents;
43584 unsigned char remap[2 * MAX_VECT_LEN];
43585 rtx_insn *seq;
43586 bool ok, same_halves = false;
43587
43588 if (GET_MODE_SIZE (d->vmode) == 16)
43589 {
43590 if (d->one_operand_p)
43591 return false;
43592 }
43593 else if (GET_MODE_SIZE (d->vmode) == 32)
43594 {
43595 if (!TARGET_AVX)
43596 return false;
43597 /* For 32-byte modes allow even d->one_operand_p.
43598 The lack of cross-lane shuffling in some instructions
43599 might prevent a single insn shuffle. */
43600 dfinal = *d;
43601 dfinal.testing_p = true;
43602 /* If expand_vec_perm_interleave3 can expand this into
43603 a 3 insn sequence, give up and let it be expanded as a
43604 3 insn sequence. Although that is one insn longer, it
43605 doesn't need a memory operand, and in the common case
43606 where the interleave-low and interleave-high permutations
43607 with the same operands are adjacent, only 4 insns are
43608 needed for both after CSE. */
43609 if (expand_vec_perm_interleave3 (&dfinal))
43610 return false;
43611 }
43612 else
43613 return false;
43614
43615 /* Examine from whence the elements come. */
43616 contents = 0;
43617 for (i = 0; i < nelt; ++i)
43618 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43619
43620 memset (remap, 0xff, sizeof (remap));
43621 dremap = *d;
43622
43623 if (GET_MODE_SIZE (d->vmode) == 16)
43624 {
43625 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43626
43627 /* Split the two input vectors into 4 halves. */
43628 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43629 h2 = h1 << nelt2;
43630 h3 = h2 << nelt2;
43631 h4 = h3 << nelt2;
43632
43633 /* If the elements are all from the low halves, use interleave low;
43634 similarly for interleave high. If the elements are from mis-matched
43635 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43636 if ((contents & (h1 | h3)) == contents)
43637 {
43638 /* punpckl* */
43639 for (i = 0; i < nelt2; ++i)
43640 {
43641 remap[i] = i * 2;
43642 remap[i + nelt] = i * 2 + 1;
43643 dremap.perm[i * 2] = i;
43644 dremap.perm[i * 2 + 1] = i + nelt;
43645 }
43646 if (!TARGET_SSE2 && d->vmode == V4SImode)
43647 dremap.vmode = V4SFmode;
43648 }
43649 else if ((contents & (h2 | h4)) == contents)
43650 {
43651 /* punpckh* */
43652 for (i = 0; i < nelt2; ++i)
43653 {
43654 remap[i + nelt2] = i * 2;
43655 remap[i + nelt + nelt2] = i * 2 + 1;
43656 dremap.perm[i * 2] = i + nelt2;
43657 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43658 }
43659 if (!TARGET_SSE2 && d->vmode == V4SImode)
43660 dremap.vmode = V4SFmode;
43661 }
43662 else if ((contents & (h1 | h4)) == contents)
43663 {
43664 /* shufps */
43665 for (i = 0; i < nelt2; ++i)
43666 {
43667 remap[i] = i;
43668 remap[i + nelt + nelt2] = i + nelt2;
43669 dremap.perm[i] = i;
43670 dremap.perm[i + nelt2] = i + nelt + nelt2;
43671 }
43672 if (nelt != 4)
43673 {
43674 /* shufpd */
43675 dremap.vmode = V2DImode;
43676 dremap.nelt = 2;
43677 dremap.perm[0] = 0;
43678 dremap.perm[1] = 3;
43679 }
43680 }
43681 else if ((contents & (h2 | h3)) == contents)
43682 {
43683 /* shufps */
43684 for (i = 0; i < nelt2; ++i)
43685 {
43686 remap[i + nelt2] = i;
43687 remap[i + nelt] = i + nelt2;
43688 dremap.perm[i] = i + nelt2;
43689 dremap.perm[i + nelt2] = i + nelt;
43690 }
43691 if (nelt != 4)
43692 {
43693 /* shufpd */
43694 dremap.vmode = V2DImode;
43695 dremap.nelt = 2;
43696 dremap.perm[0] = 1;
43697 dremap.perm[1] = 2;
43698 }
43699 }
43700 else
43701 return false;
43702 }
43703 else
43704 {
43705 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43706 unsigned HOST_WIDE_INT q[8];
43707 unsigned int nonzero_halves[4];
43708
43709 /* Split the two input vectors into 8 quarters. */
43710 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43711 for (i = 1; i < 8; ++i)
43712 q[i] = q[0] << (nelt4 * i);
43713 for (i = 0; i < 4; ++i)
43714 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43715 {
43716 nonzero_halves[nzcnt] = i;
43717 ++nzcnt;
43718 }
43719
43720 if (nzcnt == 1)
43721 {
43722 gcc_assert (d->one_operand_p);
43723 nonzero_halves[1] = nonzero_halves[0];
43724 same_halves = true;
43725 }
43726 else if (d->one_operand_p)
43727 {
43728 gcc_assert (nonzero_halves[0] == 0);
43729 gcc_assert (nonzero_halves[1] == 1);
43730 }
43731
43732 if (nzcnt <= 2)
43733 {
43734 if (d->perm[0] / nelt2 == nonzero_halves[1])
43735 {
43736 /* Attempt to increase the likelihood that dfinal
43737 shuffle will be intra-lane. */
43738 char tmph = nonzero_halves[0];
43739 nonzero_halves[0] = nonzero_halves[1];
43740 nonzero_halves[1] = tmph;
43741 }
43742
43743 /* vperm2f128 or vperm2i128. */
43744 for (i = 0; i < nelt2; ++i)
43745 {
43746 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43747 remap[i + nonzero_halves[0] * nelt2] = i;
43748 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43749 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43750 }
43751
43752 if (d->vmode != V8SFmode
43753 && d->vmode != V4DFmode
43754 && d->vmode != V8SImode)
43755 {
43756 dremap.vmode = V8SImode;
43757 dremap.nelt = 8;
43758 for (i = 0; i < 4; ++i)
43759 {
43760 dremap.perm[i] = i + nonzero_halves[0] * 4;
43761 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43762 }
43763 }
43764 }
43765 else if (d->one_operand_p)
43766 return false;
43767 else if (TARGET_AVX2
43768 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43769 {
43770 /* vpunpckl* */
43771 for (i = 0; i < nelt4; ++i)
43772 {
43773 remap[i] = i * 2;
43774 remap[i + nelt] = i * 2 + 1;
43775 remap[i + nelt2] = i * 2 + nelt2;
43776 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43777 dremap.perm[i * 2] = i;
43778 dremap.perm[i * 2 + 1] = i + nelt;
43779 dremap.perm[i * 2 + nelt2] = i + nelt2;
43780 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43781 }
43782 }
43783 else if (TARGET_AVX2
43784 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43785 {
43786 /* vpunpckh* */
43787 for (i = 0; i < nelt4; ++i)
43788 {
43789 remap[i + nelt4] = i * 2;
43790 remap[i + nelt + nelt4] = i * 2 + 1;
43791 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43792 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43793 dremap.perm[i * 2] = i + nelt4;
43794 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43795 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43796 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43797 }
43798 }
43799 else
43800 return false;
43801 }
43802
43803 /* Use the remapping array set up above to move the elements from their
43804 swizzled locations into their final destinations. */
43805 dfinal = *d;
43806 for (i = 0; i < nelt; ++i)
43807 {
43808 unsigned e = remap[d->perm[i]];
43809 gcc_assert (e < nelt);
43810 /* If same_halves is true, both halves of the remapped vector are the
43811 same. Avoid cross-lane accesses if possible. */
43812 if (same_halves && i >= nelt2)
43813 {
43814 gcc_assert (e < nelt2);
43815 dfinal.perm[i] = e + nelt2;
43816 }
43817 else
43818 dfinal.perm[i] = e;
43819 }
43820 if (!d->testing_p)
43821 {
43822 dremap.target = gen_reg_rtx (dremap.vmode);
43823 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43824 }
43825 dfinal.op1 = dfinal.op0;
43826 dfinal.one_operand_p = true;
43827
43828 /* Test if the final remap can be done with a single insn. For V4SFmode or
43829 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43830 start_sequence ();
43831 ok = expand_vec_perm_1 (&dfinal);
43832 seq = get_insns ();
43833 end_sequence ();
43834
43835 if (!ok)
43836 return false;
43837
43838 if (d->testing_p)
43839 return true;
43840
43841 if (dremap.vmode != dfinal.vmode)
43842 {
43843 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43844 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43845 }
43846
43847 ok = expand_vec_perm_1 (&dremap);
43848 gcc_assert (ok);
43849
43850 emit_insn (seq);
43851 return true;
43852 }
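
/* Illustrative sketch, not part of the original file: a two-operand
   shuffle whose indices all lie in the low halves of both inputs, which
   the interleave path above can expand as a punpckldq followed by a
   single pshufd.  Names are invented for the example.  */
typedef int v4si_ex2 __attribute__ ((vector_size (16)));

v4si_ex2
example_interleave_then_shuffle (v4si_ex2 a, v4si_ex2 b)
{
  /* Every index is in { 0, 1 } of A or { 4, 5 } of B (the low halves).  */
  v4si_ex2 mask = { 1, 4, 0, 5 };
  return __builtin_shuffle (a, b, mask);
}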
43853
43854 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
43855 a single vector cross-lane permutation into vpermq followed
43856 by any of the single insn permutations. */
43857
43858 static bool
43859 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43860 {
43861 struct expand_vec_perm_d dremap, dfinal;
43862 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43863 unsigned contents[2];
43864 bool ok;
43865
43866 if (!(TARGET_AVX2
43867 && (d->vmode == V32QImode || d->vmode == V16HImode)
43868 && d->one_operand_p))
43869 return false;
43870
43871 contents[0] = 0;
43872 contents[1] = 0;
43873 for (i = 0; i < nelt2; ++i)
43874 {
43875 contents[0] |= 1u << (d->perm[i] / nelt4);
43876 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43877 }
43878
43879 for (i = 0; i < 2; ++i)
43880 {
43881 unsigned int cnt = 0;
43882 for (j = 0; j < 4; ++j)
43883 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43884 return false;
43885 }
43886
43887 if (d->testing_p)
43888 return true;
43889
43890 dremap = *d;
43891 dremap.vmode = V4DImode;
43892 dremap.nelt = 4;
43893 dremap.target = gen_reg_rtx (V4DImode);
43894 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43895 dremap.op1 = dremap.op0;
43896 dremap.one_operand_p = true;
43897 for (i = 0; i < 2; ++i)
43898 {
43899 unsigned int cnt = 0;
43900 for (j = 0; j < 4; ++j)
43901 if ((contents[i] & (1u << j)) != 0)
43902 dremap.perm[2 * i + cnt++] = j;
43903 for (; cnt < 2; ++cnt)
43904 dremap.perm[2 * i + cnt] = 0;
43905 }
43906
43907 dfinal = *d;
43908 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43909 dfinal.op1 = dfinal.op0;
43910 dfinal.one_operand_p = true;
43911 for (i = 0, j = 0; i < nelt; ++i)
43912 {
43913 if (i == nelt2)
43914 j = 2;
43915 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43916 if ((d->perm[i] / nelt4) == dremap.perm[j])
43917 ;
43918 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43919 dfinal.perm[i] |= nelt4;
43920 else
43921 gcc_unreachable ();
43922 }
43923
43924 ok = expand_vec_perm_1 (&dremap);
43925 gcc_assert (ok);
43926
43927 ok = expand_vec_perm_1 (&dfinal);
43928 gcc_assert (ok);
43929
43930 return true;
43931 }
43932
43933 /* A subroutine of ix86_expand_vec_perm_const_1. Try to expand
43934 a vector permutation using two instructions, vperm2f128 resp.
43935 vperm2i128 followed by any single in-lane permutation. */
43936
43937 static bool
43938 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43939 {
43940 struct expand_vec_perm_d dfirst, dsecond;
43941 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43942 bool ok;
43943
43944 if (!TARGET_AVX
43945 || GET_MODE_SIZE (d->vmode) != 32
43946 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43947 return false;
43948
43949 dsecond = *d;
43950 dsecond.one_operand_p = false;
43951 dsecond.testing_p = true;
43952
43953 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43954 immediate. For perm < 16 the second permutation uses
43955 d->op0 as its first operand; for perm >= 16 it uses d->op1
43956 as first operand. The second operand is the result of
43957 vperm2[fi]128. */
43958 for (perm = 0; perm < 32; perm++)
43959 {
43960 /* Ignore permutations which do not move anything cross-lane. */
43961 if (perm < 16)
43962 {
43963 /* The second shuffle for e.g. V4DFmode has
43964 0123 and ABCD operands.
43965 Ignore AB23, as 23 is already in the second lane
43966 of the first operand. */
43967 if ((perm & 0xc) == (1 << 2)) continue;
43968 /* And 01CD, as 01 is in the first lane of the first
43969 operand. */
43970 if ((perm & 3) == 0) continue;
43971 /* And 4567, as then the vperm2[fi]128 doesn't change
43972 anything on the original 4567 second operand. */
43973 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43974 }
43975 else
43976 {
43977 /* The second shuffle for e.g. V4DFmode has
43978 4567 and ABCD operands.
43979 Ignore AB67, as 67 is already in the second lane
43980 of the first operand. */
43981 if ((perm & 0xc) == (3 << 2)) continue;
43982 /* And 45CD, as 45 is in the first lane of the first
43983 operand. */
43984 if ((perm & 3) == 2) continue;
43985 /* And 0123, as then the vperm2[fi]128 doesn't change
43986 anything on the original 0123 first operand. */
43987 if ((perm & 0xf) == (1 << 2)) continue;
43988 }
43989
43990 for (i = 0; i < nelt; i++)
43991 {
43992 j = d->perm[i] / nelt2;
43993 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43994 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43995 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43996 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43997 else
43998 break;
43999 }
44000
44001 if (i == nelt)
44002 {
44003 start_sequence ();
44004 ok = expand_vec_perm_1 (&dsecond);
44005 end_sequence ();
44006 }
44007 else
44008 ok = false;
44009
44010 if (ok)
44011 {
44012 if (d->testing_p)
44013 return true;
44014
44015 /* Found a usable second shuffle. dfirst will be
44016 vperm2f128 on d->op0 and d->op1. */
44017 dsecond.testing_p = false;
44018 dfirst = *d;
44019 dfirst.target = gen_reg_rtx (d->vmode);
44020 for (i = 0; i < nelt; i++)
44021 dfirst.perm[i] = (i & (nelt2 - 1))
44022 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
44023
44024 canonicalize_perm (&dfirst);
44025 ok = expand_vec_perm_1 (&dfirst);
44026 gcc_assert (ok);
44027
44028 /* And dsecond is some single insn shuffle, taking
44029 d->op0 and result of vperm2f128 (if perm < 16) or
44030 d->op1 and result of vperm2f128 (otherwise). */
44031 if (perm >= 16)
44032 dsecond.op0 = dsecond.op1;
44033 dsecond.op1 = dfirst.target;
44034
44035 ok = expand_vec_perm_1 (&dsecond);
44036 gcc_assert (ok);
44037
44038 return true;
44039 }
44040
44041 /* For one operand, the only useful vperm2f128 permutation is 0x01
44042 aka lanes swap. */
44043 if (d->one_operand_p)
44044 return false;
44045 }
44046
44047 return false;
44048 }
44049
44050 /* A subroutine of ix86_expand_vec_perm_const_1. Try to simplify
44051 a two vector permutation using 2 intra-lane interleave insns
44052 and a cross-lane shuffle for 32-byte vectors. */
44053
44054 static bool
44055 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
44056 {
44057 unsigned i, nelt;
44058 rtx (*gen) (rtx, rtx, rtx);
44059
44060 if (d->one_operand_p)
44061 return false;
44062 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
44063 ;
44064 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
44065 ;
44066 else
44067 return false;
44068
44069 nelt = d->nelt;
44070 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
44071 return false;
44072 for (i = 0; i < nelt; i += 2)
44073 if (d->perm[i] != d->perm[0] + i / 2
44074 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
44075 return false;
44076
44077 if (d->testing_p)
44078 return true;
44079
44080 switch (d->vmode)
44081 {
44082 case V32QImode:
44083 if (d->perm[0])
44084 gen = gen_vec_interleave_highv32qi;
44085 else
44086 gen = gen_vec_interleave_lowv32qi;
44087 break;
44088 case V16HImode:
44089 if (d->perm[0])
44090 gen = gen_vec_interleave_highv16hi;
44091 else
44092 gen = gen_vec_interleave_lowv16hi;
44093 break;
44094 case V8SImode:
44095 if (d->perm[0])
44096 gen = gen_vec_interleave_highv8si;
44097 else
44098 gen = gen_vec_interleave_lowv8si;
44099 break;
44100 case V4DImode:
44101 if (d->perm[0])
44102 gen = gen_vec_interleave_highv4di;
44103 else
44104 gen = gen_vec_interleave_lowv4di;
44105 break;
44106 case V8SFmode:
44107 if (d->perm[0])
44108 gen = gen_vec_interleave_highv8sf;
44109 else
44110 gen = gen_vec_interleave_lowv8sf;
44111 break;
44112 case V4DFmode:
44113 if (d->perm[0])
44114 gen = gen_vec_interleave_highv4df;
44115 else
44116 gen = gen_vec_interleave_lowv4df;
44117 break;
44118 default:
44119 gcc_unreachable ();
44120 }
44121
44122 emit_insn (gen (d->target, d->op0, d->op1));
44123 return true;
44124 }
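
/* Illustrative sketch, not part of the original file: with AVX (-mavx),
   a full interleave-low of two 8-element float vectors matches the shape
   the routine above checks for (perm[0] == 0 and pairs
   { perm[0] + i/2, perm[0] + i/2 + nelt }).  Names are invented for the
   example.  */
typedef float v8sf_ex __attribute__ ((vector_size (32)));
typedef int v8si_ex __attribute__ ((vector_size (32)));

v8sf_ex
example_interleave_low (v8sf_ex a, v8sf_ex b)
{
  v8si_ex mask = { 0, 8, 1, 9, 2, 10, 3, 11 };
  return __builtin_shuffle (a, b, mask);
}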
44125
44126 /* A subroutine of ix86_expand_vec_perm_const_1. Try to implement
44127 a single vector permutation using a single intra-lane vector
44128 permutation, vperm2f128 swapping the lanes and vblend* insn blending
44129 the non-swapped and swapped vectors together. */
44130
44131 static bool
44132 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
44133 {
44134 struct expand_vec_perm_d dfirst, dsecond;
44135 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
44136 rtx_insn *seq;
44137 bool ok;
44138 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
44139
44140 if (!TARGET_AVX
44141 || TARGET_AVX2
44142 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
44143 || !d->one_operand_p)
44144 return false;
44145
44146 dfirst = *d;
44147 for (i = 0; i < nelt; i++)
44148 dfirst.perm[i] = 0xff;
44149 for (i = 0, msk = 0; i < nelt; i++)
44150 {
44151 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
44152 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
44153 return false;
44154 dfirst.perm[j] = d->perm[i];
44155 if (j != i)
44156 msk |= (1 << i);
44157 }
44158 for (i = 0; i < nelt; i++)
44159 if (dfirst.perm[i] == 0xff)
44160 dfirst.perm[i] = i;
44161
44162 if (!d->testing_p)
44163 dfirst.target = gen_reg_rtx (dfirst.vmode);
44164
44165 start_sequence ();
44166 ok = expand_vec_perm_1 (&dfirst);
44167 seq = get_insns ();
44168 end_sequence ();
44169
44170 if (!ok)
44171 return false;
44172
44173 if (d->testing_p)
44174 return true;
44175
44176 emit_insn (seq);
44177
44178 dsecond = *d;
44179 dsecond.op0 = dfirst.target;
44180 dsecond.op1 = dfirst.target;
44181 dsecond.one_operand_p = true;
44182 dsecond.target = gen_reg_rtx (dsecond.vmode);
44183 for (i = 0; i < nelt; i++)
44184 dsecond.perm[i] = i ^ nelt2;
44185
44186 ok = expand_vec_perm_1 (&dsecond);
44187 gcc_assert (ok);
44188
44189 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
44190 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
44191 return true;
44192 }
44193
44194 /* A subroutine of ix86_expand_vec_perm_const_1. Implement a V4DF
44195 permutation using two vperm2f128, followed by a vshufpd insn blending
44196 the two vectors together. */
44197
44198 static bool
44199 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
44200 {
44201 struct expand_vec_perm_d dfirst, dsecond, dthird;
44202 bool ok;
44203
44204 if (!TARGET_AVX || (d->vmode != V4DFmode))
44205 return false;
44206
44207 if (d->testing_p)
44208 return true;
44209
44210 dfirst = *d;
44211 dsecond = *d;
44212 dthird = *d;
44213
44214 dfirst.perm[0] = (d->perm[0] & ~1);
44215 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
44216 dfirst.perm[2] = (d->perm[2] & ~1);
44217 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
44218 dsecond.perm[0] = (d->perm[1] & ~1);
44219 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
44220 dsecond.perm[2] = (d->perm[3] & ~1);
44221 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
44222 dthird.perm[0] = (d->perm[0] % 2);
44223 dthird.perm[1] = (d->perm[1] % 2) + 4;
44224 dthird.perm[2] = (d->perm[2] % 2) + 2;
44225 dthird.perm[3] = (d->perm[3] % 2) + 6;
44226
44227 dfirst.target = gen_reg_rtx (dfirst.vmode);
44228 dsecond.target = gen_reg_rtx (dsecond.vmode);
44229 dthird.op0 = dfirst.target;
44230 dthird.op1 = dsecond.target;
44231 dthird.one_operand_p = false;
44232
44233 canonicalize_perm (&dfirst);
44234 canonicalize_perm (&dsecond);
44235
44236 ok = expand_vec_perm_1 (&dfirst)
44237 && expand_vec_perm_1 (&dsecond)
44238 && expand_vec_perm_1 (&dthird);
44239
44240 gcc_assert (ok);
44241
44242 return true;
44243 }
44244
44245 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
44246 permutation with two pshufb insns and an ior. We should have already
44247 failed all two instruction sequences. */
44248
44249 static bool
44250 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
44251 {
44252 rtx rperm[2][16], vperm, l, h, op, m128;
44253 unsigned int i, nelt, eltsz;
44254
44255 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
44256 return false;
44257 gcc_assert (!d->one_operand_p);
44258
44259 if (d->testing_p)
44260 return true;
44261
44262 nelt = d->nelt;
44263 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44264
44265 /* Generate two permutation masks. If the required element is within
44266 the given vector it is shuffled into the proper lane. If the required
44267 element is in the other vector, force a zero into the lane by setting
44268 bit 7 in the permutation mask. */
44269 m128 = GEN_INT (-128);
44270 for (i = 0; i < nelt; ++i)
44271 {
44272 unsigned j, e = d->perm[i];
44273 unsigned which = (e >= nelt);
44274 if (e >= nelt)
44275 e -= nelt;
44276
44277 for (j = 0; j < eltsz; ++j)
44278 {
44279 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
44280 rperm[1-which][i*eltsz + j] = m128;
44281 }
44282 }
44283
44284 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
44285 vperm = force_reg (V16QImode, vperm);
44286
44287 l = gen_reg_rtx (V16QImode);
44288 op = gen_lowpart (V16QImode, d->op0);
44289 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
44290
44291 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
44292 vperm = force_reg (V16QImode, vperm);
44293
44294 h = gen_reg_rtx (V16QImode);
44295 op = gen_lowpart (V16QImode, d->op1);
44296 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
44297
44298 op = d->target;
44299 if (d->vmode != V16QImode)
44300 op = gen_reg_rtx (V16QImode);
44301 emit_insn (gen_iorv16qi3 (op, l, h));
44302 if (op != d->target)
44303 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44304
44305 return true;
44306 }
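
/* Illustrative sketch, not part of the original file: any two-operand
   byte shuffle, such as the arbitrary one below, is within reach of the
   two-pshufb-plus-ior fallback above on SSSE3 (-mssse3), whichever
   earlier routine happens to catch it first.  Names are invented for the
   example.  */
typedef unsigned char v16qi_ex __attribute__ ((vector_size (16)));

v16qi_ex
example_arbitrary_byte_shuffle (v16qi_ex a, v16qi_ex b)
{
  v16qi_ex mask = { 5, 20, 9, 31, 2, 17, 0, 28,
                    11, 19, 6, 24, 14, 22, 3, 30 };
  return __builtin_shuffle (a, b, mask);
}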
44307
44308 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
44309 with two vpshufb insns, vpermq and vpor. We should have already failed
44310 all two or three instruction sequences. */
44311
44312 static bool
44313 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
44314 {
44315 rtx rperm[2][32], vperm, l, h, hp, op, m128;
44316 unsigned int i, nelt, eltsz;
44317
44318 if (!TARGET_AVX2
44319 || !d->one_operand_p
44320 || (d->vmode != V32QImode && d->vmode != V16HImode))
44321 return false;
44322
44323 if (d->testing_p)
44324 return true;
44325
44326 nelt = d->nelt;
44327 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44328
44329 /* Generate two permutation masks. If the required element is within
44330 the same lane, it is shuffled in. If the required element is from the
44331 other lane, force a zero by setting bit 7 in the permutation mask.
44332 The other mask has a non-negative element whenever the element
44333 is requested from the other lane; it is also moved to the other lane,
44334 so that the result of vpshufb can have the two V2TImode halves
44335 swapped. */
44336 m128 = GEN_INT (-128);
44337 for (i = 0; i < nelt; ++i)
44338 {
44339 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44340 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44341
44342 for (j = 0; j < eltsz; ++j)
44343 {
44344 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
44345 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
44346 }
44347 }
44348
44349 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44350 vperm = force_reg (V32QImode, vperm);
44351
44352 h = gen_reg_rtx (V32QImode);
44353 op = gen_lowpart (V32QImode, d->op0);
44354 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44355
44356 /* Swap the 128-bit lanes of h into hp. */
44357 hp = gen_reg_rtx (V4DImode);
44358 op = gen_lowpart (V4DImode, h);
44359 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
44360 const1_rtx));
44361
44362 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44363 vperm = force_reg (V32QImode, vperm);
44364
44365 l = gen_reg_rtx (V32QImode);
44366 op = gen_lowpart (V32QImode, d->op0);
44367 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44368
44369 op = d->target;
44370 if (d->vmode != V32QImode)
44371 op = gen_reg_rtx (V32QImode);
44372 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
44373 if (op != d->target)
44374 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44375
44376 return true;
44377 }
44378
44379 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
44380 and extract-odd permutations of two V32QImode or V16HImode operands
44381 with two vpshufb insns, vpor and vpermq. We should have already
44382 failed all two or three instruction sequences. */
44383
44384 static bool
44385 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
44386 {
44387 rtx rperm[2][32], vperm, l, h, ior, op, m128;
44388 unsigned int i, nelt, eltsz;
44389
44390 if (!TARGET_AVX2
44391 || d->one_operand_p
44392 || (d->vmode != V32QImode && d->vmode != V16HImode))
44393 return false;
44394
44395 for (i = 0; i < d->nelt; ++i)
44396 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
44397 return false;
44398
44399 if (d->testing_p)
44400 return true;
44401
44402 nelt = d->nelt;
44403 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44404
44405 /* Generate two permutation masks. In the first permutation mask
44406 the first quarter will contain indexes for the first half
44407 of the op0, the second quarter will contain bit 7 set, third quarter
44408 will contain indexes for the second half of the op0 and the
44409 last quarter bit 7 set. In the second permutation mask
44410 the first quarter will contain bit 7 set, the second quarter
44411 indexes for the first half of the op1, the third quarter bit 7 set
44412 and last quarter indexes for the second half of the op1.
44413 I.e. the first mask e.g. for V32QImode extract even will be:
44414 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
44415 (all values masked with 0xf except for -128) and second mask
44416 for extract even will be
44417 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
44418 m128 = GEN_INT (-128);
44419 for (i = 0; i < nelt; ++i)
44420 {
44421 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44422 unsigned which = d->perm[i] >= nelt;
44423 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
44424
44425 for (j = 0; j < eltsz; ++j)
44426 {
44427 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
44428 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
44429 }
44430 }
44431
44432 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
44433 vperm = force_reg (V32QImode, vperm);
44434
44435 l = gen_reg_rtx (V32QImode);
44436 op = gen_lowpart (V32QImode, d->op0);
44437 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
44438
44439 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
44440 vperm = force_reg (V32QImode, vperm);
44441
44442 h = gen_reg_rtx (V32QImode);
44443 op = gen_lowpart (V32QImode, d->op1);
44444 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
44445
44446 ior = gen_reg_rtx (V32QImode);
44447 emit_insn (gen_iorv32qi3 (ior, l, h));
44448
44449 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
44450 op = gen_reg_rtx (V4DImode);
44451 ior = gen_lowpart (V4DImode, ior);
44452 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
44453 const1_rtx, GEN_INT (3)));
44454 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44455
44456 return true;
44457 }
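
/* Illustrative sketch, not part of the original file: with AVX2 (-mavx2),
   extracting the even elements of two 16-element short vectors is the
   kind of permutation the routine above handles (see also the
   V16HImode/V32QImode case in expand_vec_perm_even_odd_1 below).  Names
   are invented for the example.  */
typedef short v16hi_ex __attribute__ ((vector_size (32)));

v16hi_ex
example_extract_even_v16hi (v16hi_ex a, v16hi_ex b)
{
  v16hi_ex mask = { 0, 2, 4, 6, 8, 10, 12, 14,
                    16, 18, 20, 22, 24, 26, 28, 30 };
  return __builtin_shuffle (a, b, mask);
}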
44458
44459 /* A subroutine of ix86_expand_vec_perm_const_1. Implement extract-even
44460 and extract-odd permutations. */
44461
44462 static bool
44463 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
44464 {
44465 rtx t1, t2, t3, t4, t5;
44466
44467 switch (d->vmode)
44468 {
44469 case V4DFmode:
44470 if (d->testing_p)
44471 break;
44472 t1 = gen_reg_rtx (V4DFmode);
44473 t2 = gen_reg_rtx (V4DFmode);
44474
44475 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44476 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44477 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44478
44479 /* Now an unpck[lh]pd will produce the result required. */
44480 if (odd)
44481 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44482 else
44483 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44484 emit_insn (t3);
44485 break;
44486
44487 case V8SFmode:
44488 {
44489 int mask = odd ? 0xdd : 0x88;
44490
44491 if (d->testing_p)
44492 break;
44493 t1 = gen_reg_rtx (V8SFmode);
44494 t2 = gen_reg_rtx (V8SFmode);
44495 t3 = gen_reg_rtx (V8SFmode);
44496
44497 /* Shuffle within the 128-bit lanes to produce:
44498 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44499 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44500 GEN_INT (mask)));
44501
44502 /* Shuffle the lanes around to produce:
44503 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44504 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44505 GEN_INT (0x3)));
44506
44507 /* Shuffle within the 128-bit lanes to produce:
44508 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44509 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44510
44511 /* Shuffle within the 128-bit lanes to produce:
44512 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44513 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44514
44515 /* Shuffle the lanes around to produce:
44516 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44517 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44518 GEN_INT (0x20)));
44519 }
44520 break;
44521
44522 case V2DFmode:
44523 case V4SFmode:
44524 case V2DImode:
44525 case V4SImode:
44526 /* These are always directly implementable by expand_vec_perm_1. */
44527 gcc_unreachable ();
44528
44529 case V8HImode:
44530 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44531 return expand_vec_perm_pshufb2 (d);
44532 else
44533 {
44534 if (d->testing_p)
44535 break;
44536 /* We need 2*log2(N)-1 operations to achieve odd/even
44537 with interleave. */
44538 t1 = gen_reg_rtx (V8HImode);
44539 t2 = gen_reg_rtx (V8HImode);
44540 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44541 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44542 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44543 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44544 if (odd)
44545 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44546 else
44547 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44548 emit_insn (t3);
44549 }
44550 break;
44551
44552 case V16QImode:
44553 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44554 return expand_vec_perm_pshufb2 (d);
44555 else
44556 {
44557 if (d->testing_p)
44558 break;
44559 t1 = gen_reg_rtx (V16QImode);
44560 t2 = gen_reg_rtx (V16QImode);
44561 t3 = gen_reg_rtx (V16QImode);
44562 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44563 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44564 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44565 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44566 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44567 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44568 if (odd)
44569 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44570 else
44571 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44572 emit_insn (t3);
44573 }
44574 break;
44575
44576 case V16HImode:
44577 case V32QImode:
44578 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44579
44580 case V4DImode:
44581 if (!TARGET_AVX2)
44582 {
44583 struct expand_vec_perm_d d_copy = *d;
44584 d_copy.vmode = V4DFmode;
44585 if (d->testing_p)
44586 d_copy.target = gen_lowpart (V4DFmode, d->target);
44587 else
44588 d_copy.target = gen_reg_rtx (V4DFmode);
44589 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44590 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44591 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44592 {
44593 if (!d->testing_p)
44594 emit_move_insn (d->target,
44595 gen_lowpart (V4DImode, d_copy.target));
44596 return true;
44597 }
44598 return false;
44599 }
44600
44601 if (d->testing_p)
44602 break;
44603
44604 t1 = gen_reg_rtx (V4DImode);
44605 t2 = gen_reg_rtx (V4DImode);
44606
44607 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44608 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44609 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44610
44611 /* Now a vpunpck[lh]qdq will produce the result required. */
44612 if (odd)
44613 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44614 else
44615 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44616 emit_insn (t3);
44617 break;
44618
44619 case V8SImode:
44620 if (!TARGET_AVX2)
44621 {
44622 struct expand_vec_perm_d d_copy = *d;
44623 d_copy.vmode = V8SFmode;
44624 if (d->testing_p)
44625 d_copy.target = gen_lowpart (V8SFmode, d->target);
44626 else
44627 d_copy.target = gen_reg_rtx (V8SFmode);
44628 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44629 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44630 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44631 {
44632 if (!d->testing_p)
44633 emit_move_insn (d->target,
44634 gen_lowpart (V8SImode, d_copy.target));
44635 return true;
44636 }
44637 return false;
44638 }
44639
44640 if (d->testing_p)
44641 break;
44642
44643 t1 = gen_reg_rtx (V8SImode);
44644 t2 = gen_reg_rtx (V8SImode);
44645 t3 = gen_reg_rtx (V4DImode);
44646 t4 = gen_reg_rtx (V4DImode);
44647 t5 = gen_reg_rtx (V4DImode);
44648
44649 /* Shuffle the lanes around into
44650 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44651 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44652 gen_lowpart (V4DImode, d->op1),
44653 GEN_INT (0x20)));
44654 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44655 gen_lowpart (V4DImode, d->op1),
44656 GEN_INT (0x31)));
44657
44658 /* Swap the 2nd and 3rd position in each lane into
44659 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44660 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44661 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44662 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44663 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44664
44665 /* Now a vpunpck[lh]qdq will produce
44666 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44667 if (odd)
44668 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44669 gen_lowpart (V4DImode, t2));
44670 else
44671 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44672 gen_lowpart (V4DImode, t2));
44673 emit_insn (t3);
44674 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44675 break;
44676
44677 default:
44678 gcc_unreachable ();
44679 }
44680
44681 return true;
44682 }
44683
44684 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44685 extract-even and extract-odd permutations. */
44686
44687 static bool
44688 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44689 {
44690 unsigned i, odd, nelt = d->nelt;
44691
44692 odd = d->perm[0];
44693 if (odd != 0 && odd != 1)
44694 return false;
44695
44696 for (i = 1; i < nelt; ++i)
44697 if (d->perm[i] != 2 * i + odd)
44698 return false;
44699
44700 return expand_vec_perm_even_odd_1 (d, odd);
44701 }
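
/* Illustrative sketch, not part of the original file: the canonical
   extract-even shape recognized above, at the source level.  Depending on
   the enabled ISA it may be caught earlier by a cheaper routine; names
   are invented for the example.  */
typedef short v8hi_ex2 __attribute__ ((vector_size (16)));

v8hi_ex2
example_extract_even (v8hi_ex2 a, v8hi_ex2 b)
{
  v8hi_ex2 mask = { 0, 2, 4, 6, 8, 10, 12, 14 };
  return __builtin_shuffle (a, b, mask);
}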
44702
44703 /* A subroutine of ix86_expand_vec_perm_const_1. Implement broadcast
44704 permutations. We assume that expand_vec_perm_1 has already failed. */
44705
44706 static bool
44707 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44708 {
44709 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44710 enum machine_mode vmode = d->vmode;
44711 unsigned char perm2[4];
44712 rtx op0 = d->op0, dest;
44713 bool ok;
44714
44715 switch (vmode)
44716 {
44717 case V4DFmode:
44718 case V8SFmode:
44719 /* These are special-cased in sse.md so that we can optionally
44720 use the vbroadcast instruction. They expand to two insns
44721 if the input happens to be in a register. */
44722 gcc_unreachable ();
44723
44724 case V2DFmode:
44725 case V2DImode:
44726 case V4SFmode:
44727 case V4SImode:
44728 /* These are always implementable using standard shuffle patterns. */
44729 gcc_unreachable ();
44730
44731 case V8HImode:
44732 case V16QImode:
44733 /* These can be implemented via interleave. We save one insn by
44734 stopping once we have promoted to V4SImode and then use pshufd. */
44735 if (d->testing_p)
44736 return true;
44737 do
44738 {
44739 rtx dest;
44740 rtx (*gen) (rtx, rtx, rtx)
44741 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44742 : gen_vec_interleave_lowv8hi;
44743
44744 if (elt >= nelt2)
44745 {
44746 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44747 : gen_vec_interleave_highv8hi;
44748 elt -= nelt2;
44749 }
44750 nelt2 /= 2;
44751
44752 dest = gen_reg_rtx (vmode);
44753 emit_insn (gen (dest, op0, op0));
44754 vmode = get_mode_wider_vector (vmode);
44755 op0 = gen_lowpart (vmode, dest);
44756 }
44757 while (vmode != V4SImode);
44758
44759 memset (perm2, elt, 4);
44760 dest = gen_reg_rtx (V4SImode);
44761 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44762 gcc_assert (ok);
44763 if (!d->testing_p)
44764 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44765 return true;
44766
44767 case V32QImode:
44768 case V16HImode:
44769 case V8SImode:
44770 case V4DImode:
44771 /* For AVX2 broadcasts of the first element vpbroadcast* or
44772 vpermq should be used by expand_vec_perm_1. */
44773 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44774 return false;
44775
44776 default:
44777 gcc_unreachable ();
44778 }
44779 }
44780
44781 /* A subroutine of ix86_expand_vec_perm_const_1. Pattern match
44782 broadcast permutations. */
44783
44784 static bool
44785 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44786 {
44787 unsigned i, elt, nelt = d->nelt;
44788
44789 if (!d->one_operand_p)
44790 return false;
44791
44792 elt = d->perm[0];
44793 for (i = 1; i < nelt; ++i)
44794 if (d->perm[i] != elt)
44795 return false;
44796
44797 return expand_vec_perm_broadcast_1 (d);
44798 }
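
/* Illustrative sketch, not part of the original file: a single-element
   broadcast at the source level; one operand with all indices equal is
   exactly what the pattern match above looks for.  Names are invented
   for the example.  */
typedef short v8hi_ex3 __attribute__ ((vector_size (16)));

v8hi_ex3
example_broadcast_element_3 (v8hi_ex3 a)
{
  v8hi_ex3 mask = { 3, 3, 3, 3, 3, 3, 3, 3 };
  return __builtin_shuffle (a, mask);
}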
44799
44800 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44801 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44802 all the shorter instruction sequences. */
44803
44804 static bool
44805 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44806 {
44807 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44808 unsigned int i, nelt, eltsz;
44809 bool used[4];
44810
44811 if (!TARGET_AVX2
44812 || d->one_operand_p
44813 || (d->vmode != V32QImode && d->vmode != V16HImode))
44814 return false;
44815
44816 if (d->testing_p)
44817 return true;
44818
44819 nelt = d->nelt;
44820 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44821
44822 /* Generate 4 permutation masks. If the required element is within
44823 the same lane, it is shuffled in. If the required element is from the
44824 other lane, force a zero by setting bit 7 in the permutation mask.
44825 The other mask has a non-negative element whenever the element
44826 is requested from the other lane; it is also moved to the other lane,
44827 so that the result of vpshufb can have the two V2TImode halves
44828 swapped. */
44829 m128 = GEN_INT (-128);
44830 for (i = 0; i < 32; ++i)
44831 {
44832 rperm[0][i] = m128;
44833 rperm[1][i] = m128;
44834 rperm[2][i] = m128;
44835 rperm[3][i] = m128;
44836 }
44837 used[0] = false;
44838 used[1] = false;
44839 used[2] = false;
44840 used[3] = false;
44841 for (i = 0; i < nelt; ++i)
44842 {
44843 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44844 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44845 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44846
44847 for (j = 0; j < eltsz; ++j)
44848 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44849 used[which] = true;
44850 }
44851
44852 for (i = 0; i < 2; ++i)
44853 {
44854 if (!used[2 * i + 1])
44855 {
44856 h[i] = NULL_RTX;
44857 continue;
44858 }
44859 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44860 gen_rtvec_v (32, rperm[2 * i + 1]));
44861 vperm = force_reg (V32QImode, vperm);
44862 h[i] = gen_reg_rtx (V32QImode);
44863 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44864 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44865 }
44866
44867 /* Swap the 128-bit lanes of h[X]. */
44868 for (i = 0; i < 2; ++i)
44869 {
44870 if (h[i] == NULL_RTX)
44871 continue;
44872 op = gen_reg_rtx (V4DImode);
44873 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44874 const2_rtx, GEN_INT (3), const0_rtx,
44875 const1_rtx));
44876 h[i] = gen_lowpart (V32QImode, op);
44877 }
44878
44879 for (i = 0; i < 2; ++i)
44880 {
44881 if (!used[2 * i])
44882 {
44883 l[i] = NULL_RTX;
44884 continue;
44885 }
44886 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44887 vperm = force_reg (V32QImode, vperm);
44888 l[i] = gen_reg_rtx (V32QImode);
44889 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44890 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44891 }
44892
44893 for (i = 0; i < 2; ++i)
44894 {
44895 if (h[i] && l[i])
44896 {
44897 op = gen_reg_rtx (V32QImode);
44898 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44899 l[i] = op;
44900 }
44901 else if (h[i])
44902 l[i] = h[i];
44903 }
44904
44905 gcc_assert (l[0] && l[1]);
44906 op = d->target;
44907 if (d->vmode != V32QImode)
44908 op = gen_reg_rtx (V32QImode);
44909 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44910 if (op != d->target)
44911 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44912 return true;
44913 }
44914
44915 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44916 With all of the interface bits taken care of, perform the expansion
44917 in D and return true on success. */
44918
44919 static bool
44920 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44921 {
44922 /* Try a single instruction expansion. */
44923 if (expand_vec_perm_1 (d))
44924 return true;
44925
44926 /* Try sequences of two instructions. */
44927
44928 if (expand_vec_perm_pshuflw_pshufhw (d))
44929 return true;
44930
44931 if (expand_vec_perm_palignr (d, false))
44932 return true;
44933
44934 if (expand_vec_perm_interleave2 (d))
44935 return true;
44936
44937 if (expand_vec_perm_broadcast (d))
44938 return true;
44939
44940 if (expand_vec_perm_vpermq_perm_1 (d))
44941 return true;
44942
44943 if (expand_vec_perm_vperm2f128 (d))
44944 return true;
44945
44946 if (expand_vec_perm_pblendv (d))
44947 return true;
44948
44949 /* Try sequences of three instructions. */
44950
44951 if (expand_vec_perm_2vperm2f128_vshuf (d))
44952 return true;
44953
44954 if (expand_vec_perm_pshufb2 (d))
44955 return true;
44956
44957 if (expand_vec_perm_interleave3 (d))
44958 return true;
44959
44960 if (expand_vec_perm_vperm2f128_vblend (d))
44961 return true;
44962
44963 /* Try sequences of four instructions. */
44964
44965 if (expand_vec_perm_vpshufb2_vpermq (d))
44966 return true;
44967
44968 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44969 return true;
44970
44971 /* ??? Look for narrow permutations whose element orderings would
44972 allow the promotion to a wider mode. */
44973
44974 /* ??? Look for sequences of interleave or a wider permute that place
44975 the data into the correct lanes for a half-vector shuffle like
44976 pshuf[lh]w or vpermilps. */
44977
44978 /* ??? Look for sequences of interleave that produce the desired results.
44979 The combinatorics of punpck[lh] get pretty ugly... */
44980
44981 if (expand_vec_perm_even_odd (d))
44982 return true;
44983
44984 /* Even longer sequences. */
44985 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44986 return true;
44987
44988 return false;
44989 }
44990
44991 /* If a permutation only uses one operand, make that explicit. Returns true
44992 if the permutation references both operands. */
44993
44994 static bool
44995 canonicalize_perm (struct expand_vec_perm_d *d)
44996 {
44997 int i, which, nelt = d->nelt;
44998
44999 for (i = which = 0; i < nelt; ++i)
45000 which |= (d->perm[i] < nelt ? 1 : 2);
45001
45002 d->one_operand_p = true;
45003 switch (which)
45004 {
45005 default:
45006 gcc_unreachable();
45007
45008 case 3:
45009 if (!rtx_equal_p (d->op0, d->op1))
45010 {
45011 d->one_operand_p = false;
45012 break;
45013 }
45014 /* The elements of PERM do not suggest that only the first operand
45015 is used, but both operands are identical. Allow easier matching
45016 of the permutation by folding the permutation into the single
45017 input vector. */
45018 /* FALLTHRU */
45019
45020 case 2:
45021 for (i = 0; i < nelt; ++i)
45022 d->perm[i] &= nelt - 1;
45023 d->op0 = d->op1;
45024 break;
45025
45026 case 1:
45027 d->op1 = d->op0;
45028 break;
45029 }
45030
45031 return (which == 3);
45032 }
45033
45034 bool
45035 ix86_expand_vec_perm_const (rtx operands[4])
45036 {
45037 struct expand_vec_perm_d d;
45038 unsigned char perm[MAX_VECT_LEN];
45039 int i, nelt;
45040 bool two_args;
45041 rtx sel;
45042
45043 d.target = operands[0];
45044 d.op0 = operands[1];
45045 d.op1 = operands[2];
45046 sel = operands[3];
45047
45048 d.vmode = GET_MODE (d.target);
45049 gcc_assert (VECTOR_MODE_P (d.vmode));
45050 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45051 d.testing_p = false;
45052
45053 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
45054 gcc_assert (XVECLEN (sel, 0) == nelt);
45055 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
45056
45057 for (i = 0; i < nelt; ++i)
45058 {
45059 rtx e = XVECEXP (sel, 0, i);
45060 int ei = INTVAL (e) & (2 * nelt - 1);
45061 d.perm[i] = ei;
45062 perm[i] = ei;
45063 }
45064
45065 two_args = canonicalize_perm (&d);
45066
45067 if (ix86_expand_vec_perm_const_1 (&d))
45068 return true;
45069
45070 /* If the selector says both arguments are needed, but the operands are the
45071 same, the above tried to expand with one_operand_p and a flattened selector.
45072 If that didn't work, retry without one_operand_p, which is how we
45073 succeeded during the testing phase. */
45074 if (two_args && d.one_operand_p)
45075 {
45076 d.one_operand_p = false;
45077 memcpy (d.perm, perm, sizeof (perm));
45078 return ix86_expand_vec_perm_const_1 (&d);
45079 }
45080
45081 return false;
45082 }
45083
45084 /* Implement targetm.vectorize.vec_perm_const_ok. */
45085
45086 static bool
45087 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
45088 const unsigned char *sel)
45089 {
45090 struct expand_vec_perm_d d;
45091 unsigned int i, nelt, which;
45092 bool ret;
45093
45094 d.vmode = vmode;
45095 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45096 d.testing_p = true;
45097
45098 /* Given sufficient ISA support we can just return true here
45099 for selected vector modes. */
45100 if (d.vmode == V16SImode || d.vmode == V16SFmode
45101 || d.vmode == V8DFmode || d.vmode == V8DImode)
45102 /* All implementable with a single vpermi2 insn. */
45103 return true;
45104 if (GET_MODE_SIZE (d.vmode) == 16)
45105 {
45106 /* All implementable with a single vpperm insn. */
45107 if (TARGET_XOP)
45108 return true;
45109 /* All implementable with 2 pshufb + 1 ior. */
45110 if (TARGET_SSSE3)
45111 return true;
45112 /* All implementable with shufpd or unpck[lh]pd. */
45113 if (d.nelt == 2)
45114 return true;
45115 }
45116
45117 /* Extract the values from the vector CST into the permutation
45118 array in D. */
45119 memcpy (d.perm, sel, nelt);
45120 for (i = which = 0; i < nelt; ++i)
45121 {
45122 unsigned char e = d.perm[i];
45123 gcc_assert (e < 2 * nelt);
45124 which |= (e < nelt ? 1 : 2);
45125 }
45126
45127 /* If all elements are from the second vector, fold them into the first. */
45128 if (which == 2)
45129 for (i = 0; i < nelt; ++i)
45130 d.perm[i] -= nelt;
45131
45132 /* Check whether the mask can be applied to the vector type. */
45133 d.one_operand_p = (which != 3);
45134
45135 /* Implementable with shufps or pshufd. */
45136 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
45137 return true;
45138
45139 /* Otherwise we have to go through the motions and see if we can
45140 figure out how to generate the requested permutation. */
45141 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
45142 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
45143 if (!d.one_operand_p)
45144 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
45145
45146 start_sequence ();
45147 ret = ix86_expand_vec_perm_const_1 (&d);
45148 end_sequence ();
45149
45150 return ret;
45151 }
45152
45153 void
45154 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
45155 {
45156 struct expand_vec_perm_d d;
45157 unsigned i, nelt;
45158
45159 d.target = targ;
45160 d.op0 = op0;
45161 d.op1 = op1;
45162 d.vmode = GET_MODE (targ);
45163 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45164 d.one_operand_p = false;
45165 d.testing_p = false;
45166
45167 for (i = 0; i < nelt; ++i)
45168 d.perm[i] = i * 2 + odd;
45169
45170 /* We'll either be able to implement the permutation directly... */
45171 if (expand_vec_perm_1 (&d))
45172 return;
45173
45174 /* ... or we use the special-case patterns. */
45175 expand_vec_perm_even_odd_1 (&d, odd);
45176 }
45177
45178 static void
45179 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
45180 {
45181 struct expand_vec_perm_d d;
45182 unsigned i, nelt, base;
45183 bool ok;
45184
45185 d.target = targ;
45186 d.op0 = op0;
45187 d.op1 = op1;
45188 d.vmode = GET_MODE (targ);
45189 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
45190 d.one_operand_p = false;
45191 d.testing_p = false;
45192
45193 base = high_p ? nelt / 2 : 0;
45194 for (i = 0; i < nelt / 2; ++i)
45195 {
45196 d.perm[i * 2] = i + base;
45197 d.perm[i * 2 + 1] = i + base + nelt;
45198 }
45199
45200 /* Note that for AVX this isn't one instruction. */
45201 ok = ix86_expand_vec_perm_const_1 (&d);
45202 gcc_assert (ok);
45203 }
45204
45205
45206 /* Expand a vector operation CODE for a V*QImode in terms of the
45207 same operation on V*HImode. */
45208
45209 void
45210 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
45211 {
45212 enum machine_mode qimode = GET_MODE (dest);
45213 enum machine_mode himode;
45214 rtx (*gen_il) (rtx, rtx, rtx);
45215 rtx (*gen_ih) (rtx, rtx, rtx);
45216 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
45217 struct expand_vec_perm_d d;
45218 bool ok, full_interleave;
45219 bool uns_p = false;
45220 int i;
45221
45222 switch (qimode)
45223 {
45224 case V16QImode:
45225 himode = V8HImode;
45226 gen_il = gen_vec_interleave_lowv16qi;
45227 gen_ih = gen_vec_interleave_highv16qi;
45228 break;
45229 case V32QImode:
45230 himode = V16HImode;
45231 gen_il = gen_avx2_interleave_lowv32qi;
45232 gen_ih = gen_avx2_interleave_highv32qi;
45233 break;
45234 default:
45235 gcc_unreachable ();
45236 }
45237
45238 op2_l = op2_h = op2;
45239 switch (code)
45240 {
45241 case MULT:
45242 /* Unpack data such that we've got a source byte in each low byte of
45243 each word. We don't care what goes into the high byte of each word.
45244 Rather than trying to get zero in there, it is most convenient to let
45245 it be a copy of the low byte. */
45246 op2_l = gen_reg_rtx (qimode);
45247 op2_h = gen_reg_rtx (qimode);
45248 emit_insn (gen_il (op2_l, op2, op2));
45249 emit_insn (gen_ih (op2_h, op2, op2));
45250 /* FALLTHRU */
45251
45252 op1_l = gen_reg_rtx (qimode);
45253 op1_h = gen_reg_rtx (qimode);
45254 emit_insn (gen_il (op1_l, op1, op1));
45255 emit_insn (gen_ih (op1_h, op1, op1));
45256 full_interleave = qimode == V16QImode;
45257 break;
45258
45259 case ASHIFT:
45260 case LSHIFTRT:
45261 uns_p = true;
45262 /* FALLTHRU */
45263 case ASHIFTRT:
45264 op1_l = gen_reg_rtx (himode);
45265 op1_h = gen_reg_rtx (himode);
45266 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
45267 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
45268 full_interleave = true;
45269 break;
45270 default:
45271 gcc_unreachable ();
45272 }
45273
45274 /* Perform the operation. */
45275 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
45276 1, OPTAB_DIRECT);
45277 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
45278 1, OPTAB_DIRECT);
45279 gcc_assert (res_l && res_h);
45280
45281 /* Merge the data back into the right place. */
45282 d.target = dest;
45283 d.op0 = gen_lowpart (qimode, res_l);
45284 d.op1 = gen_lowpart (qimode, res_h);
45285 d.vmode = qimode;
45286 d.nelt = GET_MODE_NUNITS (qimode);
45287 d.one_operand_p = false;
45288 d.testing_p = false;
45289
45290 if (full_interleave)
45291 {
45292 /* For SSE2, we used a full interleave, so the desired
45293 results are in the even elements. */
45294 for (i = 0; i < 32; ++i)
45295 d.perm[i] = i * 2;
45296 }
45297 else
45298 {
45299 /* For AVX, the interleave used above was not cross-lane. So the
45300 extraction is of the even elements, but with the second and third quarters
45301 swapped. Happily, that is even one insn shorter than plain even extraction. */
45302 for (i = 0; i < 32; ++i)
45303 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
45304 }
45305
45306 ok = ix86_expand_vec_perm_const_1 (&d);
45307 gcc_assert (ok);
45308
45309 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45310 gen_rtx_fmt_ee (code, qimode, op1, op2));
45311 }
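
/* Illustrative sketch, not part of the original file: a byte-vector
   multiplication like the one below has no single SSE/AVX instruction and
   is expanded through the word-mode path above.  Names are invented for
   the example.  */
typedef unsigned char v16qi_ex2 __attribute__ ((vector_size (16)));

v16qi_ex2
example_byte_multiply (v16qi_ex2 a, v16qi_ex2 b)
{
  return a * b;
}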
45312
45313 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
45314 if op is CONST_VECTOR with all odd elements equal to their
45315 preceding element. */
45316
45317 static bool
45318 const_vector_equal_evenodd_p (rtx op)
45319 {
45320 enum machine_mode mode = GET_MODE (op);
45321 int i, nunits = GET_MODE_NUNITS (mode);
45322 if (GET_CODE (op) != CONST_VECTOR
45323 || nunits != CONST_VECTOR_NUNITS (op))
45324 return false;
45325 for (i = 0; i < nunits; i += 2)
45326 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
45327 return false;
45328 return true;
45329 }
45330
45331 void
45332 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
45333 bool uns_p, bool odd_p)
45334 {
45335 enum machine_mode mode = GET_MODE (op1);
45336 enum machine_mode wmode = GET_MODE (dest);
45337 rtx x;
45338 rtx orig_op1 = op1, orig_op2 = op2;
45339
45340 if (!nonimmediate_operand (op1, mode))
45341 op1 = force_reg (mode, op1);
45342 if (!nonimmediate_operand (op2, mode))
45343 op2 = force_reg (mode, op2);
45344
45345 /* We only play even/odd games with vectors of SImode. */
45346 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
45347
45348 /* If we're looking for the odd results, shift those members down to
45349 the even slots. For some cpus this is faster than a PSHUFD. */
45350 if (odd_p)
45351 {
45352 /* For XOP use vpmacsdqh, but only for smult, as it is only
45353 signed. */
45354 if (TARGET_XOP && mode == V4SImode && !uns_p)
45355 {
45356 x = force_reg (wmode, CONST0_RTX (wmode));
45357 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
45358 return;
45359 }
45360
45361 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
45362 if (!const_vector_equal_evenodd_p (orig_op1))
45363 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
45364 x, NULL, 1, OPTAB_DIRECT);
45365 if (!const_vector_equal_evenodd_p (orig_op2))
45366 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
45367 x, NULL, 1, OPTAB_DIRECT);
45368 op1 = gen_lowpart (mode, op1);
45369 op2 = gen_lowpart (mode, op2);
45370 }
45371
45372 if (mode == V16SImode)
45373 {
45374 if (uns_p)
45375 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
45376 else
45377 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
45378 }
45379 else if (mode == V8SImode)
45380 {
45381 if (uns_p)
45382 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
45383 else
45384 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
45385 }
45386 else if (uns_p)
45387 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
45388 else if (TARGET_SSE4_1)
45389 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
45390 else
45391 {
45392 rtx s1, s2, t0, t1, t2;
45393
45394 /* The easiest way to implement this without PMULDQ is to go through
45395 the motions as if we are performing a full 64-bit multiply, except
45396 that we need to do less shuffling of the elements. */
45397
45398 /* Compute the sign-extension, aka highparts, of the two operands. */
45399 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45400 op1, pc_rtx, pc_rtx);
45401 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
45402 op2, pc_rtx, pc_rtx);
45403
45404 /* Multiply LO(A) * HI(B), and vice-versa. */
45405 t1 = gen_reg_rtx (wmode);
45406 t2 = gen_reg_rtx (wmode);
45407 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
45408 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
45409
45410 /* Multiply LO(A) * LO(B). */
45411 t0 = gen_reg_rtx (wmode);
45412 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
45413
45414 /* Combine and shift the highparts into place. */
45415 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
45416 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
45417 1, OPTAB_DIRECT);
45418
45419 /* Combine high and low parts. */
45420 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
45421 return;
45422 }
45423 emit_insn (x);
45424 }
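
/* Rough scalar sketch of the non-PMULDQ path above (illustration only,
   not the emitted RTL), assuming 32-bit int and 64-bit unsigned long long
   and two's complement:

     unsigned long long s1 = (a < 0) ? 0xffffffffu : 0;
     unsigned long long s2 = (b < 0) ? 0xffffffffu : 0;
     unsigned long long lo = (unsigned long long) (unsigned int) a
                             * (unsigned int) b;
     unsigned long long hi = (s1 * (unsigned int) b
                              + s2 * (unsigned int) a) << 32;
     unsigned long long result = lo + hi;

   i.e. the signed widening product is the unsigned widening product of the
   low parts plus the sign-mask cross terms shifted into the high half,
   which is what the three PMULUDQs, the vector add and the 32-bit shift
   compute per even lane. */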
45425
45426 void
45427 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
45428 bool uns_p, bool high_p)
45429 {
45430 enum machine_mode wmode = GET_MODE (dest);
45431 enum machine_mode mode = GET_MODE (op1);
45432 rtx t1, t2, t3, t4, mask;
45433
45434 switch (mode)
45435 {
45436 case V4SImode:
45437 t1 = gen_reg_rtx (mode);
45438 t2 = gen_reg_rtx (mode);
45439 if (TARGET_XOP && !uns_p)
45440 {
45441 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
45442 shuffle the elements once so that all elements are in the right
45443 place for immediate use: { A C B D }. */
45444 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
45445 const1_rtx, GEN_INT (3)));
45446 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
45447 const1_rtx, GEN_INT (3)));
45448 }
45449 else
45450 {
45451 /* Put the elements into place for the multiply. */
45452 ix86_expand_vec_interleave (t1, op1, op1, high_p);
45453 ix86_expand_vec_interleave (t2, op2, op2, high_p);
45454 high_p = false;
45455 }
45456 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
45457 break;
45458
45459 case V8SImode:
45460 /* Shuffle the elements between the lanes. After this we
45461 have { A B E F | C D G H } for each operand. */
45462 t1 = gen_reg_rtx (V4DImode);
45463 t2 = gen_reg_rtx (V4DImode);
45464 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
45465 const0_rtx, const2_rtx,
45466 const1_rtx, GEN_INT (3)));
45467 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
45468 const0_rtx, const2_rtx,
45469 const1_rtx, GEN_INT (3)));
45470
45471 /* Shuffle the elements within the lanes. After this we
45472 have { A A B B | C C D D } or { E E F F | G G H H }. */
45473 t3 = gen_reg_rtx (V8SImode);
45474 t4 = gen_reg_rtx (V8SImode);
45475 mask = GEN_INT (high_p
45476 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45477 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45478 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45479 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45480
45481 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45482 break;
45483
45484 case V8HImode:
45485 case V16HImode:
45486 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45487 uns_p, OPTAB_DIRECT);
45488 t2 = expand_binop (mode,
45489 uns_p ? umul_highpart_optab : smul_highpart_optab,
45490 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45491 gcc_assert (t1 && t2);
45492
45493 t3 = gen_reg_rtx (mode);
45494 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45495 emit_move_insn (dest, gen_lowpart (wmode, t3));
45496 break;
45497
45498 case V16QImode:
45499 case V32QImode:
45500 t1 = gen_reg_rtx (wmode);
45501 t2 = gen_reg_rtx (wmode);
45502 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45503 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45504
45505 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45506 break;
45507
45508 default:
45509 gcc_unreachable ();
45510 }
45511 }
45512
45513 void
45514 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45515 {
45516 rtx res_1, res_2, res_3, res_4;
45517
45518 res_1 = gen_reg_rtx (V4SImode);
45519 res_2 = gen_reg_rtx (V4SImode);
45520 res_3 = gen_reg_rtx (V2DImode);
45521 res_4 = gen_reg_rtx (V2DImode);
45522 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45523 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45524
45525 /* Move the results in element 2 down to element 1; we don't care
45526 what goes into elements 2 and 3. Then we can merge the parts
45527 back together with an interleave.
45528
45529 Note that two other sequences were tried:
45530 (1) Use interleaves at the start instead of psrldq, which allows
45531 us to use a single shufps to merge things back at the end.
45532 (2) Use shufps here to combine the two vectors, then pshufd to
45533 put the elements in the correct order.
45534 In both cases the cost of the reformatting stall was too high
45535 and the overall sequence was slower. */
45536
45537 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45538 const0_rtx, const2_rtx,
45539 const0_rtx, const0_rtx));
45540 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45541 const0_rtx, const2_rtx,
45542 const0_rtx, const0_rtx));
45543 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45544
45545 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45546 }
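
/* As a rough scalar sketch (illustration only), the sequence above
   computes, independently for each of the four lanes,

     op0[i] = (unsigned int) ((unsigned long long) op1[i] * op2[i]);

   using the even/odd 32x32->64 widening multiplies for the products,
   two PSHUFDs to bring the low 32 bits of each product into lanes 0
   and 1, and PUNPCKLDQ to interleave them back into order. */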
45547
45548 void
45549 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45550 {
45551 enum machine_mode mode = GET_MODE (op0);
45552 rtx t1, t2, t3, t4, t5, t6;
45553
45554 if (TARGET_XOP && mode == V2DImode)
45555 {
45556 /* op1: A,B,C,D, op2: E,F,G,H */
45557 op1 = gen_lowpart (V4SImode, op1);
45558 op2 = gen_lowpart (V4SImode, op2);
45559
45560 t1 = gen_reg_rtx (V4SImode);
45561 t2 = gen_reg_rtx (V4SImode);
45562 t3 = gen_reg_rtx (V2DImode);
45563 t4 = gen_reg_rtx (V2DImode);
45564
45565 /* t1: B,A,D,C */
45566 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45567 GEN_INT (1),
45568 GEN_INT (0),
45569 GEN_INT (3),
45570 GEN_INT (2)));
45571
45572 /* t2: (B*E),(A*F),(D*G),(C*H) */
45573 emit_insn (gen_mulv4si3 (t2, t1, op2));
45574
45575 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45576 emit_insn (gen_xop_phadddq (t3, t2));
45577
45578 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45579 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45580
45581 /* Multiply lower parts and add all. */
45582 t5 = gen_reg_rtx (V2DImode);
45583 emit_insn (gen_vec_widen_umult_even_v4si (t5,
45584 gen_lowpart (V4SImode, op1),
45585 gen_lowpart (V4SImode, op2)));
45586 op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
45587
45588 }
45589 else
45590 {
45591 enum machine_mode nmode;
45592 rtx (*umul) (rtx, rtx, rtx);
45593
45594 if (mode == V2DImode)
45595 {
45596 umul = gen_vec_widen_umult_even_v4si;
45597 nmode = V4SImode;
45598 }
45599 else if (mode == V4DImode)
45600 {
45601 umul = gen_vec_widen_umult_even_v8si;
45602 nmode = V8SImode;
45603 }
45604 else if (mode == V8DImode)
45605 {
45606 umul = gen_vec_widen_umult_even_v16si;
45607 nmode = V16SImode;
45608 }
45609 else
45610 gcc_unreachable ();
45611
45612
45613 /* Multiply low parts. */
45614 t1 = gen_reg_rtx (mode);
45615 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45616
45617 /* Shift input vectors right 32 bits so we can multiply high parts. */
45618 t6 = GEN_INT (32);
45619 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45620 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45621
45622 /* Multiply high parts by low parts. */
45623 t4 = gen_reg_rtx (mode);
45624 t5 = gen_reg_rtx (mode);
45625 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45626 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45627
45628 /* Combine and shift the highparts back. */
45629 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45630 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45631
45632 /* Combine high and low parts. */
45633 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45634 }
45635
45636 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45637 gen_rtx_MULT (mode, op1, op2));
45638 }
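
/* A rough scalar sketch of the non-XOP path above (illustration only):
   per 64-bit lane, with A and B split into 32-bit halves,

     unsigned long long lo_a = a & 0xffffffffu, hi_a = a >> 32;
     unsigned long long lo_b = b & 0xffffffffu, hi_b = b >> 32;
     unsigned long long result = lo_a * lo_b
                                 + ((hi_a * lo_b + hi_b * lo_a) << 32);

   i.e. the schoolbook product with the hi_a * hi_b term dropped, since
   it only contributes above bit 63. */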
45639
45640 /* Calculate integer abs() using only SSE2 instructions. */
45641
45642 void
45643 ix86_expand_sse2_abs (rtx target, rtx input)
45644 {
45645 enum machine_mode mode = GET_MODE (target);
45646 rtx tmp0, tmp1, x;
45647
45648 switch (mode)
45649 {
45650 /* For 32-bit signed integer X, the best way to calculate the absolute
45651 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
45652 case V4SImode:
45653 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45654 GEN_INT (GET_MODE_BITSIZE
45655 (GET_MODE_INNER (mode)) - 1),
45656 NULL, 0, OPTAB_DIRECT);
45657 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45658 NULL, 0, OPTAB_DIRECT);
45659 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45660 target, 0, OPTAB_DIRECT);
45661 break;
45662
45663 /* For 16-bit signed integer X, the best way to calculate the absolute
45664 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45665 case V8HImode:
45666 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45667
45668 x = expand_simple_binop (mode, SMAX, tmp0, input,
45669 target, 0, OPTAB_DIRECT);
45670 break;
45671
45672 /* For 8-bit signed integer X, the best way to calculate the absolute
45673 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45674 as SSE2 provides the PMINUB insn. */
45675 case V16QImode:
45676 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45677
45678 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45679 target, 0, OPTAB_DIRECT);
45680 break;
45681
45682 default:
45683 gcc_unreachable ();
45684 }
45685
45686 if (x != target)
45687 emit_move_insn (target, x);
45688 }
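
/* Rough scalar equivalents of the three cases above (illustration only,
   assuming two's complement):

     32-bit:  s = x >> 31;  abs = (x ^ s) - s;
     16-bit:  abs = max (x, -x);
      8-bit:  abs = min ((unsigned char) x, (unsigned char) -x);

   each applied lane-wise by the emitted PSRAD/PXOR/PSUBD, PMAXSW and
   PMINUB sequences respectively. */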
45689
45690 /* Expand an insert into a vector register through pinsr insn.
45691 Return true if successful. */
45692
45693 bool
45694 ix86_expand_pinsr (rtx *operands)
45695 {
45696 rtx dst = operands[0];
45697 rtx src = operands[3];
45698
45699 unsigned int size = INTVAL (operands[1]);
45700 unsigned int pos = INTVAL (operands[2]);
45701
45702 if (GET_CODE (dst) == SUBREG)
45703 {
45704 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45705 dst = SUBREG_REG (dst);
45706 }
45707
45708 if (GET_CODE (src) == SUBREG)
45709 src = SUBREG_REG (src);
45710
45711 switch (GET_MODE (dst))
45712 {
45713 case V16QImode:
45714 case V8HImode:
45715 case V4SImode:
45716 case V2DImode:
45717 {
45718 enum machine_mode srcmode, dstmode;
45719 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45720
45721 srcmode = mode_for_size (size, MODE_INT, 0);
45722
45723 switch (srcmode)
45724 {
45725 case QImode:
45726 if (!TARGET_SSE4_1)
45727 return false;
45728 dstmode = V16QImode;
45729 pinsr = gen_sse4_1_pinsrb;
45730 break;
45731
45732 case HImode:
45733 if (!TARGET_SSE2)
45734 return false;
45735 dstmode = V8HImode;
45736 pinsr = gen_sse2_pinsrw;
45737 break;
45738
45739 case SImode:
45740 if (!TARGET_SSE4_1)
45741 return false;
45742 dstmode = V4SImode;
45743 pinsr = gen_sse4_1_pinsrd;
45744 break;
45745
45746 case DImode:
45747 gcc_assert (TARGET_64BIT);
45748 if (!TARGET_SSE4_1)
45749 return false;
45750 dstmode = V2DImode;
45751 pinsr = gen_sse4_1_pinsrq;
45752 break;
45753
45754 default:
45755 return false;
45756 }
45757
45758 rtx d = dst;
45759 if (GET_MODE (dst) != dstmode)
45760 d = gen_reg_rtx (dstmode);
45761 src = gen_lowpart (srcmode, src);
45762
45763 pos /= size;
45764
45765 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45766 GEN_INT (1 << pos)));
45767 if (d != dst)
45768 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45769 return true;
45770 }
45771
45772 default:
45773 return false;
45774 }
45775 }
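
/* For example (illustration only), a 16-bit insert at bit position 32
   of a V8HImode destination uses srcmode HImode and lane 32 / 16 = 2,
   and only needs TARGET_SSE2 (PINSRW); byte, dword and qword inserts
   additionally require TARGET_SSE4_1. */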
45776 \f
45777 /* This function returns the calling-ABI-specific va_list type node.
45778 It returns the va_list type specific to FNDECL. */
45779
45780 static tree
45781 ix86_fn_abi_va_list (tree fndecl)
45782 {
45783 if (!TARGET_64BIT)
45784 return va_list_type_node;
45785 gcc_assert (fndecl != NULL_TREE);
45786
45787 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45788 return ms_va_list_type_node;
45789 else
45790 return sysv_va_list_type_node;
45791 }
45792
45793 /* Returns the canonical va_list type specified by TYPE. If there
45794 is no valid TYPE provided, it returns NULL_TREE. */
45795
45796 static tree
45797 ix86_canonical_va_list_type (tree type)
45798 {
45799 tree wtype, htype;
45800
45801 /* Resolve references and pointers to va_list type. */
45802 if (TREE_CODE (type) == MEM_REF)
45803 type = TREE_TYPE (type);
45804 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45805 type = TREE_TYPE (type);
45806 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45807 type = TREE_TYPE (type);
45808
45809 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45810 {
45811 wtype = va_list_type_node;
45812 gcc_assert (wtype != NULL_TREE);
45813 htype = type;
45814 if (TREE_CODE (wtype) == ARRAY_TYPE)
45815 {
45816 /* If va_list is an array type, the argument may have decayed
45817 to a pointer type, e.g. by being passed to another function.
45818 In that case, unwrap both types so that we can compare the
45819 underlying records. */
45820 if (TREE_CODE (htype) == ARRAY_TYPE
45821 || POINTER_TYPE_P (htype))
45822 {
45823 wtype = TREE_TYPE (wtype);
45824 htype = TREE_TYPE (htype);
45825 }
45826 }
45827 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45828 return va_list_type_node;
45829 wtype = sysv_va_list_type_node;
45830 gcc_assert (wtype != NULL_TREE);
45831 htype = type;
45832 if (TREE_CODE (wtype) == ARRAY_TYPE)
45833 {
45834 /* If va_list is an array type, the argument may have decayed
45835 to a pointer type, e.g. by being passed to another function.
45836 In that case, unwrap both types so that we can compare the
45837 underlying records. */
45838 if (TREE_CODE (htype) == ARRAY_TYPE
45839 || POINTER_TYPE_P (htype))
45840 {
45841 wtype = TREE_TYPE (wtype);
45842 htype = TREE_TYPE (htype);
45843 }
45844 }
45845 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45846 return sysv_va_list_type_node;
45847 wtype = ms_va_list_type_node;
45848 gcc_assert (wtype != NULL_TREE);
45849 htype = type;
45850 if (TREE_CODE (wtype) == ARRAY_TYPE)
45851 {
45852 /* If va_list is an array type, the argument may have decayed
45853 to a pointer type, e.g. by being passed to another function.
45854 In that case, unwrap both types so that we can compare the
45855 underlying records. */
45856 if (TREE_CODE (htype) == ARRAY_TYPE
45857 || POINTER_TYPE_P (htype))
45858 {
45859 wtype = TREE_TYPE (wtype);
45860 htype = TREE_TYPE (htype);
45861 }
45862 }
45863 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45864 return ms_va_list_type_node;
45865 return NULL_TREE;
45866 }
45867 return std_canonical_va_list_type (type);
45868 }
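
/* For example (illustration only): on x86_64 the SysV va_list is an
   array of one structure, roughly

     typedef struct __va_list_tag __builtin_sysv_va_list[1];

   so a va_list function parameter decays to a pointer to that record.
   The unwrapping above lets both the array form and the decayed pointer
   form map back to the same canonical node. */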
45869
45870 /* Iterate through the target-specific builtin types for va_list.
45871 IDX denotes the iterator, *PTREE is set to the result type of
45872 the va_list builtin, and *PNAME to its internal type.
45873 Returns zero if there is no element for this index, otherwise
45874 IDX should be increased upon the next call.
45875 Note, do not iterate a base builtin's name like __builtin_va_list.
45876 Used from c_common_nodes_and_builtins. */
45877
45878 static int
45879 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45880 {
45881 if (TARGET_64BIT)
45882 {
45883 switch (idx)
45884 {
45885 default:
45886 break;
45887
45888 case 0:
45889 *ptree = ms_va_list_type_node;
45890 *pname = "__builtin_ms_va_list";
45891 return 1;
45892
45893 case 1:
45894 *ptree = sysv_va_list_type_node;
45895 *pname = "__builtin_sysv_va_list";
45896 return 1;
45897 }
45898 }
45899
45900 return 0;
45901 }
45902
45903 #undef TARGET_SCHED_DISPATCH
45904 #define TARGET_SCHED_DISPATCH has_dispatch
45905 #undef TARGET_SCHED_DISPATCH_DO
45906 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45907 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45908 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45909 #undef TARGET_SCHED_REORDER
45910 #define TARGET_SCHED_REORDER ix86_sched_reorder
45911 #undef TARGET_SCHED_ADJUST_PRIORITY
45912 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45913 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45914 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45915 ix86_dependencies_evaluation_hook
45916
45917 /* The size of the dispatch window is the total number of bytes of
45918 object code allowed in a window. */
45919 #define DISPATCH_WINDOW_SIZE 16
45920
45921 /* Number of dispatch windows considered for scheduling. */
45922 #define MAX_DISPATCH_WINDOWS 3
45923
45924 /* Maximum number of instructions in a window. */
45925 #define MAX_INSN 4
45926
45927 /* Maximum number of immediate operands in a window. */
45928 #define MAX_IMM 4
45929
45930 /* Maximum number of immediate bits allowed in a window. */
45931 #define MAX_IMM_SIZE 128
45932
45933 /* Maximum number of 32 bit immediates allowed in a window. */
45934 #define MAX_IMM_32 4
45935
45936 /* Maximum number of 64 bit immediates allowed in a window. */
45937 #define MAX_IMM_64 2
45938
45939 /* Maximum total of loads or prefetches allowed in a window. */
45940 #define MAX_LOAD 2
45941
45942 /* Maximum total of stores allowed in a window. */
45943 #define MAX_STORE 1
45944
45945 #undef BIG
45946 #define BIG 100
45947
45948
45949 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45950 enum dispatch_group {
45951 disp_no_group = 0,
45952 disp_load,
45953 disp_store,
45954 disp_load_store,
45955 disp_prefetch,
45956 disp_imm,
45957 disp_imm_32,
45958 disp_imm_64,
45959 disp_branch,
45960 disp_cmp,
45961 disp_jcc,
45962 disp_last
45963 };
45964
45965 /* Number of allowable groups in a dispatch window. It is an array
45966 indexed by dispatch_group enum. 100 is used as a big number,
45967 because the number of these kinds of operations does not have any
45968 effect in a dispatch window, but we need them for other reasons in
45969 the table. */
45970 static unsigned int num_allowable_groups[disp_last] = {
45971 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45972 };
45973
45974 char group_name[disp_last + 1][16] = {
45975 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45976 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45977 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45978 };
45979
45980 /* Instruction path. */
45981 enum insn_path {
45982 no_path = 0,
45983 path_single, /* Single micro op. */
45984 path_double, /* Double micro op. */
45985 path_multi, /* Instructions with more than 2 micro ops. */
45986 last_path
45987 };
45988
45989 /* sched_insn_info defines a window to the instructions scheduled in
45990 the basic block. It contains a pointer to the insn_info table and
45991 the instruction scheduled.
45992
45993 Windows are allocated for each basic block and are linked
45994 together. */
45995 typedef struct sched_insn_info_s {
45996 rtx insn;
45997 enum dispatch_group group;
45998 enum insn_path path;
45999 int byte_len;
46000 int imm_bytes;
46001 } sched_insn_info;
46002
46003 /* Linked list of dispatch windows. This is a two-way list of
46004 dispatch windows of a basic block. It contains information about
46005 the number of uops in the window and the total number of
46006 instructions and of bytes in the object code for this dispatch
46007 window. */
46008 typedef struct dispatch_windows_s {
46009 int num_insn; /* Number of insns in the window. */
46010 int num_uops; /* Number of uops in the window. */
46011 int window_size; /* Number of bytes in the window. */
46012 int window_num; /* Window number, 0 or 1. */
46013 int num_imm; /* Total number of immediate operands in the window. */
46014 int num_imm_32; /* Number of 32 bit immediates in the window. */
46015 int num_imm_64; /* Number of 64 bit immediates in the window. */
46016 int imm_size; /* Total size in bytes of immediates in the window. */
46017 int num_loads; /* Total memory loads in the window. */
46018 int num_stores; /* Total memory stores in the window. */
46019 int violation; /* Violation exists in window. */
46020 sched_insn_info *window; /* Pointer to the window. */
46021 struct dispatch_windows_s *next;
46022 struct dispatch_windows_s *prev;
46023 } dispatch_windows;
46024
46025 /* Immediate values used in an insn. */
46026 typedef struct imm_info_s
46027 {
46028 int imm;
46029 int imm32;
46030 int imm64;
46031 } imm_info;
46032
46033 static dispatch_windows *dispatch_window_list;
46034 static dispatch_windows *dispatch_window_list1;
46035
46036 /* Get dispatch group of insn. */
46037
46038 static enum dispatch_group
46039 get_mem_group (rtx_insn *insn)
46040 {
46041 enum attr_memory memory;
46042
46043 if (INSN_CODE (insn) < 0)
46044 return disp_no_group;
46045 memory = get_attr_memory (insn);
46046 if (memory == MEMORY_STORE)
46047 return disp_store;
46048
46049 if (memory == MEMORY_LOAD)
46050 return disp_load;
46051
46052 if (memory == MEMORY_BOTH)
46053 return disp_load_store;
46054
46055 return disp_no_group;
46056 }
46057
46058 /* Return true if insn is a compare instruction. */
46059
46060 static bool
46061 is_cmp (rtx_insn *insn)
46062 {
46063 enum attr_type type;
46064
46065 type = get_attr_type (insn);
46066 return (type == TYPE_TEST
46067 || type == TYPE_ICMP
46068 || type == TYPE_FCMP
46069 || GET_CODE (PATTERN (insn)) == COMPARE);
46070 }
46071
46072 /* Return true if a dispatch violation was encountered. */
46073
46074 static bool
46075 dispatch_violation (void)
46076 {
46077 if (dispatch_window_list->next)
46078 return dispatch_window_list->next->violation;
46079 return dispatch_window_list->violation;
46080 }
46081
46082 /* Return true if insn is a branch instruction. */
46083
46084 static bool
46085 is_branch (rtx insn)
46086 {
46087 return (CALL_P (insn) || JUMP_P (insn));
46088 }
46089
46090 /* Return true if insn is a prefetch instruction. */
46091
46092 static bool
46093 is_prefetch (rtx insn)
46094 {
46095 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
46096 }
46097
46098 /* This function initializes a dispatch window and the list container holding a
46099 pointer to the window. */
46100
46101 static void
46102 init_window (int window_num)
46103 {
46104 int i;
46105 dispatch_windows *new_list;
46106
46107 if (window_num == 0)
46108 new_list = dispatch_window_list;
46109 else
46110 new_list = dispatch_window_list1;
46111
46112 new_list->num_insn = 0;
46113 new_list->num_uops = 0;
46114 new_list->window_size = 0;
46115 new_list->next = NULL;
46116 new_list->prev = NULL;
46117 new_list->window_num = window_num;
46118 new_list->num_imm = 0;
46119 new_list->num_imm_32 = 0;
46120 new_list->num_imm_64 = 0;
46121 new_list->imm_size = 0;
46122 new_list->num_loads = 0;
46123 new_list->num_stores = 0;
46124 new_list->violation = false;
46125
46126 for (i = 0; i < MAX_INSN; i++)
46127 {
46128 new_list->window[i].insn = NULL;
46129 new_list->window[i].group = disp_no_group;
46130 new_list->window[i].path = no_path;
46131 new_list->window[i].byte_len = 0;
46132 new_list->window[i].imm_bytes = 0;
46133 }
46134 return;
46135 }
46136
46137 /* This function allocates and initializes a dispatch window and the
46138 list container holding a pointer to the window. */
46139
46140 static dispatch_windows *
46141 allocate_window (void)
46142 {
46143 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
46144 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
46145
46146 return new_list;
46147 }
46148
46149 /* This routine initializes the dispatch scheduling information. It
46150 initiates building dispatch scheduler tables and constructs the
46151 first dispatch window. */
46152
46153 static void
46154 init_dispatch_sched (void)
46155 {
46156 /* Allocate a dispatch list and a window. */
46157 dispatch_window_list = allocate_window ();
46158 dispatch_window_list1 = allocate_window ();
46159 init_window (0);
46160 init_window (1);
46161 }
46162
46163 /* This function returns true if a branch is detected. End of a basic block
46164 does not have to be a branch, but here we assume only branches end a
46165 window. */
46166
46167 static bool
46168 is_end_basic_block (enum dispatch_group group)
46169 {
46170 return group == disp_branch;
46171 }
46172
46173 /* This function is called when the end of window processing is reached. */
46174
46175 static void
46176 process_end_window (void)
46177 {
46178 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
46179 if (dispatch_window_list->next)
46180 {
46181 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
46182 gcc_assert (dispatch_window_list->window_size
46183 + dispatch_window_list1->window_size <= 48);
46184 init_window (1);
46185 }
46186 init_window (0);
46187 }
46188
46189 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
46190 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
46191 for 48 bytes of instructions. Note that these windows are not dispatch
46192 windows whose size is DISPATCH_WINDOW_SIZE. */
46193
46194 static dispatch_windows *
46195 allocate_next_window (int window_num)
46196 {
46197 if (window_num == 0)
46198 {
46199 if (dispatch_window_list->next)
46200 init_window (1);
46201 init_window (0);
46202 return dispatch_window_list;
46203 }
46204
46205 dispatch_window_list->next = dispatch_window_list1;
46206 dispatch_window_list1->prev = dispatch_window_list;
46207
46208 return dispatch_window_list1;
46209 }
46210
46211 /* Increment the number of immediate operands of an instruction. */
46212
46213 static int
46214 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
46215 {
46216 if (*in_rtx == 0)
46217 return 0;
46218
46219 switch (GET_CODE (*in_rtx))
46220 {
46221 case CONST:
46222 case SYMBOL_REF:
46223 case CONST_INT:
46224 (imm_values->imm)++;
46225 if (x86_64_immediate_operand (*in_rtx, SImode))
46226 (imm_values->imm32)++;
46227 else
46228 (imm_values->imm64)++;
46229 break;
46230
46231 case CONST_DOUBLE:
46232 (imm_values->imm)++;
46233 (imm_values->imm64)++;
46234 break;
46235
46236 case CODE_LABEL:
46237 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
46238 {
46239 (imm_values->imm)++;
46240 (imm_values->imm32)++;
46241 }
46242 break;
46243
46244 default:
46245 break;
46246 }
46247
46248 return 0;
46249 }
46250
46251 /* Compute number of immediate operands of an instruction. */
46252
46253 static void
46254 find_constant (rtx in_rtx, imm_info *imm_values)
46255 {
46256 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
46257 (rtx_function) find_constant_1, (void *) imm_values);
46258 }
46259
46260 /* Return total size of immediate operands of an instruction along with number
46261 of corresponding immediate operands. It initializes its parameters to zero
46262 before calling FIND_CONSTANT.
46263 INSN is the input instruction. IMM is the total of immediates.
46264 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
46265 bit immediates. */
46266
46267 static int
46268 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
46269 {
46270 imm_info imm_values = {0, 0, 0};
46271
46272 find_constant (insn, &imm_values);
46273 *imm = imm_values.imm;
46274 *imm32 = imm_values.imm32;
46275 *imm64 = imm_values.imm64;
46276 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
46277 }
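
/* For example (illustration only), for an insn whose pattern is
   (set (reg:SI ax) (const_int 5)) this returns 4 with *IMM = 1,
   *IMM32 = 1 and *IMM64 = 0; a constant that does not fit in a
   sign-extended 32-bit immediate is counted in *IMM64 instead and
   contributes 8 bytes to the returned size. */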
46278
46279 /* This function indicates if an operand of an instruction is an
46280 immediate. */
46281
46282 static bool
46283 has_immediate (rtx insn)
46284 {
46285 int num_imm_operand;
46286 int num_imm32_operand;
46287 int num_imm64_operand;
46288
46289 if (insn)
46290 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46291 &num_imm64_operand);
46292 return false;
46293 }
46294
46295 /* Return single or double path for instructions. */
46296
46297 static enum insn_path
46298 get_insn_path (rtx_insn *insn)
46299 {
46300 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
46301
46302 if ((int)path == 0)
46303 return path_single;
46304
46305 if ((int)path == 1)
46306 return path_double;
46307
46308 return path_multi;
46309 }
46310
46311 /* Return insn dispatch group. */
46312
46313 static enum dispatch_group
46314 get_insn_group (rtx_insn *insn)
46315 {
46316 enum dispatch_group group = get_mem_group (insn);
46317 if (group)
46318 return group;
46319
46320 if (is_branch (insn))
46321 return disp_branch;
46322
46323 if (is_cmp (insn))
46324 return disp_cmp;
46325
46326 if (has_immediate (insn))
46327 return disp_imm;
46328
46329 if (is_prefetch (insn))
46330 return disp_prefetch;
46331
46332 return disp_no_group;
46333 }
46334
46335 /* Count number of GROUP restricted instructions in a dispatch
46336 window WINDOW_LIST. */
46337
46338 static int
46339 count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
46340 {
46341 enum dispatch_group group = get_insn_group (insn);
46342 int imm_size;
46343 int num_imm_operand;
46344 int num_imm32_operand;
46345 int num_imm64_operand;
46346
46347 if (group == disp_no_group)
46348 return 0;
46349
46350 if (group == disp_imm)
46351 {
46352 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46353 &num_imm64_operand);
46354 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
46355 || num_imm_operand + window_list->num_imm > MAX_IMM
46356 || (num_imm32_operand > 0
46357 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
46358 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
46359 || (num_imm64_operand > 0
46360 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
46361 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
46362 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
46363 && num_imm64_operand > 0
46364 && ((window_list->num_imm_64 > 0
46365 && window_list->num_insn >= 2)
46366 || window_list->num_insn >= 3)))
46367 return BIG;
46368
46369 return 1;
46370 }
46371
46372 if ((group == disp_load_store
46373 && (window_list->num_loads >= MAX_LOAD
46374 || window_list->num_stores >= MAX_STORE))
46375 || ((group == disp_load
46376 || group == disp_prefetch)
46377 && window_list->num_loads >= MAX_LOAD)
46378 || (group == disp_store
46379 && window_list->num_stores >= MAX_STORE))
46380 return BIG;
46381
46382 return 1;
46383 }
46384
46385 /* This function returns true if insn satisfies dispatch rules on the
46386 last window scheduled. */
46387
46388 static bool
46389 fits_dispatch_window (rtx_insn *insn)
46390 {
46391 dispatch_windows *window_list = dispatch_window_list;
46392 dispatch_windows *window_list_next = dispatch_window_list->next;
46393 unsigned int num_restrict;
46394 enum dispatch_group group = get_insn_group (insn);
46395 enum insn_path path = get_insn_path (insn);
46396 int sum;
46397
46398 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
46399 instructions should be given the lowest priority in the
46400 scheduling process in the Haifa scheduler to make sure they will be
46401 scheduled in the same dispatch window as the reference to them. */
46402 if (group == disp_jcc || group == disp_cmp)
46403 return false;
46404
46405 /* Check nonrestricted. */
46406 if (group == disp_no_group || group == disp_branch)
46407 return true;
46408
46409 /* Get last dispatch window. */
46410 if (window_list_next)
46411 window_list = window_list_next;
46412
46413 if (window_list->window_num == 1)
46414 {
46415 sum = window_list->prev->window_size + window_list->window_size;
46416
46417 if (sum == 32
46418 || (min_insn_size (insn) + sum) >= 48)
46419 /* Window 1 is full. Go for next window. */
46420 return true;
46421 }
46422
46423 num_restrict = count_num_restricted (insn, window_list);
46424
46425 if (num_restrict > num_allowable_groups[group])
46426 return false;
46427
46428 /* See if it fits in the first window. */
46429 if (window_list->window_num == 0)
46430 {
46431 /* The first window should have only single and double path
46432 uops. */
46433 if (path == path_double
46434 && (window_list->num_uops + 2) > MAX_INSN)
46435 return false;
46436 else if (path != path_single)
46437 return false;
46438 }
46439 return true;
46440 }
46441
46442 /* Add an instruction INSN with NUM_UOPS micro-operations to the
46443 dispatch window WINDOW_LIST. */
46444
46445 static void
46446 add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
46447 {
46448 int byte_len = min_insn_size (insn);
46449 int num_insn = window_list->num_insn;
46450 int imm_size;
46451 sched_insn_info *window = window_list->window;
46452 enum dispatch_group group = get_insn_group (insn);
46453 enum insn_path path = get_insn_path (insn);
46454 int num_imm_operand;
46455 int num_imm32_operand;
46456 int num_imm64_operand;
46457
46458 if (!window_list->violation && group != disp_cmp
46459 && !fits_dispatch_window (insn))
46460 window_list->violation = true;
46461
46462 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46463 &num_imm64_operand);
46464
46465 /* Initialize window with new instruction. */
46466 window[num_insn].insn = insn;
46467 window[num_insn].byte_len = byte_len;
46468 window[num_insn].group = group;
46469 window[num_insn].path = path;
46470 window[num_insn].imm_bytes = imm_size;
46471
46472 window_list->window_size += byte_len;
46473 window_list->num_insn = num_insn + 1;
46474 window_list->num_uops = window_list->num_uops + num_uops;
46475 window_list->imm_size += imm_size;
46476 window_list->num_imm += num_imm_operand;
46477 window_list->num_imm_32 += num_imm32_operand;
46478 window_list->num_imm_64 += num_imm64_operand;
46479
46480 if (group == disp_store)
46481 window_list->num_stores += 1;
46482 else if (group == disp_load
46483 || group == disp_prefetch)
46484 window_list->num_loads += 1;
46485 else if (group == disp_load_store)
46486 {
46487 window_list->num_stores += 1;
46488 window_list->num_loads += 1;
46489 }
46490 }
46491
46492 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46493 If the total bytes of instructions or the number of instructions in
46494 the window exceed the allowed maximum, it allocates a new window. */
46495
46496 static void
46497 add_to_dispatch_window (rtx_insn *insn)
46498 {
46499 int byte_len;
46500 dispatch_windows *window_list;
46501 dispatch_windows *next_list;
46502 dispatch_windows *window0_list;
46503 enum insn_path path;
46504 enum dispatch_group insn_group;
46505 bool insn_fits;
46506 int num_insn;
46507 int num_uops;
46508 int window_num;
46509 int insn_num_uops;
46510 int sum;
46511
46512 if (INSN_CODE (insn) < 0)
46513 return;
46514
46515 byte_len = min_insn_size (insn);
46516 window_list = dispatch_window_list;
46517 next_list = window_list->next;
46518 path = get_insn_path (insn);
46519 insn_group = get_insn_group (insn);
46520
46521 /* Get the last dispatch window. */
46522 if (next_list)
46523 window_list = dispatch_window_list->next;
46524
46525 if (path == path_single)
46526 insn_num_uops = 1;
46527 else if (path == path_double)
46528 insn_num_uops = 2;
46529 else
46530 insn_num_uops = (int) path;
46531
46532 /* If the current window is full, get a new window.
46533 Window number zero is full if MAX_INSN uops are scheduled in it.
46534 Window number one is full if window zero's bytes plus window
46535 one's bytes equal 32, or if adding the bytes of the new instruction
46536 makes the total greater than 48, or if it already has MAX_INSN
46537 instructions in it.
46538 num_insn = window_list->num_insn;
46539 num_uops = window_list->num_uops;
46540 window_num = window_list->window_num;
46541 insn_fits = fits_dispatch_window (insn);
46542
46543 if (num_insn >= MAX_INSN
46544 || num_uops + insn_num_uops > MAX_INSN
46545 || !(insn_fits))
46546 {
46547 window_num = ~window_num & 1;
46548 window_list = allocate_next_window (window_num);
46549 }
46550
46551 if (window_num == 0)
46552 {
46553 add_insn_window (insn, window_list, insn_num_uops);
46554 if (window_list->num_insn >= MAX_INSN
46555 && insn_group == disp_branch)
46556 {
46557 process_end_window ();
46558 return;
46559 }
46560 }
46561 else if (window_num == 1)
46562 {
46563 window0_list = window_list->prev;
46564 sum = window0_list->window_size + window_list->window_size;
46565 if (sum == 32
46566 || (byte_len + sum) >= 48)
46567 {
46568 process_end_window ();
46569 window_list = dispatch_window_list;
46570 }
46571
46572 add_insn_window (insn, window_list, insn_num_uops);
46573 }
46574 else
46575 gcc_unreachable ();
46576
46577 if (is_end_basic_block (insn_group))
46578 {
46579 /* End of basic block is reached; do end-of-basic-block processing. */
46580 process_end_window ();
46581 return;
46582 }
46583 }
46584
46585 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46586
46587 DEBUG_FUNCTION static void
46588 debug_dispatch_window_file (FILE *file, int window_num)
46589 {
46590 dispatch_windows *list;
46591 int i;
46592
46593 if (window_num == 0)
46594 list = dispatch_window_list;
46595 else
46596 list = dispatch_window_list1;
46597
46598 fprintf (file, "Window #%d:\n", list->window_num);
46599 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46600 list->num_insn, list->num_uops, list->window_size);
46601 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46602 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46603
46604 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46605 list->num_stores);
46606 fprintf (file, " insn info:\n");
46607
46608 for (i = 0; i < MAX_INSN; i++)
46609 {
46610 if (!list->window[i].insn)
46611 break;
46612 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46613 i, group_name[list->window[i].group],
46614 i, (void *)list->window[i].insn,
46615 i, list->window[i].path,
46616 i, list->window[i].byte_len,
46617 i, list->window[i].imm_bytes);
46618 }
46619 }
46620
46621 /* Print to stdout a dispatch window. */
46622
46623 DEBUG_FUNCTION void
46624 debug_dispatch_window (int window_num)
46625 {
46626 debug_dispatch_window_file (stdout, window_num);
46627 }
46628
46629 /* Print INSN dispatch information to FILE. */
46630
46631 DEBUG_FUNCTION static void
46632 debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
46633 {
46634 int byte_len;
46635 enum insn_path path;
46636 enum dispatch_group group;
46637 int imm_size;
46638 int num_imm_operand;
46639 int num_imm32_operand;
46640 int num_imm64_operand;
46641
46642 if (INSN_CODE (insn) < 0)
46643 return;
46644
46645 byte_len = min_insn_size (insn);
46646 path = get_insn_path (insn);
46647 group = get_insn_group (insn);
46648 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46649 &num_imm64_operand);
46650
46651 fprintf (file, " insn info:\n");
46652 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46653 group_name[group], path, byte_len);
46654 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46655 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46656 }
46657
46658 /* Print to STDOUT the status of the ready list with respect to
46659 dispatch windows. */
46660
46661 DEBUG_FUNCTION void
46662 debug_ready_dispatch (void)
46663 {
46664 int i;
46665 int no_ready = number_in_ready ();
46666
46667 fprintf (stdout, "Number of ready: %d\n", no_ready);
46668
46669 for (i = 0; i < no_ready; i++)
46670 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46671 }
46672
46673 /* This routine is the driver of the dispatch scheduler. */
46674
46675 static void
46676 do_dispatch (rtx_insn *insn, int mode)
46677 {
46678 if (mode == DISPATCH_INIT)
46679 init_dispatch_sched ();
46680 else if (mode == ADD_TO_DISPATCH_WINDOW)
46681 add_to_dispatch_window (insn);
46682 }
46683
46684 /* Return TRUE if Dispatch Scheduling is supported. */
46685
46686 static bool
46687 has_dispatch (rtx_insn *insn, int action)
46688 {
46689 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46690 && flag_dispatch_scheduler)
46691 switch (action)
46692 {
46693 default:
46694 return false;
46695
46696 case IS_DISPATCH_ON:
46697 return true;
46698 break;
46699
46700 case IS_CMP:
46701 return is_cmp (insn);
46702
46703 case DISPATCH_VIOLATION:
46704 return dispatch_violation ();
46705
46706 case FITS_DISPATCH_WINDOW:
46707 return fits_dispatch_window (insn);
46708 }
46709
46710 return false;
46711 }
46712
46713 /* Implementation of reassociation_width target hook used by
46714 reassoc phase to identify parallelism level in reassociated
46715 tree. The statement's tree_code is passed in OPC. The arguments'
46716 type is passed in MODE.
46717
46718 Currently parallel reassociation is enabled for Atom
46719 processors only and we set reassociation width to be 2
46720 because Atom may issue up to 2 instructions per cycle.
46721
46722 Return value should be fixed if parallel reassociation is
46723 enabled for other processors. */
46724
46725 static int
46726 ix86_reassociation_width (unsigned int, enum machine_mode mode)
46727 {
46728 int res = 1;
46729
46730 /* Vector part. */
46731 if (VECTOR_MODE_P (mode))
46732 {
46733 if (TARGET_VECTOR_PARALLEL_EXECUTION)
46734 return 2;
46735 else
46736 return 1;
46737 }
46738
46739 /* Scalar part. */
46740 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46741 res = 2;
46742 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46743 res = 2;
46744
46745 return res;
46746 }
46747
46748 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46749 place emms and femms instructions. */
46750
46751 static enum machine_mode
46752 ix86_preferred_simd_mode (enum machine_mode mode)
46753 {
46754 if (!TARGET_SSE)
46755 return word_mode;
46756
46757 switch (mode)
46758 {
46759 case QImode:
46760 return TARGET_AVX512BW ? V64QImode :
46761 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46762 case HImode:
46763 return TARGET_AVX512BW ? V32HImode :
46764 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46765 case SImode:
46766 return TARGET_AVX512F ? V16SImode :
46767 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46768 case DImode:
46769 return TARGET_AVX512F ? V8DImode :
46770 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46771
46772 case SFmode:
46773 if (TARGET_AVX512F)
46774 return V16SFmode;
46775 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46776 return V8SFmode;
46777 else
46778 return V4SFmode;
46779
46780 case DFmode:
46781 if (!TARGET_VECTORIZE_DOUBLE)
46782 return word_mode;
46783 else if (TARGET_AVX512F)
46784 return V8DFmode;
46785 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46786 return V4DFmode;
46787 else if (TARGET_SSE2)
46788 return V2DFmode;
46789 /* FALLTHRU */
46790
46791 default:
46792 return word_mode;
46793 }
46794 }
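
/* For example (illustration only): with -mavx2 and without
   -mprefer-avx128, SImode elements vectorize in V8SImode and DFmode in
   V4DFmode; adding -mavx512f widens these to V16SImode and V8DFmode,
   while plain -msse2 yields V4SImode and V2DFmode. */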
46795
46796 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46797 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46798 256bit and 128bit vectors. */
46799
46800 static unsigned int
46801 ix86_autovectorize_vector_sizes (void)
46802 {
46803 return TARGET_AVX512F ? 64 | 32 | 16 :
46804 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46805 }
46806
46807 \f
46808
46809 /* Return class of registers which could be used for pseudo of MODE
46810 and of class RCLASS for spilling instead of memory. Return NO_REGS
46811 if it is not possible or not profitable. */
46812 static reg_class_t
46813 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46814 {
46815 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46816 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46817 && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
46818 return ALL_SSE_REGS;
46819 return NO_REGS;
46820 }
46821
46822 /* Implement targetm.vectorize.init_cost. */
46823
46824 static void *
46825 ix86_init_cost (struct loop *)
46826 {
46827 unsigned *cost = XNEWVEC (unsigned, 3);
46828 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46829 return cost;
46830 }
46831
46832 /* Implement targetm.vectorize.add_stmt_cost. */
46833
46834 static unsigned
46835 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46836 struct _stmt_vec_info *stmt_info, int misalign,
46837 enum vect_cost_model_location where)
46838 {
46839 unsigned *cost = (unsigned *) data;
46840 unsigned retval = 0;
46841
46842 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46843 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46844
46845 /* Statements in an inner loop relative to the loop being
46846 vectorized are weighted more heavily. The value here is
46847 arbitrary and could potentially be improved with analysis. */
46848 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46849 count *= 50; /* FIXME. */
46850
46851 retval = (unsigned) (count * stmt_cost);
46852
46853 /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
46854 for Silvermont, as it has an out-of-order integer pipeline and can execute
46855 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46856 if (TARGET_SILVERMONT || TARGET_INTEL)
46857 if (stmt_info && stmt_info->stmt)
46858 {
46859 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46860 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46861 retval = (retval * 17) / 10;
46862 }
46863
46864 cost[where] += retval;
46865
46866 return retval;
46867 }
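
/* For example (illustration only): on Silvermont a vector statement
   with an integer-typed LHS, COUNT 1 and a base cost of 4 is accounted
   as (4 * 17) / 10 = 6, i.e. the 1.7 factor is applied with integer
   arithmetic after the COUNT * cost product. */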
46868
46869 /* Implement targetm.vectorize.finish_cost. */
46870
46871 static void
46872 ix86_finish_cost (void *data, unsigned *prologue_cost,
46873 unsigned *body_cost, unsigned *epilogue_cost)
46874 {
46875 unsigned *cost = (unsigned *) data;
46876 *prologue_cost = cost[vect_prologue];
46877 *body_cost = cost[vect_body];
46878 *epilogue_cost = cost[vect_epilogue];
46879 }
46880
46881 /* Implement targetm.vectorize.destroy_cost_data. */
46882
46883 static void
46884 ix86_destroy_cost_data (void *data)
46885 {
46886 free (data);
46887 }
46888
46889 /* Validate target specific memory model bits in VAL. */
46890
46891 static unsigned HOST_WIDE_INT
46892 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46893 {
46894 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46895 bool strong;
46896
46897 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46898 |MEMMODEL_MASK)
46899 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46900 {
46901 warning (OPT_Winvalid_memory_model,
46902 "Unknown architecture specific memory model");
46903 return MEMMODEL_SEQ_CST;
46904 }
46905 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46906 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46907 {
46908 warning (OPT_Winvalid_memory_model,
46909 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46910 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46911 }
46912 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46913 {
46914 warning (OPT_Winvalid_memory_model,
46915 "HLE_RELEASE not used with RELEASE or stronger memory model");
46916 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46917 }
46918 return val;
46919 }
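
/* For example (illustration only):

     __atomic_exchange_n (&lock, 1, __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE);

   passes the check unchanged, while combining __ATOMIC_HLE_ACQUIRE with
   __ATOMIC_RELAXED triggers the -Winvalid-memory-model warning and the
   model is forced to SEQ_CST with the HLE bit preserved. */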
46920
46921 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46922 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46923 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46924 or number of vecsize_mangle variants that should be emitted. */
46925
46926 static int
46927 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46928 struct cgraph_simd_clone *clonei,
46929 tree base_type, int num)
46930 {
46931 int ret = 1;
46932
46933 if (clonei->simdlen
46934 && (clonei->simdlen < 2
46935 || clonei->simdlen > 16
46936 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46937 {
46938 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46939 "unsupported simdlen %d", clonei->simdlen);
46940 return 0;
46941 }
46942
46943 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46944 if (TREE_CODE (ret_type) != VOID_TYPE)
46945 switch (TYPE_MODE (ret_type))
46946 {
46947 case QImode:
46948 case HImode:
46949 case SImode:
46950 case DImode:
46951 case SFmode:
46952 case DFmode:
46953 /* case SCmode: */
46954 /* case DCmode: */
46955 break;
46956 default:
46957 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46958 "unsupported return type %qT for simd\n", ret_type);
46959 return 0;
46960 }
46961
46962 tree t;
46963 int i;
46964
46965 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46966 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46967 switch (TYPE_MODE (TREE_TYPE (t)))
46968 {
46969 case QImode:
46970 case HImode:
46971 case SImode:
46972 case DImode:
46973 case SFmode:
46974 case DFmode:
46975 /* case SCmode: */
46976 /* case DCmode: */
46977 break;
46978 default:
46979 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46980 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46981 return 0;
46982 }
46983
46984 if (clonei->cilk_elemental)
46985 {
46986 /* Parse the processor clause here. If not present, default to 'b'. */
46987 clonei->vecsize_mangle = 'b';
46988 }
46989 else if (!TREE_PUBLIC (node->decl))
46990 {
46991 /* If the function isn't exported, we can pick up just one ISA
46992 for the clones. */
46993 if (TARGET_AVX2)
46994 clonei->vecsize_mangle = 'd';
46995 else if (TARGET_AVX)
46996 clonei->vecsize_mangle = 'c';
46997 else
46998 clonei->vecsize_mangle = 'b';
46999 ret = 1;
47000 }
47001 else
47002 {
47003 clonei->vecsize_mangle = "bcd"[num];
47004 ret = 3;
47005 }
47006 switch (clonei->vecsize_mangle)
47007 {
47008 case 'b':
47009 clonei->vecsize_int = 128;
47010 clonei->vecsize_float = 128;
47011 break;
47012 case 'c':
47013 clonei->vecsize_int = 128;
47014 clonei->vecsize_float = 256;
47015 break;
47016 case 'd':
47017 clonei->vecsize_int = 256;
47018 clonei->vecsize_float = 256;
47019 break;
47020 }
47021 if (clonei->simdlen == 0)
47022 {
47023 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
47024 clonei->simdlen = clonei->vecsize_int;
47025 else
47026 clonei->simdlen = clonei->vecsize_float;
47027 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
47028 if (clonei->simdlen > 16)
47029 clonei->simdlen = 16;
47030 }
47031 return ret;
47032 }
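
/* For example (illustration only), for an exported

     #pragma omp declare simd
     double f (double x);

   this returns 3, emitting variants with vecsize_mangle 'b', 'c' and
   'd'; with simdlen left at 0 they get simdlen 2, 4 and 4 respectively
   (vecsize_float divided by the 64-bit width of double). */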
47033
47034 /* Add target attribute to SIMD clone NODE if needed. */
47035
47036 static void
47037 ix86_simd_clone_adjust (struct cgraph_node *node)
47038 {
47039 const char *str = NULL;
47040 gcc_assert (node->decl == cfun->decl);
47041 switch (node->simdclone->vecsize_mangle)
47042 {
47043 case 'b':
47044 if (!TARGET_SSE2)
47045 str = "sse2";
47046 break;
47047 case 'c':
47048 if (!TARGET_AVX)
47049 str = "avx";
47050 break;
47051 case 'd':
47052 if (!TARGET_AVX2)
47053 str = "avx2";
47054 break;
47055 default:
47056 gcc_unreachable ();
47057 }
47058 if (str == NULL)
47059 return;
47060 push_cfun (NULL);
47061 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
47062 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
47063 gcc_assert (ok);
47064 pop_cfun ();
47065 ix86_previous_fndecl = NULL_TREE;
47066 ix86_set_current_function (node->decl);
47067 }
47068
47069 /* If SIMD clone NODE can't be used in a vectorized loop
47070 in the current function, return -1, otherwise return the badness of using it
47071 (0 if it is most desirable from vecsize_mangle point of view, 1
47072 slightly less desirable, etc.). */
47073
47074 static int
47075 ix86_simd_clone_usable (struct cgraph_node *node)
47076 {
47077 switch (node->simdclone->vecsize_mangle)
47078 {
47079 case 'b':
47080 if (!TARGET_SSE2)
47081 return -1;
47082 if (!TARGET_AVX)
47083 return 0;
47084 return TARGET_AVX2 ? 2 : 1;
47085 case 'c':
47086 if (!TARGET_AVX)
47087 return -1;
47088 return TARGET_AVX2 ? 1 : 0;
47089 break;
47090 case 'd':
47091 if (!TARGET_AVX2)
47092 return -1;
47093 return 0;
47094 default:
47095 gcc_unreachable ();
47096 }
47097 }
47098
47099 /* This function counts the number of memory references.
47100 This value determines the unrolling factor for
47101 bdver3 and bdver4 architectures. */
47102
47103 static int
47104 ix86_loop_memcount (rtx *x, unsigned *mem_count)
47105 {
47106 if (*x != NULL_RTX && MEM_P (*x))
47107 {
47108 enum machine_mode mode;
47109 unsigned int n_words;
47110
47111 mode = GET_MODE (*x);
47112 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
47113
47114 if (n_words > 4)
47115 (*mem_count)+=2;
47116 else
47117 (*mem_count)+=1;
47118 }
47119 return 0;
47120 }
47121
47122 /* This function adjusts the unroll factor based on
47123 the hardware capabilities. For example, bdver3 has
47124 a loop buffer which makes unrolling of smaller
47125 loops less important. This function decides the
47126 unroll factor using the number of memory references
47127 (the value 32 is used) as a heuristic. */
47128
47129 static unsigned
47130 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
47131 {
47132 basic_block *bbs;
47133 rtx_insn *insn;
47134 unsigned i;
47135 unsigned mem_count = 0;
47136
47137 if (!TARGET_ADJUST_UNROLL)
47138 return nunroll;
47139
47140 /* Count the number of memory references within the loop body. */
47141 bbs = get_loop_body (loop);
47142 for (i = 0; i < loop->num_nodes; i++)
47143 {
47144 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
47145 if (NONDEBUG_INSN_P (insn))
47146 for_each_rtx_in_insn (&insn, (rtx_function) ix86_loop_memcount,
47147 &mem_count);
47148 }
47149 free (bbs);
47150
47151 if (mem_count && mem_count <= 32)
47152 return 32 / mem_count;
47153
47154 return nunroll;
47155 }
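
/* For example (illustration only), on a bdver3/bdver4 target a loop
   body with 4 counted memory references gets an unroll factor of
   32 / 4 = 8; references wider than 4 words count twice, and loops
   with no or more than 32 references keep the caller's NUNROLL. */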
47156
47157
47158 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
47159
47160 static bool
47161 ix86_float_exceptions_rounding_supported_p (void)
47162 {
47163 /* For x87 floating point with standard excess precision handling,
47164 there is no adddf3 pattern (since x87 floating point only has
47165 XFmode operations) so the default hook implementation gets this
47166 wrong. */
47167 return TARGET_80387 || TARGET_SSE_MATH;
47168 }
47169
47170 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
47171
47172 static void
47173 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
47174 {
47175 if (!TARGET_80387 && !TARGET_SSE_MATH)
47176 return;
47177 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
47178 if (TARGET_80387)
47179 {
47180 tree fenv_index_type = build_index_type (size_int (6));
47181 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
47182 tree fenv_var = create_tmp_var (fenv_type, NULL);
47183 mark_addressable (fenv_var);
47184 tree fenv_ptr = build_pointer_type (fenv_type);
47185 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
47186 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
47187 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
47188 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
47189 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
47190 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
47191 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
47192 tree hold_fnclex = build_call_expr (fnclex, 0);
47193 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
47194 hold_fnclex);
47195 *clear = build_call_expr (fnclex, 0);
47196 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
47197 tree fnstsw_call = build_call_expr (fnstsw, 0);
47198 tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
47199 sw_var, fnstsw_call);
47200 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
47201 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
47202 exceptions_var, exceptions_x87);
47203 *update = build2 (COMPOUND_EXPR, integer_type_node,
47204 sw_mod, update_mod);
47205 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
47206 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
47207 }
47208 if (TARGET_SSE_MATH)
47209 {
47210 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
47211 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
47212 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
47213 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
47214 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
47215 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
47216 mxcsr_orig_var, stmxcsr_hold_call);
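/* Build the MXCSR value to load during the update: set bits 7..12 to mask
   all SSE exceptions and clear bits 0..5, the sticky exception flags.  */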
47217 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
47218 mxcsr_orig_var,
47219 build_int_cst (unsigned_type_node, 0x1f80));
47220 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
47221 build_int_cst (unsigned_type_node, 0xffffffc0));
47222 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
47223 mxcsr_mod_var, hold_mod_val);
47224 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47225 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
47226 hold_assign_orig, hold_assign_mod);
47227 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
47228 ldmxcsr_hold_call);
47229 if (*hold)
47230 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
47231 else
47232 *hold = hold_all;
47233 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
47234 if (*clear)
47235 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
47236 ldmxcsr_clear_call);
47237 else
47238 *clear = ldmxcsr_clear_call;
47239 tree stmxcsr_update_call = build_call_expr (stmxcsr, 0);
47240 tree exceptions_sse = fold_convert (integer_type_node,
47241 stmxcsr_update_call);
47242 if (*update)
47243 {
47244 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
47245 exceptions_var, exceptions_sse);
47246 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
47247 exceptions_var, exceptions_mod);
47248 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
47249 exceptions_assign);
47250 }
47251 else
47252 *update = build2 (MODIFY_EXPR, integer_type_node,
47253 exceptions_var, exceptions_sse);
47254 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
47255 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47256 ldmxcsr_update_call);
47257 }
47258 tree atomic_feraiseexcept
47259 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
47260 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
47261 1, exceptions_var);
47262 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
47263 atomic_feraiseexcept_call);
47264 }
47265
47266 /* Initialize the GCC target structure. */
47267 #undef TARGET_RETURN_IN_MEMORY
47268 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
47269
47270 #undef TARGET_LEGITIMIZE_ADDRESS
47271 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
47272
47273 #undef TARGET_ATTRIBUTE_TABLE
47274 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
47275 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
47276 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
47277 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47278 # undef TARGET_MERGE_DECL_ATTRIBUTES
47279 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
47280 #endif
47281
47282 #undef TARGET_COMP_TYPE_ATTRIBUTES
47283 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
47284
47285 #undef TARGET_INIT_BUILTINS
47286 #define TARGET_INIT_BUILTINS ix86_init_builtins
47287 #undef TARGET_BUILTIN_DECL
47288 #define TARGET_BUILTIN_DECL ix86_builtin_decl
47289 #undef TARGET_EXPAND_BUILTIN
47290 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
47291
47292 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
47293 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
47294 ix86_builtin_vectorized_function
47295
47296 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
47297 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
47298
47299 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
47300 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
47301
47302 #undef TARGET_VECTORIZE_BUILTIN_GATHER
47303 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
47304
47305 #undef TARGET_BUILTIN_RECIPROCAL
47306 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
47307
47308 #undef TARGET_ASM_FUNCTION_EPILOGUE
47309 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
47310
47311 #undef TARGET_ENCODE_SECTION_INFO
47312 #ifndef SUBTARGET_ENCODE_SECTION_INFO
47313 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
47314 #else
47315 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
47316 #endif
47317
47318 #undef TARGET_ASM_OPEN_PAREN
47319 #define TARGET_ASM_OPEN_PAREN ""
47320 #undef TARGET_ASM_CLOSE_PAREN
47321 #define TARGET_ASM_CLOSE_PAREN ""
47322
47323 #undef TARGET_ASM_BYTE_OP
47324 #define TARGET_ASM_BYTE_OP ASM_BYTE
47325
47326 #undef TARGET_ASM_ALIGNED_HI_OP
47327 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
47328 #undef TARGET_ASM_ALIGNED_SI_OP
47329 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
47330 #ifdef ASM_QUAD
47331 #undef TARGET_ASM_ALIGNED_DI_OP
47332 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
47333 #endif
47334
47335 #undef TARGET_PROFILE_BEFORE_PROLOGUE
47336 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
47337
47338 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
47339 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
47340
47341 #undef TARGET_ASM_UNALIGNED_HI_OP
47342 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
47343 #undef TARGET_ASM_UNALIGNED_SI_OP
47344 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
47345 #undef TARGET_ASM_UNALIGNED_DI_OP
47346 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
47347
47348 #undef TARGET_PRINT_OPERAND
47349 #define TARGET_PRINT_OPERAND ix86_print_operand
47350 #undef TARGET_PRINT_OPERAND_ADDRESS
47351 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
47352 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
47353 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
47354 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
47355 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
47356
47357 #undef TARGET_SCHED_INIT_GLOBAL
47358 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
47359 #undef TARGET_SCHED_ADJUST_COST
47360 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
47361 #undef TARGET_SCHED_ISSUE_RATE
47362 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
47363 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
47364 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
47365 ia32_multipass_dfa_lookahead
47366 #undef TARGET_SCHED_MACRO_FUSION_P
47367 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
47368 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
47369 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
47370
47371 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
47372 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
47373
47374 #undef TARGET_MEMMODEL_CHECK
47375 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
47376
47377 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
47378 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
47379
47380 #ifdef HAVE_AS_TLS
47381 #undef TARGET_HAVE_TLS
47382 #define TARGET_HAVE_TLS true
47383 #endif
47384 #undef TARGET_CANNOT_FORCE_CONST_MEM
47385 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
47386 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
47387 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
47388
47389 #undef TARGET_DELEGITIMIZE_ADDRESS
47390 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
47391
47392 #undef TARGET_MS_BITFIELD_LAYOUT_P
47393 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
47394
47395 #if TARGET_MACHO
47396 #undef TARGET_BINDS_LOCAL_P
47397 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
47398 #endif
47399 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
47400 #undef TARGET_BINDS_LOCAL_P
47401 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
47402 #endif
47403
47404 #undef TARGET_ASM_OUTPUT_MI_THUNK
47405 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
47406 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
47407 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
47408
47409 #undef TARGET_ASM_FILE_START
47410 #define TARGET_ASM_FILE_START x86_file_start
47411
47412 #undef TARGET_OPTION_OVERRIDE
47413 #define TARGET_OPTION_OVERRIDE ix86_option_override
47414
47415 #undef TARGET_REGISTER_MOVE_COST
47416 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
47417 #undef TARGET_MEMORY_MOVE_COST
47418 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
47419 #undef TARGET_RTX_COSTS
47420 #define TARGET_RTX_COSTS ix86_rtx_costs
47421 #undef TARGET_ADDRESS_COST
47422 #define TARGET_ADDRESS_COST ix86_address_cost
47423
47424 #undef TARGET_FIXED_CONDITION_CODE_REGS
47425 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
47426 #undef TARGET_CC_MODES_COMPATIBLE
47427 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
47428
47429 #undef TARGET_MACHINE_DEPENDENT_REORG
47430 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
47431
47432 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
47433 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
47434
47435 #undef TARGET_BUILD_BUILTIN_VA_LIST
47436 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
47437
47438 #undef TARGET_FOLD_BUILTIN
47439 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
47440
47441 #undef TARGET_COMPARE_VERSION_PRIORITY
47442 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
47443
47444 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
47445 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
47446 ix86_generate_version_dispatcher_body
47447
47448 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
47449 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
47450 ix86_get_function_versions_dispatcher
47451
47452 #undef TARGET_ENUM_VA_LIST_P
47453 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
47454
47455 #undef TARGET_FN_ABI_VA_LIST
47456 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
47457
47458 #undef TARGET_CANONICAL_VA_LIST_TYPE
47459 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
47460
47461 #undef TARGET_EXPAND_BUILTIN_VA_START
47462 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
47463
47464 #undef TARGET_MD_ASM_CLOBBERS
47465 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
47466
47467 #undef TARGET_PROMOTE_PROTOTYPES
47468 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
47469 #undef TARGET_SETUP_INCOMING_VARARGS
47470 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
47471 #undef TARGET_MUST_PASS_IN_STACK
47472 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
47473 #undef TARGET_FUNCTION_ARG_ADVANCE
47474 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
47475 #undef TARGET_FUNCTION_ARG
47476 #define TARGET_FUNCTION_ARG ix86_function_arg
47477 #undef TARGET_INIT_PIC_REG
47478 #define TARGET_INIT_PIC_REG ix86_init_pic_reg
47479 #undef TARGET_USE_PSEUDO_PIC_REG
47480 #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
47481 #undef TARGET_FUNCTION_ARG_BOUNDARY
47482 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
47483 #undef TARGET_PASS_BY_REFERENCE
47484 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
47485 #undef TARGET_INTERNAL_ARG_POINTER
47486 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
47487 #undef TARGET_UPDATE_STACK_BOUNDARY
47488 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
47489 #undef TARGET_GET_DRAP_RTX
47490 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
47491 #undef TARGET_STRICT_ARGUMENT_NAMING
47492 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
47493 #undef TARGET_STATIC_CHAIN
47494 #define TARGET_STATIC_CHAIN ix86_static_chain
47495 #undef TARGET_TRAMPOLINE_INIT
47496 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47497 #undef TARGET_RETURN_POPS_ARGS
47498 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47499
47500 #undef TARGET_LEGITIMATE_COMBINED_INSN
47501 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47502
47503 #undef TARGET_ASAN_SHADOW_OFFSET
47504 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47505
47506 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47507 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47508
47509 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47510 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47511
47512 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47513 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47514
47515 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
47516 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
47517 ix86_libgcc_floating_mode_supported_p
47518
47519 #undef TARGET_C_MODE_FOR_SUFFIX
47520 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47521
47522 #ifdef HAVE_AS_TLS
47523 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47524 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47525 #endif
47526
47527 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47528 #undef TARGET_INSERT_ATTRIBUTES
47529 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47530 #endif
47531
47532 #undef TARGET_MANGLE_TYPE
47533 #define TARGET_MANGLE_TYPE ix86_mangle_type
47534
47535 #if !TARGET_MACHO
47536 #undef TARGET_STACK_PROTECT_FAIL
47537 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47538 #endif
47539
47540 #undef TARGET_FUNCTION_VALUE
47541 #define TARGET_FUNCTION_VALUE ix86_function_value
47542
47543 #undef TARGET_FUNCTION_VALUE_REGNO_P
47544 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47545
47546 #undef TARGET_PROMOTE_FUNCTION_MODE
47547 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47548
47549 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47550 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47551
47552 #undef TARGET_INSTANTIATE_DECLS
47553 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47554
47555 #undef TARGET_SECONDARY_RELOAD
47556 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47557
47558 #undef TARGET_CLASS_MAX_NREGS
47559 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47560
47561 #undef TARGET_PREFERRED_RELOAD_CLASS
47562 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47563 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47564 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47565 #undef TARGET_CLASS_LIKELY_SPILLED_P
47566 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47567
47568 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47569 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47570 ix86_builtin_vectorization_cost
47571 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47572 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47573 ix86_vectorize_vec_perm_const_ok
47574 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47575 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47576 ix86_preferred_simd_mode
47577 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47578 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47579 ix86_autovectorize_vector_sizes
47580 #undef TARGET_VECTORIZE_INIT_COST
47581 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47582 #undef TARGET_VECTORIZE_ADD_STMT_COST
47583 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47584 #undef TARGET_VECTORIZE_FINISH_COST
47585 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47586 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47587 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47588
47589 #undef TARGET_SET_CURRENT_FUNCTION
47590 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47591
47592 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47593 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47594
47595 #undef TARGET_OPTION_SAVE
47596 #define TARGET_OPTION_SAVE ix86_function_specific_save
47597
47598 #undef TARGET_OPTION_RESTORE
47599 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47600
47601 #undef TARGET_OPTION_PRINT
47602 #define TARGET_OPTION_PRINT ix86_function_specific_print
47603
47604 #undef TARGET_OPTION_FUNCTION_VERSIONS
47605 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47606
47607 #undef TARGET_CAN_INLINE_P
47608 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47609
47610 #undef TARGET_EXPAND_TO_RTL_HOOK
47611 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47612
47613 #undef TARGET_LEGITIMATE_ADDRESS_P
47614 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47615
47616 #undef TARGET_LRA_P
47617 #define TARGET_LRA_P hook_bool_void_true
47618
47619 #undef TARGET_REGISTER_PRIORITY
47620 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47621
47622 #undef TARGET_REGISTER_USAGE_LEVELING_P
47623 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47624
47625 #undef TARGET_LEGITIMATE_CONSTANT_P
47626 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47627
47628 #undef TARGET_FRAME_POINTER_REQUIRED
47629 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47630
47631 #undef TARGET_CAN_ELIMINATE
47632 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47633
47634 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47635 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47636
47637 #undef TARGET_ASM_CODE_END
47638 #define TARGET_ASM_CODE_END ix86_code_end
47639
47640 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47641 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47642
47643 #if TARGET_MACHO
47644 #undef TARGET_INIT_LIBFUNCS
47645 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47646 #endif
47647
47648 #undef TARGET_LOOP_UNROLL_ADJUST
47649 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47650
47651 #undef TARGET_SPILL_CLASS
47652 #define TARGET_SPILL_CLASS ix86_spill_class
47653
47654 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47655 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47656 ix86_simd_clone_compute_vecsize_and_simdlen
47657
47658 #undef TARGET_SIMD_CLONE_ADJUST
47659 #define TARGET_SIMD_CLONE_ADJUST \
47660 ix86_simd_clone_adjust
47661
47662 #undef TARGET_SIMD_CLONE_USABLE
47663 #define TARGET_SIMD_CLONE_USABLE \
47664 ix86_simd_clone_usable
47665
47666 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47667 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47668 ix86_float_exceptions_rounding_supported_p
47669
47670 #undef TARGET_MODE_EMIT
47671 #define TARGET_MODE_EMIT ix86_emit_mode_set
47672
47673 #undef TARGET_MODE_NEEDED
47674 #define TARGET_MODE_NEEDED ix86_mode_needed
47675
47676 #undef TARGET_MODE_AFTER
47677 #define TARGET_MODE_AFTER ix86_mode_after
47678
47679 #undef TARGET_MODE_ENTRY
47680 #define TARGET_MODE_ENTRY ix86_mode_entry
47681
47682 #undef TARGET_MODE_EXIT
47683 #define TARGET_MODE_EXIT ix86_mode_exit
47684
47685 #undef TARGET_MODE_PRIORITY
47686 #define TARGET_MODE_PRIORITY ix86_mode_priority
47687
47688 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
47689 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
47690
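/* TARGET_INITIALIZER, provided by target-def.h, expands to an aggregate
   initializer built from the TARGET_* macros: each #undef/#define pair above
   overrides one hook, and any hook left untouched keeps its default.  */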
47691 struct gcc_target targetm = TARGET_INITIALIZER;
47692 \f
47693 #include "gt-i386.h"