1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "stringpool.h"
27 #include "attribs.h"
28 #include "calls.h"
29 #include "stor-layout.h"
30 #include "varasm.h"
31 #include "tm_p.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "insn-config.h"
35 #include "conditions.h"
36 #include "output.h"
37 #include "insn-codes.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "except.h"
41 #include "function.h"
42 #include "recog.h"
43 #include "expr.h"
44 #include "optabs.h"
45 #include "diagnostic-core.h"
46 #include "toplev.h"
47 #include "basic-block.h"
48 #include "ggc.h"
49 #include "target.h"
50 #include "target-def.h"
51 #include "common/common-target.h"
52 #include "langhooks.h"
53 #include "reload.h"
54 #include "cgraph.h"
55 #include "pointer-set.h"
56 #include "hash-table.h"
57 #include "vec.h"
58 #include "basic-block.h"
59 #include "tree-ssa-alias.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimple-expr.h"
64 #include "is-a.h"
65 #include "gimple.h"
66 #include "gimplify.h"
67 #include "cfgloop.h"
68 #include "dwarf2.h"
69 #include "df.h"
70 #include "tm-constrs.h"
71 #include "params.h"
72 #include "cselib.h"
73 #include "debug.h"
74 #include "sched-int.h"
75 #include "sbitmap.h"
76 #include "fibheap.h"
77 #include "opts.h"
78 #include "diagnostic.h"
79 #include "dumpfile.h"
80 #include "tree-pass.h"
81 #include "wide-int.h"
82 #include "context.h"
83 #include "pass_manager.h"
84 #include "target-globals.h"
85 #include "tree-vectorizer.h"
86 #include "shrink-wrap.h"
87
88 static rtx legitimize_dllimport_symbol (rtx, bool);
89 static rtx legitimize_pe_coff_extern_decl (rtx, bool);
90 static rtx legitimize_pe_coff_symbol (rtx, bool);
91
92 #ifndef CHECK_STACK_LIMIT
93 #define CHECK_STACK_LIMIT (-1)
94 #endif
95
96 /* Return index of given mode in mult and division cost tables. */
97 #define MODE_INDEX(mode) \
98 ((mode) == QImode ? 0 \
99 : (mode) == HImode ? 1 \
100 : (mode) == SImode ? 2 \
101 : (mode) == DImode ? 3 \
102 : 4)
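/* A worked example: MODE_INDEX (SImode) evaluates to 2, selecting the third
   (SI) entry of the five-element multiply and divide cost arrays in the
   processor_costs tables below; any mode other than QImode, HImode, SImode
   or DImode falls into the last "other" slot at index 4.  */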
103
104 /* Processor costs (relative to an add) */
105 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
106 #define COSTS_N_BYTES(N) ((N) * 2)
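/* A quick sanity check of the scale: under the assumption above, an add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte-based costs in the
   size-tuned table below are directly comparable with the COSTS_N_INSNS-based
   costs used by the speed-tuned tables.  */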
107
108 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
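/* Each memcpy/memset table below holds two stringop_algs entries: the first
   is used for 32-bit code and the second for 64-bit code; processors that
   never run 64-bit code simply use DUMMY_STRINGOP_ALGS for the second entry.
   Within an entry, the leading algorithm handles block sizes unknown at
   compile time, and each {max, alg, noalign} triple gives the algorithm for
   known sizes up to MAX bytes, with max == -1 terminating the list and
   covering all larger sizes (see the stringop_algs definition in i386.h and
   decide_alg later in this file).  */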
109
110 static stringop_algs ix86_size_memcpy[2] = {
111 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
112 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
113 static stringop_algs ix86_size_memset[2] = {
114 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
115 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
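/* When tuning for size, both the 32-bit and 64-bit tables above use
   rep_prefix_1_byte (plain rep movsb / rep stosb) for every known and unknown
   block size, since of the available strategies it has the shortest
   encoding.  */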
116
117 const
118 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
119 COSTS_N_BYTES (2), /* cost of an add instruction */
120 COSTS_N_BYTES (3), /* cost of a lea instruction */
121 COSTS_N_BYTES (2), /* variable shift costs */
122 COSTS_N_BYTES (3), /* constant shift costs */
123 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
124 COSTS_N_BYTES (3), /* HI */
125 COSTS_N_BYTES (3), /* SI */
126 COSTS_N_BYTES (3), /* DI */
127 COSTS_N_BYTES (5)}, /* other */
128 0, /* cost of multiply per each bit set */
129 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
130 COSTS_N_BYTES (3), /* HI */
131 COSTS_N_BYTES (3), /* SI */
132 COSTS_N_BYTES (3), /* DI */
133 COSTS_N_BYTES (5)}, /* other */
134 COSTS_N_BYTES (3), /* cost of movsx */
135 COSTS_N_BYTES (3), /* cost of movzx */
136 0, /* "large" insn */
137 2, /* MOVE_RATIO */
138 2, /* cost for loading QImode using movzbl */
139 {2, 2, 2}, /* cost of loading integer registers
140 in QImode, HImode and SImode.
141 Relative to reg-reg move (2). */
142 {2, 2, 2}, /* cost of storing integer registers */
143 2, /* cost of reg,reg fld/fst */
144 {2, 2, 2}, /* cost of loading fp registers
145 in SFmode, DFmode and XFmode */
146 {2, 2, 2}, /* cost of storing fp registers
147 in SFmode, DFmode and XFmode */
148 3, /* cost of moving MMX register */
149 {3, 3}, /* cost of loading MMX registers
150 in SImode and DImode */
151 {3, 3}, /* cost of storing MMX registers
152 in SImode and DImode */
153 3, /* cost of moving SSE register */
154 {3, 3, 3}, /* cost of loading SSE registers
155 in SImode, DImode and TImode */
156 {3, 3, 3}, /* cost of storing SSE registers
157 in SImode, DImode and TImode */
158 3, /* MMX or SSE register to integer */
159 0, /* size of l1 cache */
160 0, /* size of l2 cache */
161 0, /* size of prefetch block */
162 0, /* number of parallel prefetches */
163 2, /* Branch cost */
164 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
165 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
166 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
167 COSTS_N_BYTES (2), /* cost of FABS instruction. */
168 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
169 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
170 ix86_size_memcpy,
171 ix86_size_memset,
172 1, /* scalar_stmt_cost. */
173 1, /* scalar load_cost. */
174 1, /* scalar_store_cost. */
175 1, /* vec_stmt_cost. */
176 1, /* vec_to_scalar_cost. */
177 1, /* scalar_to_vec_cost. */
178 1, /* vec_align_load_cost. */
179 1, /* vec_unalign_load_cost. */
180 1, /* vec_store_cost. */
181 1, /* cond_taken_branch_cost. */
182 1, /* cond_not_taken_branch_cost. */
183 };
184
185 /* Processor costs (relative to an add) */
186 static stringop_algs i386_memcpy[2] = {
187 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
188 DUMMY_STRINGOP_ALGS};
189 static stringop_algs i386_memset[2] = {
190 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
191 DUMMY_STRINGOP_ALGS};
192
193 static const
194 struct processor_costs i386_cost = { /* 386 specific costs */
195 COSTS_N_INSNS (1), /* cost of an add instruction */
196 COSTS_N_INSNS (1), /* cost of a lea instruction */
197 COSTS_N_INSNS (3), /* variable shift costs */
198 COSTS_N_INSNS (2), /* constant shift costs */
199 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
200 COSTS_N_INSNS (6), /* HI */
201 COSTS_N_INSNS (6), /* SI */
202 COSTS_N_INSNS (6), /* DI */
203 COSTS_N_INSNS (6)}, /* other */
204 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
205 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
206 COSTS_N_INSNS (23), /* HI */
207 COSTS_N_INSNS (23), /* SI */
208 COSTS_N_INSNS (23), /* DI */
209 COSTS_N_INSNS (23)}, /* other */
210 COSTS_N_INSNS (3), /* cost of movsx */
211 COSTS_N_INSNS (2), /* cost of movzx */
212 15, /* "large" insn */
213 3, /* MOVE_RATIO */
214 4, /* cost for loading QImode using movzbl */
215 {2, 4, 2}, /* cost of loading integer registers
216 in QImode, HImode and SImode.
217 Relative to reg-reg move (2). */
218 {2, 4, 2}, /* cost of storing integer registers */
219 2, /* cost of reg,reg fld/fst */
220 {8, 8, 8}, /* cost of loading fp registers
221 in SFmode, DFmode and XFmode */
222 {8, 8, 8}, /* cost of storing fp registers
223 in SFmode, DFmode and XFmode */
224 2, /* cost of moving MMX register */
225 {4, 8}, /* cost of loading MMX registers
226 in SImode and DImode */
227 {4, 8}, /* cost of storing MMX registers
228 in SImode and DImode */
229 2, /* cost of moving SSE register */
230 {4, 8, 16}, /* cost of loading SSE registers
231 in SImode, DImode and TImode */
232 {4, 8, 16}, /* cost of storing SSE registers
233 in SImode, DImode and TImode */
234 3, /* MMX or SSE register to integer */
235 0, /* size of l1 cache */
236 0, /* size of l2 cache */
237 0, /* size of prefetch block */
238 0, /* number of parallel prefetches */
239 1, /* Branch cost */
240 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
241 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
242 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
243 COSTS_N_INSNS (22), /* cost of FABS instruction. */
244 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
245 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
246 i386_memcpy,
247 i386_memset,
248 1, /* scalar_stmt_cost. */
249 1, /* scalar load_cost. */
250 1, /* scalar_store_cost. */
251 1, /* vec_stmt_cost. */
252 1, /* vec_to_scalar_cost. */
253 1, /* scalar_to_vec_cost. */
254 1, /* vec_align_load_cost. */
255 2, /* vec_unalign_load_cost. */
256 1, /* vec_store_cost. */
257 3, /* cond_taken_branch_cost. */
258 1, /* cond_not_taken_branch_cost. */
259 };
260
261 static stringop_algs i486_memcpy[2] = {
262 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
263 DUMMY_STRINGOP_ALGS};
264 static stringop_algs i486_memset[2] = {
265 {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
266 DUMMY_STRINGOP_ALGS};
267
268 static const
269 struct processor_costs i486_cost = { /* 486 specific costs */
270 COSTS_N_INSNS (1), /* cost of an add instruction */
271 COSTS_N_INSNS (1), /* cost of a lea instruction */
272 COSTS_N_INSNS (3), /* variable shift costs */
273 COSTS_N_INSNS (2), /* constant shift costs */
274 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
275 COSTS_N_INSNS (12), /* HI */
276 COSTS_N_INSNS (12), /* SI */
277 COSTS_N_INSNS (12), /* DI */
278 COSTS_N_INSNS (12)}, /* other */
279 1, /* cost of multiply per each bit set */
280 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
281 COSTS_N_INSNS (40), /* HI */
282 COSTS_N_INSNS (40), /* SI */
283 COSTS_N_INSNS (40), /* DI */
284 COSTS_N_INSNS (40)}, /* other */
285 COSTS_N_INSNS (3), /* cost of movsx */
286 COSTS_N_INSNS (2), /* cost of movzx */
287 15, /* "large" insn */
288 3, /* MOVE_RATIO */
289 4, /* cost for loading QImode using movzbl */
290 {2, 4, 2}, /* cost of loading integer registers
291 in QImode, HImode and SImode.
292 Relative to reg-reg move (2). */
293 {2, 4, 2}, /* cost of storing integer registers */
294 2, /* cost of reg,reg fld/fst */
295 {8, 8, 8}, /* cost of loading fp registers
296 in SFmode, DFmode and XFmode */
297 {8, 8, 8}, /* cost of storing fp registers
298 in SFmode, DFmode and XFmode */
299 2, /* cost of moving MMX register */
300 {4, 8}, /* cost of loading MMX registers
301 in SImode and DImode */
302 {4, 8}, /* cost of storing MMX registers
303 in SImode and DImode */
304 2, /* cost of moving SSE register */
305 {4, 8, 16}, /* cost of loading SSE registers
306 in SImode, DImode and TImode */
307 {4, 8, 16}, /* cost of storing SSE registers
308 in SImode, DImode and TImode */
309 3, /* MMX or SSE register to integer */
310 4, /* size of l1 cache. 486 has 8kB cache
311 shared for code and data, so 4kB is
312 not really precise. */
313 4, /* size of l2 cache */
314 0, /* size of prefetch block */
315 0, /* number of parallel prefetches */
316 1, /* Branch cost */
317 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
318 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
319 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
320 COSTS_N_INSNS (3), /* cost of FABS instruction. */
321 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
322 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
323 i486_memcpy,
324 i486_memset,
325 1, /* scalar_stmt_cost. */
326 1, /* scalar load_cost. */
327 1, /* scalar_store_cost. */
328 1, /* vec_stmt_cost. */
329 1, /* vec_to_scalar_cost. */
330 1, /* scalar_to_vec_cost. */
331 1, /* vec_align_load_cost. */
332 2, /* vec_unalign_load_cost. */
333 1, /* vec_store_cost. */
334 3, /* cond_taken_branch_cost. */
335 1, /* cond_not_taken_branch_cost. */
336 };
337
338 static stringop_algs pentium_memcpy[2] = {
339 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
340 DUMMY_STRINGOP_ALGS};
341 static stringop_algs pentium_memset[2] = {
342 {libcall, {{-1, rep_prefix_4_byte, false}}},
343 DUMMY_STRINGOP_ALGS};
344
345 static const
346 struct processor_costs pentium_cost = {
347 COSTS_N_INSNS (1), /* cost of an add instruction */
348 COSTS_N_INSNS (1), /* cost of a lea instruction */
349 COSTS_N_INSNS (4), /* variable shift costs */
350 COSTS_N_INSNS (1), /* constant shift costs */
351 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
352 COSTS_N_INSNS (11), /* HI */
353 COSTS_N_INSNS (11), /* SI */
354 COSTS_N_INSNS (11), /* DI */
355 COSTS_N_INSNS (11)}, /* other */
356 0, /* cost of multiply per each bit set */
357 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
358 COSTS_N_INSNS (25), /* HI */
359 COSTS_N_INSNS (25), /* SI */
360 COSTS_N_INSNS (25), /* DI */
361 COSTS_N_INSNS (25)}, /* other */
362 COSTS_N_INSNS (3), /* cost of movsx */
363 COSTS_N_INSNS (2), /* cost of movzx */
364 8, /* "large" insn */
365 6, /* MOVE_RATIO */
366 6, /* cost for loading QImode using movzbl */
367 {2, 4, 2}, /* cost of loading integer registers
368 in QImode, HImode and SImode.
369 Relative to reg-reg move (2). */
370 {2, 4, 2}, /* cost of storing integer registers */
371 2, /* cost of reg,reg fld/fst */
372 {2, 2, 6}, /* cost of loading fp registers
373 in SFmode, DFmode and XFmode */
374 {4, 4, 6}, /* cost of storing fp registers
375 in SFmode, DFmode and XFmode */
376 8, /* cost of moving MMX register */
377 {8, 8}, /* cost of loading MMX registers
378 in SImode and DImode */
379 {8, 8}, /* cost of storing MMX registers
380 in SImode and DImode */
381 2, /* cost of moving SSE register */
382 {4, 8, 16}, /* cost of loading SSE registers
383 in SImode, DImode and TImode */
384 {4, 8, 16}, /* cost of storing SSE registers
385 in SImode, DImode and TImode */
386 3, /* MMX or SSE register to integer */
387 8, /* size of l1 cache. */
388 8, /* size of l2 cache */
389 0, /* size of prefetch block */
390 0, /* number of parallel prefetches */
391 2, /* Branch cost */
392 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
393 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
394 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
395 COSTS_N_INSNS (1), /* cost of FABS instruction. */
396 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
397 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
398 pentium_memcpy,
399 pentium_memset,
400 1, /* scalar_stmt_cost. */
401 1, /* scalar load_cost. */
402 1, /* scalar_store_cost. */
403 1, /* vec_stmt_cost. */
404 1, /* vec_to_scalar_cost. */
405 1, /* scalar_to_vec_cost. */
406 1, /* vec_align_load_cost. */
407 2, /* vec_unalign_load_cost. */
408 1, /* vec_store_cost. */
409 3, /* cond_taken_branch_cost. */
410 1, /* cond_not_taken_branch_cost. */
411 };
412
 413 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
 414    (we ensure the alignment).  For small blocks an inline loop is still a
 415    noticeable win, for bigger blocks either rep movsl or rep movsb is the
 416    way to go.  Rep movsb apparently has a more expensive startup time in the
 417    CPU, but after 4K the difference is down in the noise.  */
418 static stringop_algs pentiumpro_memcpy[2] = {
419 {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
420 {8192, rep_prefix_4_byte, false},
421 {-1, rep_prefix_1_byte, false}}},
422 DUMMY_STRINGOP_ALGS};
423 static stringop_algs pentiumpro_memset[2] = {
424 {rep_prefix_4_byte, {{1024, unrolled_loop, false},
425 {8192, rep_prefix_4_byte, false},
426 {-1, libcall, false}}},
427 DUMMY_STRINGOP_ALGS};
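/* Reading pentiumpro_memcpy above as a worked example: with a size known at
   compile time, blocks up to 128 bytes use a simple loop, up to 1024 bytes an
   unrolled loop, up to 8192 bytes rep movsl (rep_prefix_4_byte), and anything
   larger rep movsb (rep_prefix_1_byte); for unknown sizes rep_prefix_4_byte
   is used directly.  The 64-bit entry is DUMMY_STRINGOP_ALGS because this
   processor never runs 64-bit code.  */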
428 static const
429 struct processor_costs pentiumpro_cost = {
430 COSTS_N_INSNS (1), /* cost of an add instruction */
431 COSTS_N_INSNS (1), /* cost of a lea instruction */
432 COSTS_N_INSNS (1), /* variable shift costs */
433 COSTS_N_INSNS (1), /* constant shift costs */
434 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
435 COSTS_N_INSNS (4), /* HI */
436 COSTS_N_INSNS (4), /* SI */
437 COSTS_N_INSNS (4), /* DI */
438 COSTS_N_INSNS (4)}, /* other */
439 0, /* cost of multiply per each bit set */
440 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
441 COSTS_N_INSNS (17), /* HI */
442 COSTS_N_INSNS (17), /* SI */
443 COSTS_N_INSNS (17), /* DI */
444 COSTS_N_INSNS (17)}, /* other */
445 COSTS_N_INSNS (1), /* cost of movsx */
446 COSTS_N_INSNS (1), /* cost of movzx */
447 8, /* "large" insn */
448 6, /* MOVE_RATIO */
449 2, /* cost for loading QImode using movzbl */
450 {4, 4, 4}, /* cost of loading integer registers
451 in QImode, HImode and SImode.
452 Relative to reg-reg move (2). */
453 {2, 2, 2}, /* cost of storing integer registers */
454 2, /* cost of reg,reg fld/fst */
455 {2, 2, 6}, /* cost of loading fp registers
456 in SFmode, DFmode and XFmode */
457 {4, 4, 6}, /* cost of storing fp registers
458 in SFmode, DFmode and XFmode */
459 2, /* cost of moving MMX register */
460 {2, 2}, /* cost of loading MMX registers
461 in SImode and DImode */
462 {2, 2}, /* cost of storing MMX registers
463 in SImode and DImode */
464 2, /* cost of moving SSE register */
465 {2, 2, 8}, /* cost of loading SSE registers
466 in SImode, DImode and TImode */
467 {2, 2, 8}, /* cost of storing SSE registers
468 in SImode, DImode and TImode */
469 3, /* MMX or SSE register to integer */
470 8, /* size of l1 cache. */
471 256, /* size of l2 cache */
472 32, /* size of prefetch block */
473 6, /* number of parallel prefetches */
474 2, /* Branch cost */
475 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
476 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
477 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
478 COSTS_N_INSNS (2), /* cost of FABS instruction. */
479 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
480 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
481 pentiumpro_memcpy,
482 pentiumpro_memset,
483 1, /* scalar_stmt_cost. */
484 1, /* scalar load_cost. */
485 1, /* scalar_store_cost. */
486 1, /* vec_stmt_cost. */
487 1, /* vec_to_scalar_cost. */
488 1, /* scalar_to_vec_cost. */
489 1, /* vec_align_load_cost. */
490 2, /* vec_unalign_load_cost. */
491 1, /* vec_store_cost. */
492 3, /* cond_taken_branch_cost. */
493 1, /* cond_not_taken_branch_cost. */
494 };
495
496 static stringop_algs geode_memcpy[2] = {
497 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
498 DUMMY_STRINGOP_ALGS};
499 static stringop_algs geode_memset[2] = {
500 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
501 DUMMY_STRINGOP_ALGS};
502 static const
503 struct processor_costs geode_cost = {
504 COSTS_N_INSNS (1), /* cost of an add instruction */
505 COSTS_N_INSNS (1), /* cost of a lea instruction */
506 COSTS_N_INSNS (2), /* variable shift costs */
507 COSTS_N_INSNS (1), /* constant shift costs */
508 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
509 COSTS_N_INSNS (4), /* HI */
510 COSTS_N_INSNS (7), /* SI */
511 COSTS_N_INSNS (7), /* DI */
512 COSTS_N_INSNS (7)}, /* other */
513 0, /* cost of multiply per each bit set */
514 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
515 COSTS_N_INSNS (23), /* HI */
516 COSTS_N_INSNS (39), /* SI */
517 COSTS_N_INSNS (39), /* DI */
518 COSTS_N_INSNS (39)}, /* other */
519 COSTS_N_INSNS (1), /* cost of movsx */
520 COSTS_N_INSNS (1), /* cost of movzx */
521 8, /* "large" insn */
522 4, /* MOVE_RATIO */
523 1, /* cost for loading QImode using movzbl */
524 {1, 1, 1}, /* cost of loading integer registers
525 in QImode, HImode and SImode.
526 Relative to reg-reg move (2). */
527 {1, 1, 1}, /* cost of storing integer registers */
528 1, /* cost of reg,reg fld/fst */
529 {1, 1, 1}, /* cost of loading fp registers
530 in SFmode, DFmode and XFmode */
531 {4, 6, 6}, /* cost of storing fp registers
532 in SFmode, DFmode and XFmode */
533
534 1, /* cost of moving MMX register */
535 {1, 1}, /* cost of loading MMX registers
536 in SImode and DImode */
537 {1, 1}, /* cost of storing MMX registers
538 in SImode and DImode */
539 1, /* cost of moving SSE register */
540 {1, 1, 1}, /* cost of loading SSE registers
541 in SImode, DImode and TImode */
542 {1, 1, 1}, /* cost of storing SSE registers
543 in SImode, DImode and TImode */
544 1, /* MMX or SSE register to integer */
545 64, /* size of l1 cache. */
546 128, /* size of l2 cache. */
547 32, /* size of prefetch block */
548 1, /* number of parallel prefetches */
549 1, /* Branch cost */
550 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
551 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
552 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
553 COSTS_N_INSNS (1), /* cost of FABS instruction. */
554 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
555 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
556 geode_memcpy,
557 geode_memset,
558 1, /* scalar_stmt_cost. */
559 1, /* scalar load_cost. */
560 1, /* scalar_store_cost. */
561 1, /* vec_stmt_cost. */
562 1, /* vec_to_scalar_cost. */
563 1, /* scalar_to_vec_cost. */
564 1, /* vec_align_load_cost. */
565 2, /* vec_unalign_load_cost. */
566 1, /* vec_store_cost. */
567 3, /* cond_taken_branch_cost. */
568 1, /* cond_not_taken_branch_cost. */
569 };
570
571 static stringop_algs k6_memcpy[2] = {
572 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
573 DUMMY_STRINGOP_ALGS};
574 static stringop_algs k6_memset[2] = {
575 {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
576 DUMMY_STRINGOP_ALGS};
577 static const
578 struct processor_costs k6_cost = {
579 COSTS_N_INSNS (1), /* cost of an add instruction */
580 COSTS_N_INSNS (2), /* cost of a lea instruction */
581 COSTS_N_INSNS (1), /* variable shift costs */
582 COSTS_N_INSNS (1), /* constant shift costs */
583 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
584 COSTS_N_INSNS (3), /* HI */
585 COSTS_N_INSNS (3), /* SI */
586 COSTS_N_INSNS (3), /* DI */
587 COSTS_N_INSNS (3)}, /* other */
588 0, /* cost of multiply per each bit set */
589 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
590 COSTS_N_INSNS (18), /* HI */
591 COSTS_N_INSNS (18), /* SI */
592 COSTS_N_INSNS (18), /* DI */
593 COSTS_N_INSNS (18)}, /* other */
594 COSTS_N_INSNS (2), /* cost of movsx */
595 COSTS_N_INSNS (2), /* cost of movzx */
596 8, /* "large" insn */
597 4, /* MOVE_RATIO */
598 3, /* cost for loading QImode using movzbl */
599 {4, 5, 4}, /* cost of loading integer registers
600 in QImode, HImode and SImode.
601 Relative to reg-reg move (2). */
602 {2, 3, 2}, /* cost of storing integer registers */
603 4, /* cost of reg,reg fld/fst */
604 {6, 6, 6}, /* cost of loading fp registers
605 in SFmode, DFmode and XFmode */
606 {4, 4, 4}, /* cost of storing fp registers
607 in SFmode, DFmode and XFmode */
608 2, /* cost of moving MMX register */
609 {2, 2}, /* cost of loading MMX registers
610 in SImode and DImode */
611 {2, 2}, /* cost of storing MMX registers
612 in SImode and DImode */
613 2, /* cost of moving SSE register */
614 {2, 2, 8}, /* cost of loading SSE registers
615 in SImode, DImode and TImode */
616 {2, 2, 8}, /* cost of storing SSE registers
617 in SImode, DImode and TImode */
618 6, /* MMX or SSE register to integer */
619 32, /* size of l1 cache. */
620 32, /* size of l2 cache. Some models
621 have integrated l2 cache, but
622 optimizing for k6 is not important
623 enough to worry about that. */
624 32, /* size of prefetch block */
625 1, /* number of parallel prefetches */
626 1, /* Branch cost */
627 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
628 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
629 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
630 COSTS_N_INSNS (2), /* cost of FABS instruction. */
631 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
632 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
633 k6_memcpy,
634 k6_memset,
635 1, /* scalar_stmt_cost. */
636 1, /* scalar load_cost. */
637 1, /* scalar_store_cost. */
638 1, /* vec_stmt_cost. */
639 1, /* vec_to_scalar_cost. */
640 1, /* scalar_to_vec_cost. */
641 1, /* vec_align_load_cost. */
642 2, /* vec_unalign_load_cost. */
643 1, /* vec_store_cost. */
644 3, /* cond_taken_branch_cost. */
645 1, /* cond_not_taken_branch_cost. */
646 };
647
 648 /* For some reason, Athlon deals better with the REP prefix (relative to
 649    loops) than K8 does.  Alignment becomes important after 8 bytes for
 650    memcpy and 128 bytes for memset.  */
651 static stringop_algs athlon_memcpy[2] = {
652 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
653 DUMMY_STRINGOP_ALGS};
654 static stringop_algs athlon_memset[2] = {
655 {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
656 DUMMY_STRINGOP_ALGS};
657 static const
658 struct processor_costs athlon_cost = {
659 COSTS_N_INSNS (1), /* cost of an add instruction */
660 COSTS_N_INSNS (2), /* cost of a lea instruction */
661 COSTS_N_INSNS (1), /* variable shift costs */
662 COSTS_N_INSNS (1), /* constant shift costs */
663 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
664 COSTS_N_INSNS (5), /* HI */
665 COSTS_N_INSNS (5), /* SI */
666 COSTS_N_INSNS (5), /* DI */
667 COSTS_N_INSNS (5)}, /* other */
668 0, /* cost of multiply per each bit set */
669 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
670 COSTS_N_INSNS (26), /* HI */
671 COSTS_N_INSNS (42), /* SI */
672 COSTS_N_INSNS (74), /* DI */
673 COSTS_N_INSNS (74)}, /* other */
674 COSTS_N_INSNS (1), /* cost of movsx */
675 COSTS_N_INSNS (1), /* cost of movzx */
676 8, /* "large" insn */
677 9, /* MOVE_RATIO */
678 4, /* cost for loading QImode using movzbl */
679 {3, 4, 3}, /* cost of loading integer registers
680 in QImode, HImode and SImode.
681 Relative to reg-reg move (2). */
682 {3, 4, 3}, /* cost of storing integer registers */
683 4, /* cost of reg,reg fld/fst */
684 {4, 4, 12}, /* cost of loading fp registers
685 in SFmode, DFmode and XFmode */
686 {6, 6, 8}, /* cost of storing fp registers
687 in SFmode, DFmode and XFmode */
688 2, /* cost of moving MMX register */
689 {4, 4}, /* cost of loading MMX registers
690 in SImode and DImode */
691 {4, 4}, /* cost of storing MMX registers
692 in SImode and DImode */
693 2, /* cost of moving SSE register */
694 {4, 4, 6}, /* cost of loading SSE registers
695 in SImode, DImode and TImode */
696 {4, 4, 5}, /* cost of storing SSE registers
697 in SImode, DImode and TImode */
698 5, /* MMX or SSE register to integer */
699 64, /* size of l1 cache. */
700 256, /* size of l2 cache. */
701 64, /* size of prefetch block */
702 6, /* number of parallel prefetches */
703 5, /* Branch cost */
704 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
705 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
706 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
707 COSTS_N_INSNS (2), /* cost of FABS instruction. */
708 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
709 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
710 athlon_memcpy,
711 athlon_memset,
712 1, /* scalar_stmt_cost. */
713 1, /* scalar load_cost. */
714 1, /* scalar_store_cost. */
715 1, /* vec_stmt_cost. */
716 1, /* vec_to_scalar_cost. */
717 1, /* scalar_to_vec_cost. */
718 1, /* vec_align_load_cost. */
719 2, /* vec_unalign_load_cost. */
720 1, /* vec_store_cost. */
721 3, /* cond_taken_branch_cost. */
722 1, /* cond_not_taken_branch_cost. */
723 };
724
 725 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 726    small blocks it is better to use a loop.  For large blocks, libcall can
 727    do non-temporal accesses and beat inline code considerably.  */
728 static stringop_algs k8_memcpy[2] = {
729 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
730 {-1, rep_prefix_4_byte, false}}},
731 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
732 {-1, libcall, false}}}};
733 static stringop_algs k8_memset[2] = {
734 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
735 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
736 {libcall, {{48, unrolled_loop, false},
737 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
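/* Reading the 64-bit (second) entry of k8_memcpy above: known copies of at
   most 16 bytes use a loop, blocks up to 8192 bytes use rep movsq
   (rep_prefix_8_byte), and larger or unknown sizes fall back to the memcpy
   libcall, which matches the comment above about the library routine using
   non-temporal accesses for large blocks.  */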
738 static const
739 struct processor_costs k8_cost = {
740 COSTS_N_INSNS (1), /* cost of an add instruction */
741 COSTS_N_INSNS (2), /* cost of a lea instruction */
742 COSTS_N_INSNS (1), /* variable shift costs */
743 COSTS_N_INSNS (1), /* constant shift costs */
744 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
745 COSTS_N_INSNS (4), /* HI */
746 COSTS_N_INSNS (3), /* SI */
747 COSTS_N_INSNS (4), /* DI */
748 COSTS_N_INSNS (5)}, /* other */
749 0, /* cost of multiply per each bit set */
750 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
751 COSTS_N_INSNS (26), /* HI */
752 COSTS_N_INSNS (42), /* SI */
753 COSTS_N_INSNS (74), /* DI */
754 COSTS_N_INSNS (74)}, /* other */
755 COSTS_N_INSNS (1), /* cost of movsx */
756 COSTS_N_INSNS (1), /* cost of movzx */
757 8, /* "large" insn */
758 9, /* MOVE_RATIO */
759 4, /* cost for loading QImode using movzbl */
760 {3, 4, 3}, /* cost of loading integer registers
761 in QImode, HImode and SImode.
762 Relative to reg-reg move (2). */
763 {3, 4, 3}, /* cost of storing integer registers */
764 4, /* cost of reg,reg fld/fst */
765 {4, 4, 12}, /* cost of loading fp registers
766 in SFmode, DFmode and XFmode */
767 {6, 6, 8}, /* cost of storing fp registers
768 in SFmode, DFmode and XFmode */
769 2, /* cost of moving MMX register */
770 {3, 3}, /* cost of loading MMX registers
771 in SImode and DImode */
772 {4, 4}, /* cost of storing MMX registers
773 in SImode and DImode */
774 2, /* cost of moving SSE register */
775 {4, 3, 6}, /* cost of loading SSE registers
776 in SImode, DImode and TImode */
777 {4, 4, 5}, /* cost of storing SSE registers
778 in SImode, DImode and TImode */
779 5, /* MMX or SSE register to integer */
780 64, /* size of l1 cache. */
781 512, /* size of l2 cache. */
782 64, /* size of prefetch block */
 783   /* New AMD processors never drop prefetches; if they cannot be performed
 784      immediately, they are queued.  We set the number of simultaneous
 785      prefetches to a large constant to reflect this (it is probably not a good
 786      idea to leave the number of prefetches completely unlimited, as their
 787      execution also takes some time).  */
788 100, /* number of parallel prefetches */
789 3, /* Branch cost */
790 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
791 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
792 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
793 COSTS_N_INSNS (2), /* cost of FABS instruction. */
794 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
795 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
796
797 k8_memcpy,
798 k8_memset,
799 4, /* scalar_stmt_cost. */
800 2, /* scalar load_cost. */
801 2, /* scalar_store_cost. */
802 5, /* vec_stmt_cost. */
803 0, /* vec_to_scalar_cost. */
804 2, /* scalar_to_vec_cost. */
805 2, /* vec_align_load_cost. */
806 3, /* vec_unalign_load_cost. */
807 3, /* vec_store_cost. */
808 3, /* cond_taken_branch_cost. */
809 2, /* cond_not_taken_branch_cost. */
810 };
811
 812 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 813    very small blocks it is better to use a loop.  For large blocks, libcall can
 814    do non-temporal accesses and beat inline code considerably.  */
815 static stringop_algs amdfam10_memcpy[2] = {
816 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
817 {-1, rep_prefix_4_byte, false}}},
818 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
819 {-1, libcall, false}}}};
820 static stringop_algs amdfam10_memset[2] = {
821 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
822 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
823 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
824 {-1, libcall, false}}}};
825 struct processor_costs amdfam10_cost = {
826 COSTS_N_INSNS (1), /* cost of an add instruction */
827 COSTS_N_INSNS (2), /* cost of a lea instruction */
828 COSTS_N_INSNS (1), /* variable shift costs */
829 COSTS_N_INSNS (1), /* constant shift costs */
830 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
831 COSTS_N_INSNS (4), /* HI */
832 COSTS_N_INSNS (3), /* SI */
833 COSTS_N_INSNS (4), /* DI */
834 COSTS_N_INSNS (5)}, /* other */
835 0, /* cost of multiply per each bit set */
836 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
837 COSTS_N_INSNS (35), /* HI */
838 COSTS_N_INSNS (51), /* SI */
839 COSTS_N_INSNS (83), /* DI */
840 COSTS_N_INSNS (83)}, /* other */
841 COSTS_N_INSNS (1), /* cost of movsx */
842 COSTS_N_INSNS (1), /* cost of movzx */
843 8, /* "large" insn */
844 9, /* MOVE_RATIO */
845 4, /* cost for loading QImode using movzbl */
846 {3, 4, 3}, /* cost of loading integer registers
847 in QImode, HImode and SImode.
848 Relative to reg-reg move (2). */
849 {3, 4, 3}, /* cost of storing integer registers */
850 4, /* cost of reg,reg fld/fst */
851 {4, 4, 12}, /* cost of loading fp registers
852 in SFmode, DFmode and XFmode */
853 {6, 6, 8}, /* cost of storing fp registers
854 in SFmode, DFmode and XFmode */
855 2, /* cost of moving MMX register */
856 {3, 3}, /* cost of loading MMX registers
857 in SImode and DImode */
858 {4, 4}, /* cost of storing MMX registers
859 in SImode and DImode */
860 2, /* cost of moving SSE register */
861 {4, 4, 3}, /* cost of loading SSE registers
862 in SImode, DImode and TImode */
863 {4, 4, 5}, /* cost of storing SSE registers
864 in SImode, DImode and TImode */
865 3, /* MMX or SSE register to integer */
866 /* On K8:
867 MOVD reg64, xmmreg Double FSTORE 4
868 MOVD reg32, xmmreg Double FSTORE 4
869 On AMDFAM10:
870 MOVD reg64, xmmreg Double FADD 3
871 1/1 1/1
872 MOVD reg32, xmmreg Double FADD 3
873 1/1 1/1 */
874 64, /* size of l1 cache. */
875 512, /* size of l2 cache. */
876 64, /* size of prefetch block */
 877   /* New AMD processors never drop prefetches; if they cannot be performed
 878      immediately, they are queued.  We set the number of simultaneous
 879      prefetches to a large constant to reflect this (it is probably not a good
 880      idea to leave the number of prefetches completely unlimited, as their
 881      execution also takes some time).  */
882 100, /* number of parallel prefetches */
883 2, /* Branch cost */
884 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
885 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
886 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
887 COSTS_N_INSNS (2), /* cost of FABS instruction. */
888 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
889 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
890
891 amdfam10_memcpy,
892 amdfam10_memset,
893 4, /* scalar_stmt_cost. */
894 2, /* scalar load_cost. */
895 2, /* scalar_store_cost. */
896 6, /* vec_stmt_cost. */
897 0, /* vec_to_scalar_cost. */
898 2, /* scalar_to_vec_cost. */
899 2, /* vec_align_load_cost. */
900 2, /* vec_unalign_load_cost. */
901 2, /* vec_store_cost. */
902 2, /* cond_taken_branch_cost. */
903 1, /* cond_not_taken_branch_cost. */
904 };
905
 906 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
 907    very small blocks it is better to use a loop.  For large blocks, libcall
 908    can do non-temporal accesses and beat inline code considerably.  */
909 static stringop_algs bdver1_memcpy[2] = {
910 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
911 {-1, rep_prefix_4_byte, false}}},
912 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
913 {-1, libcall, false}}}};
914 static stringop_algs bdver1_memset[2] = {
915 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
916 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
917 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
918 {-1, libcall, false}}}};
919
920 const struct processor_costs bdver1_cost = {
921 COSTS_N_INSNS (1), /* cost of an add instruction */
922 COSTS_N_INSNS (1), /* cost of a lea instruction */
923 COSTS_N_INSNS (1), /* variable shift costs */
924 COSTS_N_INSNS (1), /* constant shift costs */
925 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
926 COSTS_N_INSNS (4), /* HI */
927 COSTS_N_INSNS (4), /* SI */
928 COSTS_N_INSNS (6), /* DI */
929 COSTS_N_INSNS (6)}, /* other */
930 0, /* cost of multiply per each bit set */
931 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
932 COSTS_N_INSNS (35), /* HI */
933 COSTS_N_INSNS (51), /* SI */
934 COSTS_N_INSNS (83), /* DI */
935 COSTS_N_INSNS (83)}, /* other */
936 COSTS_N_INSNS (1), /* cost of movsx */
937 COSTS_N_INSNS (1), /* cost of movzx */
938 8, /* "large" insn */
939 9, /* MOVE_RATIO */
940 4, /* cost for loading QImode using movzbl */
941 {5, 5, 4}, /* cost of loading integer registers
942 in QImode, HImode and SImode.
943 Relative to reg-reg move (2). */
944 {4, 4, 4}, /* cost of storing integer registers */
945 2, /* cost of reg,reg fld/fst */
946 {5, 5, 12}, /* cost of loading fp registers
947 in SFmode, DFmode and XFmode */
948 {4, 4, 8}, /* cost of storing fp registers
949 in SFmode, DFmode and XFmode */
950 2, /* cost of moving MMX register */
951 {4, 4}, /* cost of loading MMX registers
952 in SImode and DImode */
953 {4, 4}, /* cost of storing MMX registers
954 in SImode and DImode */
955 2, /* cost of moving SSE register */
956 {4, 4, 4}, /* cost of loading SSE registers
957 in SImode, DImode and TImode */
958 {4, 4, 4}, /* cost of storing SSE registers
959 in SImode, DImode and TImode */
960 2, /* MMX or SSE register to integer */
961 /* On K8:
962 MOVD reg64, xmmreg Double FSTORE 4
963 MOVD reg32, xmmreg Double FSTORE 4
964 On AMDFAM10:
965 MOVD reg64, xmmreg Double FADD 3
966 1/1 1/1
967 MOVD reg32, xmmreg Double FADD 3
968 1/1 1/1 */
969 16, /* size of l1 cache. */
970 2048, /* size of l2 cache. */
971 64, /* size of prefetch block */
 972   /* New AMD processors never drop prefetches; if they cannot be performed
 973      immediately, they are queued.  We set the number of simultaneous
 974      prefetches to a large constant to reflect this (it is probably not a good
 975      idea to leave the number of prefetches completely unlimited, as their
 976      execution also takes some time).  */
977 100, /* number of parallel prefetches */
978 2, /* Branch cost */
979 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
980 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
981 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
982 COSTS_N_INSNS (2), /* cost of FABS instruction. */
983 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
984 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
985
986 bdver1_memcpy,
987 bdver1_memset,
988 6, /* scalar_stmt_cost. */
989 4, /* scalar load_cost. */
990 4, /* scalar_store_cost. */
991 6, /* vec_stmt_cost. */
992 0, /* vec_to_scalar_cost. */
993 2, /* scalar_to_vec_cost. */
994 4, /* vec_align_load_cost. */
995 4, /* vec_unalign_load_cost. */
996 4, /* vec_store_cost. */
997 2, /* cond_taken_branch_cost. */
998 1, /* cond_not_taken_branch_cost. */
999 };
1000
1001 /* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
1002    very small blocks it is better to use a loop.  For large blocks, libcall
1003    can do non-temporal accesses and beat inline code considerably.  */
1004
1005 static stringop_algs bdver2_memcpy[2] = {
1006 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1007 {-1, rep_prefix_4_byte, false}}},
1008 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1009 {-1, libcall, false}}}};
1010 static stringop_algs bdver2_memset[2] = {
1011 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1012 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1013 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1014 {-1, libcall, false}}}};
1015
1016 const struct processor_costs bdver2_cost = {
1017 COSTS_N_INSNS (1), /* cost of an add instruction */
1018 COSTS_N_INSNS (1), /* cost of a lea instruction */
1019 COSTS_N_INSNS (1), /* variable shift costs */
1020 COSTS_N_INSNS (1), /* constant shift costs */
1021 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1022 COSTS_N_INSNS (4), /* HI */
1023 COSTS_N_INSNS (4), /* SI */
1024 COSTS_N_INSNS (6), /* DI */
1025 COSTS_N_INSNS (6)}, /* other */
1026 0, /* cost of multiply per each bit set */
1027 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1028 COSTS_N_INSNS (35), /* HI */
1029 COSTS_N_INSNS (51), /* SI */
1030 COSTS_N_INSNS (83), /* DI */
1031 COSTS_N_INSNS (83)}, /* other */
1032 COSTS_N_INSNS (1), /* cost of movsx */
1033 COSTS_N_INSNS (1), /* cost of movzx */
1034 8, /* "large" insn */
1035 9, /* MOVE_RATIO */
1036 4, /* cost for loading QImode using movzbl */
1037 {5, 5, 4}, /* cost of loading integer registers
1038 in QImode, HImode and SImode.
1039 Relative to reg-reg move (2). */
1040 {4, 4, 4}, /* cost of storing integer registers */
1041 2, /* cost of reg,reg fld/fst */
1042 {5, 5, 12}, /* cost of loading fp registers
1043 in SFmode, DFmode and XFmode */
1044 {4, 4, 8}, /* cost of storing fp registers
1045 in SFmode, DFmode and XFmode */
1046 2, /* cost of moving MMX register */
1047 {4, 4}, /* cost of loading MMX registers
1048 in SImode and DImode */
1049 {4, 4}, /* cost of storing MMX registers
1050 in SImode and DImode */
1051 2, /* cost of moving SSE register */
1052 {4, 4, 4}, /* cost of loading SSE registers
1053 in SImode, DImode and TImode */
1054 {4, 4, 4}, /* cost of storing SSE registers
1055 in SImode, DImode and TImode */
1056 2, /* MMX or SSE register to integer */
1057 /* On K8:
1058 MOVD reg64, xmmreg Double FSTORE 4
1059 MOVD reg32, xmmreg Double FSTORE 4
1060 On AMDFAM10:
1061 MOVD reg64, xmmreg Double FADD 3
1062 1/1 1/1
1063 MOVD reg32, xmmreg Double FADD 3
1064 1/1 1/1 */
1065 16, /* size of l1 cache. */
1066 2048, /* size of l2 cache. */
1067 64, /* size of prefetch block */
1068   /* New AMD processors never drop prefetches; if they cannot be performed
1069      immediately, they are queued.  We set the number of simultaneous
1070      prefetches to a large constant to reflect this (it is probably not a good
1071      idea to leave the number of prefetches completely unlimited, as their
1072      execution also takes some time).  */
1073 100, /* number of parallel prefetches */
1074 2, /* Branch cost */
1075 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1076 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1077 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1078 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1079 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1080 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1081
1082 bdver2_memcpy,
1083 bdver2_memset,
1084 6, /* scalar_stmt_cost. */
1085 4, /* scalar load_cost. */
1086 4, /* scalar_store_cost. */
1087 6, /* vec_stmt_cost. */
1088 0, /* vec_to_scalar_cost. */
1089 2, /* scalar_to_vec_cost. */
1090 4, /* vec_align_load_cost. */
1091 4, /* vec_unalign_load_cost. */
1092 4, /* vec_store_cost. */
1093 2, /* cond_taken_branch_cost. */
1094 1, /* cond_not_taken_branch_cost. */
1095 };
1096
1097
1098 /* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
1099    very small blocks it is better to use a loop.  For large blocks, libcall
1100    can do non-temporal accesses and beat inline code considerably.  */
1101 static stringop_algs bdver3_memcpy[2] = {
1102 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1103 {-1, rep_prefix_4_byte, false}}},
1104 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1105 {-1, libcall, false}}}};
1106 static stringop_algs bdver3_memset[2] = {
1107 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1108 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1109 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1110 {-1, libcall, false}}}};
1111 struct processor_costs bdver3_cost = {
1112 COSTS_N_INSNS (1), /* cost of an add instruction */
1113 COSTS_N_INSNS (1), /* cost of a lea instruction */
1114 COSTS_N_INSNS (1), /* variable shift costs */
1115 COSTS_N_INSNS (1), /* constant shift costs */
1116 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1117 COSTS_N_INSNS (4), /* HI */
1118 COSTS_N_INSNS (4), /* SI */
1119 COSTS_N_INSNS (6), /* DI */
1120 COSTS_N_INSNS (6)}, /* other */
1121 0, /* cost of multiply per each bit set */
1122 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1123 COSTS_N_INSNS (35), /* HI */
1124 COSTS_N_INSNS (51), /* SI */
1125 COSTS_N_INSNS (83), /* DI */
1126 COSTS_N_INSNS (83)}, /* other */
1127 COSTS_N_INSNS (1), /* cost of movsx */
1128 COSTS_N_INSNS (1), /* cost of movzx */
1129 8, /* "large" insn */
1130 9, /* MOVE_RATIO */
1131 4, /* cost for loading QImode using movzbl */
1132 {5, 5, 4}, /* cost of loading integer registers
1133 in QImode, HImode and SImode.
1134 Relative to reg-reg move (2). */
1135 {4, 4, 4}, /* cost of storing integer registers */
1136 2, /* cost of reg,reg fld/fst */
1137 {5, 5, 12}, /* cost of loading fp registers
1138 in SFmode, DFmode and XFmode */
1139 {4, 4, 8}, /* cost of storing fp registers
1140 in SFmode, DFmode and XFmode */
1141 2, /* cost of moving MMX register */
1142 {4, 4}, /* cost of loading MMX registers
1143 in SImode and DImode */
1144 {4, 4}, /* cost of storing MMX registers
1145 in SImode and DImode */
1146 2, /* cost of moving SSE register */
1147 {4, 4, 4}, /* cost of loading SSE registers
1148 in SImode, DImode and TImode */
1149 {4, 4, 4}, /* cost of storing SSE registers
1150 in SImode, DImode and TImode */
1151 2, /* MMX or SSE register to integer */
1152 16, /* size of l1 cache. */
1153 2048, /* size of l2 cache. */
1154 64, /* size of prefetch block */
1155   /* New AMD processors never drop prefetches; if they cannot be performed
1156      immediately, they are queued.  We set the number of simultaneous
1157      prefetches to a large constant to reflect this (it is probably not a good
1158      idea to leave the number of prefetches completely unlimited, as their
1159      execution also takes some time).  */
1160 100, /* number of parallel prefetches */
1161 2, /* Branch cost */
1162 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1168
1169 bdver3_memcpy,
1170 bdver3_memset,
1171 6, /* scalar_stmt_cost. */
1172 4, /* scalar load_cost. */
1173 4, /* scalar_store_cost. */
1174 6, /* vec_stmt_cost. */
1175 0, /* vec_to_scalar_cost. */
1176 2, /* scalar_to_vec_cost. */
1177 4, /* vec_align_load_cost. */
1178 4, /* vec_unalign_load_cost. */
1179 4, /* vec_store_cost. */
1180 2, /* cond_taken_branch_cost. */
1181 1, /* cond_not_taken_branch_cost. */
1182 };
1183
1184 /* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
1185    very small blocks it is better to use a loop.  For large blocks, libcall
1186    can do non-temporal accesses and beat inline code considerably.  */
1187 static stringop_algs bdver4_memcpy[2] = {
1188 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1189 {-1, rep_prefix_4_byte, false}}},
1190 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1191 {-1, libcall, false}}}};
1192 static stringop_algs bdver4_memset[2] = {
1193 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1194 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1195 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1196 {-1, libcall, false}}}};
1197 struct processor_costs bdver4_cost = {
1198 COSTS_N_INSNS (1), /* cost of an add instruction */
1199 COSTS_N_INSNS (1), /* cost of a lea instruction */
1200 COSTS_N_INSNS (1), /* variable shift costs */
1201 COSTS_N_INSNS (1), /* constant shift costs */
1202 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
1203 COSTS_N_INSNS (4), /* HI */
1204 COSTS_N_INSNS (4), /* SI */
1205 COSTS_N_INSNS (6), /* DI */
1206 COSTS_N_INSNS (6)}, /* other */
1207 0, /* cost of multiply per each bit set */
1208 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1209 COSTS_N_INSNS (35), /* HI */
1210 COSTS_N_INSNS (51), /* SI */
1211 COSTS_N_INSNS (83), /* DI */
1212 COSTS_N_INSNS (83)}, /* other */
1213 COSTS_N_INSNS (1), /* cost of movsx */
1214 COSTS_N_INSNS (1), /* cost of movzx */
1215 8, /* "large" insn */
1216 9, /* MOVE_RATIO */
1217 4, /* cost for loading QImode using movzbl */
1218 {5, 5, 4}, /* cost of loading integer registers
1219 in QImode, HImode and SImode.
1220 Relative to reg-reg move (2). */
1221 {4, 4, 4}, /* cost of storing integer registers */
1222 2, /* cost of reg,reg fld/fst */
1223 {5, 5, 12}, /* cost of loading fp registers
1224 in SFmode, DFmode and XFmode */
1225 {4, 4, 8}, /* cost of storing fp registers
1226 in SFmode, DFmode and XFmode */
1227 2, /* cost of moving MMX register */
1228 {4, 4}, /* cost of loading MMX registers
1229 in SImode and DImode */
1230 {4, 4}, /* cost of storing MMX registers
1231 in SImode and DImode */
1232 2, /* cost of moving SSE register */
1233 {4, 4, 4}, /* cost of loading SSE registers
1234 in SImode, DImode and TImode */
1235 {4, 4, 4}, /* cost of storing SSE registers
1236 in SImode, DImode and TImode */
1237 2, /* MMX or SSE register to integer */
1238 16, /* size of l1 cache. */
1239 2048, /* size of l2 cache. */
1240 64, /* size of prefetch block */
1241   /* New AMD processors never drop prefetches; if they cannot be performed
1242      immediately, they are queued.  We set the number of simultaneous
1243      prefetches to a large constant to reflect this (it is probably not a good
1244      idea to leave the number of prefetches completely unlimited, as their
1245      execution also takes some time).  */
1246 100, /* number of parallel prefetches */
1247 2, /* Branch cost */
1248 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1249 COSTS_N_INSNS (6), /* cost of FMUL instruction. */
1250 COSTS_N_INSNS (42), /* cost of FDIV instruction. */
1251 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1252 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1253 COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
1254
1255 bdver4_memcpy,
1256 bdver4_memset,
1257 6, /* scalar_stmt_cost. */
1258 4, /* scalar load_cost. */
1259 4, /* scalar_store_cost. */
1260 6, /* vec_stmt_cost. */
1261 0, /* vec_to_scalar_cost. */
1262 2, /* scalar_to_vec_cost. */
1263 4, /* vec_align_load_cost. */
1264 4, /* vec_unalign_load_cost. */
1265 4, /* vec_store_cost. */
1266 2, /* cond_taken_branch_cost. */
1267 1, /* cond_not_taken_branch_cost. */
1268 };
1269
1270 /* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
1271    very small blocks it is better to use a loop.  For large blocks, libcall can
1272    do non-temporal accesses and beat inline code considerably.  */
1273 static stringop_algs btver1_memcpy[2] = {
1274 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1275 {-1, rep_prefix_4_byte, false}}},
1276 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1277 {-1, libcall, false}}}};
1278 static stringop_algs btver1_memset[2] = {
1279 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1280 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1281 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1282 {-1, libcall, false}}}};
1283 const struct processor_costs btver1_cost = {
1284 COSTS_N_INSNS (1), /* cost of an add instruction */
1285 COSTS_N_INSNS (2), /* cost of a lea instruction */
1286 COSTS_N_INSNS (1), /* variable shift costs */
1287 COSTS_N_INSNS (1), /* constant shift costs */
1288 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1289 COSTS_N_INSNS (4), /* HI */
1290 COSTS_N_INSNS (3), /* SI */
1291 COSTS_N_INSNS (4), /* DI */
1292 COSTS_N_INSNS (5)}, /* other */
1293 0, /* cost of multiply per each bit set */
1294 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1295 COSTS_N_INSNS (35), /* HI */
1296 COSTS_N_INSNS (51), /* SI */
1297 COSTS_N_INSNS (83), /* DI */
1298 COSTS_N_INSNS (83)}, /* other */
1299 COSTS_N_INSNS (1), /* cost of movsx */
1300 COSTS_N_INSNS (1), /* cost of movzx */
1301 8, /* "large" insn */
1302 9, /* MOVE_RATIO */
1303 4, /* cost for loading QImode using movzbl */
1304 {3, 4, 3}, /* cost of loading integer registers
1305 in QImode, HImode and SImode.
1306 Relative to reg-reg move (2). */
1307 {3, 4, 3}, /* cost of storing integer registers */
1308 4, /* cost of reg,reg fld/fst */
1309 {4, 4, 12}, /* cost of loading fp registers
1310 in SFmode, DFmode and XFmode */
1311 {6, 6, 8}, /* cost of storing fp registers
1312 in SFmode, DFmode and XFmode */
1313 2, /* cost of moving MMX register */
1314 {3, 3}, /* cost of loading MMX registers
1315 in SImode and DImode */
1316 {4, 4}, /* cost of storing MMX registers
1317 in SImode and DImode */
1318 2, /* cost of moving SSE register */
1319 {4, 4, 3}, /* cost of loading SSE registers
1320 in SImode, DImode and TImode */
1321 {4, 4, 5}, /* cost of storing SSE registers
1322 in SImode, DImode and TImode */
1323 3, /* MMX or SSE register to integer */
1324 /* On K8:
1325 MOVD reg64, xmmreg Double FSTORE 4
1326 MOVD reg32, xmmreg Double FSTORE 4
1327 On AMDFAM10:
1328 MOVD reg64, xmmreg Double FADD 3
1329 1/1 1/1
1330 MOVD reg32, xmmreg Double FADD 3
1331 1/1 1/1 */
1332 32, /* size of l1 cache. */
1333 512, /* size of l2 cache. */
1334 64, /* size of prefetch block */
1335 100, /* number of parallel prefetches */
1336 2, /* Branch cost */
1337 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1338 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1339 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1340 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1341 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1342 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1343
1344 btver1_memcpy,
1345 btver1_memset,
1346 4, /* scalar_stmt_cost. */
1347 2, /* scalar load_cost. */
1348 2, /* scalar_store_cost. */
1349 6, /* vec_stmt_cost. */
1350 0, /* vec_to_scalar_cost. */
1351 2, /* scalar_to_vec_cost. */
1352 2, /* vec_align_load_cost. */
1353 2, /* vec_unalign_load_cost. */
1354 2, /* vec_store_cost. */
1355 2, /* cond_taken_branch_cost. */
1356 1, /* cond_not_taken_branch_cost. */
1357 };
1358
1359 static stringop_algs btver2_memcpy[2] = {
1360 {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1361 {-1, rep_prefix_4_byte, false}}},
1362 {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1363 {-1, libcall, false}}}};
1364 static stringop_algs btver2_memset[2] = {
1365 {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1366 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1367 {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1368 {-1, libcall, false}}}};
1369 const struct processor_costs btver2_cost = {
1370 COSTS_N_INSNS (1), /* cost of an add instruction */
1371 COSTS_N_INSNS (2), /* cost of a lea instruction */
1372 COSTS_N_INSNS (1), /* variable shift costs */
1373 COSTS_N_INSNS (1), /* constant shift costs */
1374 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1375 COSTS_N_INSNS (4), /* HI */
1376 COSTS_N_INSNS (3), /* SI */
1377 COSTS_N_INSNS (4), /* DI */
1378 COSTS_N_INSNS (5)}, /* other */
1379 0, /* cost of multiply per each bit set */
1380 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
1381 COSTS_N_INSNS (35), /* HI */
1382 COSTS_N_INSNS (51), /* SI */
1383 COSTS_N_INSNS (83), /* DI */
1384 COSTS_N_INSNS (83)}, /* other */
1385 COSTS_N_INSNS (1), /* cost of movsx */
1386 COSTS_N_INSNS (1), /* cost of movzx */
1387 8, /* "large" insn */
1388 9, /* MOVE_RATIO */
1389 4, /* cost for loading QImode using movzbl */
1390 {3, 4, 3}, /* cost of loading integer registers
1391 in QImode, HImode and SImode.
1392 Relative to reg-reg move (2). */
1393 {3, 4, 3}, /* cost of storing integer registers */
1394 4, /* cost of reg,reg fld/fst */
1395 {4, 4, 12}, /* cost of loading fp registers
1396 in SFmode, DFmode and XFmode */
1397 {6, 6, 8}, /* cost of storing fp registers
1398 in SFmode, DFmode and XFmode */
1399 2, /* cost of moving MMX register */
1400 {3, 3}, /* cost of loading MMX registers
1401 in SImode and DImode */
1402 {4, 4}, /* cost of storing MMX registers
1403 in SImode and DImode */
1404 2, /* cost of moving SSE register */
1405 {4, 4, 3}, /* cost of loading SSE registers
1406 in SImode, DImode and TImode */
1407 {4, 4, 5}, /* cost of storing SSE registers
1408 in SImode, DImode and TImode */
1409 3, /* MMX or SSE register to integer */
1410 /* On K8:
1411 MOVD reg64, xmmreg Double FSTORE 4
1412 MOVD reg32, xmmreg Double FSTORE 4
1413 On AMDFAM10:
1414 MOVD reg64, xmmreg Double FADD 3
1415 1/1 1/1
1416 MOVD reg32, xmmreg Double FADD 3
1417 1/1 1/1 */
1418 32, /* size of l1 cache. */
1419 2048, /* size of l2 cache. */
1420 64, /* size of prefetch block */
1421 100, /* number of parallel prefetches */
1422 2, /* Branch cost */
1423 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
1424 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
1425 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
1426 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1427 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1428 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
1429 btver2_memcpy,
1430 btver2_memset,
1431 4, /* scalar_stmt_cost. */
1432   2,					 /* scalar_load_cost.  */
1433 2, /* scalar_store_cost. */
1434 6, /* vec_stmt_cost. */
1435 0, /* vec_to_scalar_cost. */
1436 2, /* scalar_to_vec_cost. */
1437 2, /* vec_align_load_cost. */
1438 2, /* vec_unalign_load_cost. */
1439 2, /* vec_store_cost. */
1440 2, /* cond_taken_branch_cost. */
1441 1, /* cond_not_taken_branch_cost. */
1442 };
1443
1444 static stringop_algs pentium4_memcpy[2] = {
1445 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1446 DUMMY_STRINGOP_ALGS};
1447 static stringop_algs pentium4_memset[2] = {
1448 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1449 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1450 DUMMY_STRINGOP_ALGS};
1451
1452 static const
1453 struct processor_costs pentium4_cost = {
1454 COSTS_N_INSNS (1), /* cost of an add instruction */
1455 COSTS_N_INSNS (3), /* cost of a lea instruction */
1456 COSTS_N_INSNS (4), /* variable shift costs */
1457 COSTS_N_INSNS (4), /* constant shift costs */
1458 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
1459 COSTS_N_INSNS (15), /* HI */
1460 COSTS_N_INSNS (15), /* SI */
1461 COSTS_N_INSNS (15), /* DI */
1462 COSTS_N_INSNS (15)}, /* other */
1463 0, /* cost of multiply per each bit set */
1464 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
1465 COSTS_N_INSNS (56), /* HI */
1466 COSTS_N_INSNS (56), /* SI */
1467 COSTS_N_INSNS (56), /* DI */
1468 COSTS_N_INSNS (56)}, /* other */
1469 COSTS_N_INSNS (1), /* cost of movsx */
1470 COSTS_N_INSNS (1), /* cost of movzx */
1471 16, /* "large" insn */
1472 6, /* MOVE_RATIO */
1473 2, /* cost for loading QImode using movzbl */
1474 {4, 5, 4}, /* cost of loading integer registers
1475 in QImode, HImode and SImode.
1476 Relative to reg-reg move (2). */
1477 {2, 3, 2}, /* cost of storing integer registers */
1478 2, /* cost of reg,reg fld/fst */
1479 {2, 2, 6}, /* cost of loading fp registers
1480 in SFmode, DFmode and XFmode */
1481 {4, 4, 6}, /* cost of storing fp registers
1482 in SFmode, DFmode and XFmode */
1483 2, /* cost of moving MMX register */
1484 {2, 2}, /* cost of loading MMX registers
1485 in SImode and DImode */
1486 {2, 2}, /* cost of storing MMX registers
1487 in SImode and DImode */
1488 12, /* cost of moving SSE register */
1489 {12, 12, 12}, /* cost of loading SSE registers
1490 in SImode, DImode and TImode */
1491 {2, 2, 8}, /* cost of storing SSE registers
1492 in SImode, DImode and TImode */
1493 10, /* MMX or SSE register to integer */
1494 8, /* size of l1 cache. */
1495 256, /* size of l2 cache. */
1496 64, /* size of prefetch block */
1497 6, /* number of parallel prefetches */
1498 2, /* Branch cost */
1499 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
1500 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
1501 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
1502 COSTS_N_INSNS (2), /* cost of FABS instruction. */
1503 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
1504 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
1505 pentium4_memcpy,
1506 pentium4_memset,
1507 1, /* scalar_stmt_cost. */
1508   1,					 /* scalar_load_cost.  */
1509 1, /* scalar_store_cost. */
1510 1, /* vec_stmt_cost. */
1511 1, /* vec_to_scalar_cost. */
1512 1, /* scalar_to_vec_cost. */
1513 1, /* vec_align_load_cost. */
1514 2, /* vec_unalign_load_cost. */
1515 1, /* vec_store_cost. */
1516 3, /* cond_taken_branch_cost. */
1517 1, /* cond_not_taken_branch_cost. */
1518 };
1519
1520 static stringop_algs nocona_memcpy[2] = {
1521 {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1522 {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1523 {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1524
1525 static stringop_algs nocona_memset[2] = {
1526 {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1527 {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1528 {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1529 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1530
1531 static const
1532 struct processor_costs nocona_cost = {
1533 COSTS_N_INSNS (1), /* cost of an add instruction */
1534 COSTS_N_INSNS (1), /* cost of a lea instruction */
1535 COSTS_N_INSNS (1), /* variable shift costs */
1536 COSTS_N_INSNS (1), /* constant shift costs */
1537 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
1538 COSTS_N_INSNS (10), /* HI */
1539 COSTS_N_INSNS (10), /* SI */
1540 COSTS_N_INSNS (10), /* DI */
1541 COSTS_N_INSNS (10)}, /* other */
1542 0, /* cost of multiply per each bit set */
1543 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
1544 COSTS_N_INSNS (66), /* HI */
1545 COSTS_N_INSNS (66), /* SI */
1546 COSTS_N_INSNS (66), /* DI */
1547 COSTS_N_INSNS (66)}, /* other */
1548 COSTS_N_INSNS (1), /* cost of movsx */
1549 COSTS_N_INSNS (1), /* cost of movzx */
1550 16, /* "large" insn */
1551 17, /* MOVE_RATIO */
1552 4, /* cost for loading QImode using movzbl */
1553 {4, 4, 4}, /* cost of loading integer registers
1554 in QImode, HImode and SImode.
1555 Relative to reg-reg move (2). */
1556 {4, 4, 4}, /* cost of storing integer registers */
1557 3, /* cost of reg,reg fld/fst */
1558 {12, 12, 12}, /* cost of loading fp registers
1559 in SFmode, DFmode and XFmode */
1560 {4, 4, 4}, /* cost of storing fp registers
1561 in SFmode, DFmode and XFmode */
1562 6, /* cost of moving MMX register */
1563 {12, 12}, /* cost of loading MMX registers
1564 in SImode and DImode */
1565 {12, 12}, /* cost of storing MMX registers
1566 in SImode and DImode */
1567 6, /* cost of moving SSE register */
1568 {12, 12, 12}, /* cost of loading SSE registers
1569 in SImode, DImode and TImode */
1570 {12, 12, 12}, /* cost of storing SSE registers
1571 in SImode, DImode and TImode */
1572 8, /* MMX or SSE register to integer */
1573 8, /* size of l1 cache. */
1574 1024, /* size of l2 cache. */
1575 64, /* size of prefetch block */
1576 8, /* number of parallel prefetches */
1577 1, /* Branch cost */
1578 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
1579 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1580 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
1581 COSTS_N_INSNS (3), /* cost of FABS instruction. */
1582 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
1583 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
1584 nocona_memcpy,
1585 nocona_memset,
1586 1, /* scalar_stmt_cost. */
1587   1,					 /* scalar_load_cost.  */
1588 1, /* scalar_store_cost. */
1589 1, /* vec_stmt_cost. */
1590 1, /* vec_to_scalar_cost. */
1591 1, /* scalar_to_vec_cost. */
1592 1, /* vec_align_load_cost. */
1593 2, /* vec_unalign_load_cost. */
1594 1, /* vec_store_cost. */
1595 3, /* cond_taken_branch_cost. */
1596 1, /* cond_not_taken_branch_cost. */
1597 };
1598
1599 static stringop_algs atom_memcpy[2] = {
1600 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1601 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1602 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1603 static stringop_algs atom_memset[2] = {
1604 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1605 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1606 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1607 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1608 static const
1609 struct processor_costs atom_cost = {
1610 COSTS_N_INSNS (1), /* cost of an add instruction */
1611 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1612 COSTS_N_INSNS (1), /* variable shift costs */
1613 COSTS_N_INSNS (1), /* constant shift costs */
1614 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1615 COSTS_N_INSNS (4), /* HI */
1616 COSTS_N_INSNS (3), /* SI */
1617 COSTS_N_INSNS (4), /* DI */
1618 COSTS_N_INSNS (2)}, /* other */
1619 0, /* cost of multiply per each bit set */
1620 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1621 COSTS_N_INSNS (26), /* HI */
1622 COSTS_N_INSNS (42), /* SI */
1623 COSTS_N_INSNS (74), /* DI */
1624 COSTS_N_INSNS (74)}, /* other */
1625 COSTS_N_INSNS (1), /* cost of movsx */
1626 COSTS_N_INSNS (1), /* cost of movzx */
1627 8, /* "large" insn */
1628 17, /* MOVE_RATIO */
1629 4, /* cost for loading QImode using movzbl */
1630 {4, 4, 4}, /* cost of loading integer registers
1631 in QImode, HImode and SImode.
1632 Relative to reg-reg move (2). */
1633 {4, 4, 4}, /* cost of storing integer registers */
1634 4, /* cost of reg,reg fld/fst */
1635 {12, 12, 12}, /* cost of loading fp registers
1636 in SFmode, DFmode and XFmode */
1637 {6, 6, 8}, /* cost of storing fp registers
1638 in SFmode, DFmode and XFmode */
1639 2, /* cost of moving MMX register */
1640 {8, 8}, /* cost of loading MMX registers
1641 in SImode and DImode */
1642 {8, 8}, /* cost of storing MMX registers
1643 in SImode and DImode */
1644 2, /* cost of moving SSE register */
1645 {8, 8, 8}, /* cost of loading SSE registers
1646 in SImode, DImode and TImode */
1647 {8, 8, 8}, /* cost of storing SSE registers
1648 in SImode, DImode and TImode */
1649 5, /* MMX or SSE register to integer */
1650 32, /* size of l1 cache. */
1651 256, /* size of l2 cache. */
1652 64, /* size of prefetch block */
1653 6, /* number of parallel prefetches */
1654 3, /* Branch cost */
1655 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1656 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1657 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1658 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1659 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1660 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1661 atom_memcpy,
1662 atom_memset,
1663 1, /* scalar_stmt_cost. */
1664   1,					 /* scalar_load_cost.  */
1665 1, /* scalar_store_cost. */
1666 1, /* vec_stmt_cost. */
1667 1, /* vec_to_scalar_cost. */
1668 1, /* scalar_to_vec_cost. */
1669 1, /* vec_align_load_cost. */
1670 2, /* vec_unalign_load_cost. */
1671 1, /* vec_store_cost. */
1672 3, /* cond_taken_branch_cost. */
1673 1, /* cond_not_taken_branch_cost. */
1674 };
1675
1676 static stringop_algs slm_memcpy[2] = {
1677 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1678 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1679 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1680 static stringop_algs slm_memset[2] = {
1681 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1682 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1683 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1684 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1685 static const
1686 struct processor_costs slm_cost = {
1687 COSTS_N_INSNS (1), /* cost of an add instruction */
1688 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1689 COSTS_N_INSNS (1), /* variable shift costs */
1690 COSTS_N_INSNS (1), /* constant shift costs */
1691 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1692 COSTS_N_INSNS (3), /* HI */
1693 COSTS_N_INSNS (3), /* SI */
1694 COSTS_N_INSNS (4), /* DI */
1695 COSTS_N_INSNS (2)}, /* other */
1696 0, /* cost of multiply per each bit set */
1697 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1698 COSTS_N_INSNS (26), /* HI */
1699 COSTS_N_INSNS (42), /* SI */
1700 COSTS_N_INSNS (74), /* DI */
1701 COSTS_N_INSNS (74)}, /* other */
1702 COSTS_N_INSNS (1), /* cost of movsx */
1703 COSTS_N_INSNS (1), /* cost of movzx */
1704 8, /* "large" insn */
1705 17, /* MOVE_RATIO */
1706 4, /* cost for loading QImode using movzbl */
1707 {4, 4, 4}, /* cost of loading integer registers
1708 in QImode, HImode and SImode.
1709 Relative to reg-reg move (2). */
1710 {4, 4, 4}, /* cost of storing integer registers */
1711 4, /* cost of reg,reg fld/fst */
1712 {12, 12, 12}, /* cost of loading fp registers
1713 in SFmode, DFmode and XFmode */
1714 {6, 6, 8}, /* cost of storing fp registers
1715 in SFmode, DFmode and XFmode */
1716 2, /* cost of moving MMX register */
1717 {8, 8}, /* cost of loading MMX registers
1718 in SImode and DImode */
1719 {8, 8}, /* cost of storing MMX registers
1720 in SImode and DImode */
1721 2, /* cost of moving SSE register */
1722 {8, 8, 8}, /* cost of loading SSE registers
1723 in SImode, DImode and TImode */
1724 {8, 8, 8}, /* cost of storing SSE registers
1725 in SImode, DImode and TImode */
1726 5, /* MMX or SSE register to integer */
1727 32, /* size of l1 cache. */
1728 256, /* size of l2 cache. */
1729 64, /* size of prefetch block */
1730 6, /* number of parallel prefetches */
1731 3, /* Branch cost */
1732 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1733 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1734 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1735 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1736 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1737 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1738 slm_memcpy,
1739 slm_memset,
1740 1, /* scalar_stmt_cost. */
1741   1,					 /* scalar_load_cost.  */
1742 1, /* scalar_store_cost. */
1743 1, /* vec_stmt_cost. */
1744 4, /* vec_to_scalar_cost. */
1745 1, /* scalar_to_vec_cost. */
1746 1, /* vec_align_load_cost. */
1747 2, /* vec_unalign_load_cost. */
1748 1, /* vec_store_cost. */
1749 3, /* cond_taken_branch_cost. */
1750 1, /* cond_not_taken_branch_cost. */
1751 };
1752
1753 static stringop_algs intel_memcpy[2] = {
1754 {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1755 {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1756 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1757 static stringop_algs intel_memset[2] = {
1758 {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1759 {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1760 {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1761 {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1762 static const
1763 struct processor_costs intel_cost = {
1764 COSTS_N_INSNS (1), /* cost of an add instruction */
1765 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1766 COSTS_N_INSNS (1), /* variable shift costs */
1767 COSTS_N_INSNS (1), /* constant shift costs */
1768 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1769 COSTS_N_INSNS (3), /* HI */
1770 COSTS_N_INSNS (3), /* SI */
1771 COSTS_N_INSNS (4), /* DI */
1772 COSTS_N_INSNS (2)}, /* other */
1773 0, /* cost of multiply per each bit set */
1774 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1775 COSTS_N_INSNS (26), /* HI */
1776 COSTS_N_INSNS (42), /* SI */
1777 COSTS_N_INSNS (74), /* DI */
1778 COSTS_N_INSNS (74)}, /* other */
1779 COSTS_N_INSNS (1), /* cost of movsx */
1780 COSTS_N_INSNS (1), /* cost of movzx */
1781 8, /* "large" insn */
1782 17, /* MOVE_RATIO */
1783 4, /* cost for loading QImode using movzbl */
1784 {4, 4, 4}, /* cost of loading integer registers
1785 in QImode, HImode and SImode.
1786 Relative to reg-reg move (2). */
1787 {4, 4, 4}, /* cost of storing integer registers */
1788 4, /* cost of reg,reg fld/fst */
1789 {12, 12, 12}, /* cost of loading fp registers
1790 in SFmode, DFmode and XFmode */
1791 {6, 6, 8}, /* cost of storing fp registers
1792 in SFmode, DFmode and XFmode */
1793 2, /* cost of moving MMX register */
1794 {8, 8}, /* cost of loading MMX registers
1795 in SImode and DImode */
1796 {8, 8}, /* cost of storing MMX registers
1797 in SImode and DImode */
1798 2, /* cost of moving SSE register */
1799 {8, 8, 8}, /* cost of loading SSE registers
1800 in SImode, DImode and TImode */
1801 {8, 8, 8}, /* cost of storing SSE registers
1802 in SImode, DImode and TImode */
1803 5, /* MMX or SSE register to integer */
1804 32, /* size of l1 cache. */
1805 256, /* size of l2 cache. */
1806 64, /* size of prefetch block */
1807 6, /* number of parallel prefetches */
1808 3, /* Branch cost */
1809 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1810 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1811 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1812 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1813 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1814 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1815 intel_memcpy,
1816 intel_memset,
1817 1, /* scalar_stmt_cost. */
1818   1,					 /* scalar_load_cost.  */
1819 1, /* scalar_store_cost. */
1820 1, /* vec_stmt_cost. */
1821 4, /* vec_to_scalar_cost. */
1822 1, /* scalar_to_vec_cost. */
1823 1, /* vec_align_load_cost. */
1824 2, /* vec_unalign_load_cost. */
1825 1, /* vec_store_cost. */
1826 3, /* cond_taken_branch_cost. */
1827 1, /* cond_not_taken_branch_cost. */
1828 };
1829
1830 /* Generic should produce code tuned for Core-i7 (and newer chips)
1831 and btver1 (and newer chips). */
1832
1833 static stringop_algs generic_memcpy[2] = {
1834 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1835 {-1, libcall, false}}},
1836 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1837 {-1, libcall, false}}}};
1838 static stringop_algs generic_memset[2] = {
1839 {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1840 {-1, libcall, false}}},
1841 {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1842 {-1, libcall, false}}}};
1843 static const
1844 struct processor_costs generic_cost = {
1845 COSTS_N_INSNS (1), /* cost of an add instruction */
1846   /* On all chips taken into consideration, lea is 2 cycles or more.  With
1847      this cost, however, our current implementation of synth_mult results in
1848      the use of unnecessary temporary registers, causing regressions on several
1849      SPECfp benchmarks.  */
1850 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1851 COSTS_N_INSNS (1), /* variable shift costs */
1852 COSTS_N_INSNS (1), /* constant shift costs */
1853 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1854 COSTS_N_INSNS (4), /* HI */
1855 COSTS_N_INSNS (3), /* SI */
1856 COSTS_N_INSNS (4), /* DI */
1857 COSTS_N_INSNS (2)}, /* other */
1858 0, /* cost of multiply per each bit set */
1859 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1860 COSTS_N_INSNS (26), /* HI */
1861 COSTS_N_INSNS (42), /* SI */
1862 COSTS_N_INSNS (74), /* DI */
1863 COSTS_N_INSNS (74)}, /* other */
1864 COSTS_N_INSNS (1), /* cost of movsx */
1865 COSTS_N_INSNS (1), /* cost of movzx */
1866 8, /* "large" insn */
1867 17, /* MOVE_RATIO */
1868 4, /* cost for loading QImode using movzbl */
1869 {4, 4, 4}, /* cost of loading integer registers
1870 in QImode, HImode and SImode.
1871 Relative to reg-reg move (2). */
1872 {4, 4, 4}, /* cost of storing integer registers */
1873 4, /* cost of reg,reg fld/fst */
1874 {12, 12, 12}, /* cost of loading fp registers
1875 in SFmode, DFmode and XFmode */
1876 {6, 6, 8}, /* cost of storing fp registers
1877 in SFmode, DFmode and XFmode */
1878 2, /* cost of moving MMX register */
1879 {8, 8}, /* cost of loading MMX registers
1880 in SImode and DImode */
1881 {8, 8}, /* cost of storing MMX registers
1882 in SImode and DImode */
1883 2, /* cost of moving SSE register */
1884 {8, 8, 8}, /* cost of loading SSE registers
1885 in SImode, DImode and TImode */
1886 {8, 8, 8}, /* cost of storing SSE registers
1887 in SImode, DImode and TImode */
1888 5, /* MMX or SSE register to integer */
1889 32, /* size of l1 cache. */
1890 512, /* size of l2 cache. */
1891 64, /* size of prefetch block */
1892 6, /* number of parallel prefetches */
1893   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
1894      value is increased to the perhaps more appropriate value of 5.  */
1895 3, /* Branch cost */
1896 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1897 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1898 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1899 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1900 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1901 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1902 generic_memcpy,
1903 generic_memset,
1904 1, /* scalar_stmt_cost. */
1905   1,					 /* scalar_load_cost.  */
1906 1, /* scalar_store_cost. */
1907 1, /* vec_stmt_cost. */
1908 1, /* vec_to_scalar_cost. */
1909 1, /* scalar_to_vec_cost. */
1910 1, /* vec_align_load_cost. */
1911 2, /* vec_unalign_load_cost. */
1912 1, /* vec_store_cost. */
1913 3, /* cond_taken_branch_cost. */
1914 1, /* cond_not_taken_branch_cost. */
1915 };
1916
1917 /* core_cost should produce code tuned for the Core family of CPUs.  */
1918 static stringop_algs core_memcpy[2] = {
1919 {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1920 {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1921 {-1, libcall, false}}}};
1922 static stringop_algs core_memset[2] = {
1923 {libcall, {{6, loop_1_byte, true},
1924 {24, loop, true},
1925 {8192, rep_prefix_4_byte, true},
1926 {-1, libcall, false}}},
1927 {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1928 {-1, libcall, false}}}};
1929
1930 static const
1931 struct processor_costs core_cost = {
1932 COSTS_N_INSNS (1), /* cost of an add instruction */
1933   /* On all chips taken into consideration, lea is 2 cycles or more.  With
1934      this cost, however, our current implementation of synth_mult results in
1935      the use of unnecessary temporary registers, causing regressions on several
1936      SPECfp benchmarks.  */
1937 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1938 COSTS_N_INSNS (1), /* variable shift costs */
1939 COSTS_N_INSNS (1), /* constant shift costs */
1940 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1941 COSTS_N_INSNS (4), /* HI */
1942 COSTS_N_INSNS (3), /* SI */
1943 COSTS_N_INSNS (4), /* DI */
1944 COSTS_N_INSNS (2)}, /* other */
1945 0, /* cost of multiply per each bit set */
1946 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1947 COSTS_N_INSNS (26), /* HI */
1948 COSTS_N_INSNS (42), /* SI */
1949 COSTS_N_INSNS (74), /* DI */
1950 COSTS_N_INSNS (74)}, /* other */
1951 COSTS_N_INSNS (1), /* cost of movsx */
1952 COSTS_N_INSNS (1), /* cost of movzx */
1953 8, /* "large" insn */
1954 17, /* MOVE_RATIO */
1955 4, /* cost for loading QImode using movzbl */
1956 {4, 4, 4}, /* cost of loading integer registers
1957 in QImode, HImode and SImode.
1958 Relative to reg-reg move (2). */
1959 {4, 4, 4}, /* cost of storing integer registers */
1960 4, /* cost of reg,reg fld/fst */
1961 {12, 12, 12}, /* cost of loading fp registers
1962 in SFmode, DFmode and XFmode */
1963 {6, 6, 8}, /* cost of storing fp registers
1964 in SFmode, DFmode and XFmode */
1965 2, /* cost of moving MMX register */
1966 {8, 8}, /* cost of loading MMX registers
1967 in SImode and DImode */
1968 {8, 8}, /* cost of storing MMX registers
1969 in SImode and DImode */
1970 2, /* cost of moving SSE register */
1971 {8, 8, 8}, /* cost of loading SSE registers
1972 in SImode, DImode and TImode */
1973 {8, 8, 8}, /* cost of storing SSE registers
1974 in SImode, DImode and TImode */
1975 5, /* MMX or SSE register to integer */
1976 64, /* size of l1 cache. */
1977 512, /* size of l2 cache. */
1978 64, /* size of prefetch block */
1979 6, /* number of parallel prefetches */
1980 /* FIXME perhaps more appropriate value is 5. */
1981 3, /* Branch cost */
1982 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1983 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1984 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1985 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1986 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1987 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1988 core_memcpy,
1989 core_memset,
1990 1, /* scalar_stmt_cost. */
1991   1,					 /* scalar_load_cost.  */
1992 1, /* scalar_store_cost. */
1993 1, /* vec_stmt_cost. */
1994 1, /* vec_to_scalar_cost. */
1995 1, /* scalar_to_vec_cost. */
1996 1, /* vec_align_load_cost. */
1997 2, /* vec_unalign_load_cost. */
1998 1, /* vec_store_cost. */
1999 3, /* cond_taken_branch_cost. */
2000 1, /* cond_not_taken_branch_cost. */
2001 };
2002
2003
2004 /* Set by -mtune. */
2005 const struct processor_costs *ix86_tune_cost = &pentium_cost;
2006
2007 /* Set by -mtune or -Os. */
2008 const struct processor_costs *ix86_cost = &pentium_cost;
2009
2010 /* Processor feature/optimization bitmasks. */
2011 #define m_386 (1<<PROCESSOR_I386)
2012 #define m_486 (1<<PROCESSOR_I486)
2013 #define m_PENT (1<<PROCESSOR_PENTIUM)
2014 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2015 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2016 #define m_NOCONA (1<<PROCESSOR_NOCONA)
2017 #define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2018 #define m_CORE2 (1<<PROCESSOR_CORE2)
2019 #define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2020 #define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2021 #define m_HASWELL (1<<PROCESSOR_HASWELL)
2022 #define m_CORE_ALL (m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
2023 #define m_BONNELL (1<<PROCESSOR_BONNELL)
2024 #define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2025 #define m_INTEL (1<<PROCESSOR_INTEL)
2026
2027 #define m_GEODE (1<<PROCESSOR_GEODE)
2028 #define m_K6 (1<<PROCESSOR_K6)
2029 #define m_K6_GEODE (m_K6 | m_GEODE)
2030 #define m_K8 (1<<PROCESSOR_K8)
2031 #define m_ATHLON (1<<PROCESSOR_ATHLON)
2032 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
2033 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2034 #define m_BDVER1 (1<<PROCESSOR_BDVER1)
2035 #define m_BDVER2 (1<<PROCESSOR_BDVER2)
2036 #define m_BDVER3 (1<<PROCESSOR_BDVER3)
2037 #define m_BDVER4 (1<<PROCESSOR_BDVER4)
2038 #define m_BTVER1 (1<<PROCESSOR_BTVER1)
2039 #define m_BTVER2 (1<<PROCESSOR_BTVER2)
2040 #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2041 #define m_BTVER (m_BTVER1 | m_BTVER2)
2042 #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2043
2044 #define m_GENERIC (1<<PROCESSOR_GENERIC)
2045
2046 const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2047 #undef DEF_TUNE
2048 #define DEF_TUNE(tune, name, selector) name,
2049 #include "x86-tune.def"
2050 #undef DEF_TUNE
2051 };
2052
2053 /* Feature tests against the various tunings. */
2054 unsigned char ix86_tune_features[X86_TUNE_LAST];
2055
2056 /* Feature tests against the various tunings used to create ix86_tune_features
2057 based on the processor mask. */
2058 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2059 #undef DEF_TUNE
2060 #define DEF_TUNE(tune, name, selector) selector,
2061 #include "x86-tune.def"
2062 #undef DEF_TUNE
2063 };
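/* Illustration of the DEF_TUNE expansion above (a sketch only; the real
   entries live in x86-tune.def and may differ).  Assuming x86-tune.def
   contained a hypothetical entry such as

     DEF_TUNE (X86_TUNE_EXAMPLE, "example", m_PENT | m_GENERIC)

   the two #include expansions above would produce

     const char* ix86_tune_feature_names[X86_TUNE_LAST] = { ..., "example", ... };
     static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { ..., m_PENT | m_GENERIC, ... };

   so the feature is enabled whenever (1u << ix86_tune) intersects the
   selector mask; see set_ix86_tune_features below.  */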
2064
2065 /* Feature tests against the various architecture variations. */
2066 unsigned char ix86_arch_features[X86_ARCH_LAST];
2067
2068 /* Feature tests against the various architecture variations, used to create
2069 ix86_arch_features based on the processor mask. */
2070 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2071 /* X86_ARCH_CMOV: Conditional move was added for pentiumpro. */
2072 ~(m_386 | m_486 | m_PENT | m_K6),
2073
2074 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
2075 ~m_386,
2076
2077 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2078 ~(m_386 | m_486),
2079
2080 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
2081 ~m_386,
2082
2083 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
2084 ~m_386,
2085 };
2086
2087 /* In case the average insn count for a single function invocation is
2088    lower than this constant, emit fast (but longer) prologue and
2089    epilogue code.  */
2090 #define FAST_PROLOGUE_INSN_COUNT 20
2091
2092 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
2093 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
2094 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2095 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2096
2097 /* Array of the smallest class containing reg number REGNO, indexed by
2098 REGNO. Used by REGNO_REG_CLASS in i386.h. */
2099
2100 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2101 {
2102 /* ax, dx, cx, bx */
2103 AREG, DREG, CREG, BREG,
2104 /* si, di, bp, sp */
2105 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2106 /* FP registers */
2107 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2108 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2109 /* arg pointer */
2110 NON_Q_REGS,
2111 /* flags, fpsr, fpcr, frame */
2112 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2113 /* SSE registers */
2114 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2115 SSE_REGS, SSE_REGS,
2116 /* MMX registers */
2117 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2118 MMX_REGS, MMX_REGS,
2119 /* REX registers */
2120 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2121 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2122 /* SSE REX registers */
2123 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2124 SSE_REGS, SSE_REGS,
2125 /* AVX-512 SSE registers */
2126 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2127 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2128 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2129 EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2130 /* Mask registers. */
2131 MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2132 MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2133 };
2134
2135 /* The "default" register map used in 32bit mode. */
2136
2137 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2138 {
2139 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
2140 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
2141 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2142 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
2143 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
2144 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2145 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2146 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2147 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2148 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2149 };
2150
2151 /* The "default" register map used in 64bit mode. */
2152
2153 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2154 {
2155 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
2156 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
2157 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2158 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
2159 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
2160 8,9,10,11,12,13,14,15, /* extended integer registers */
2161 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
2162 67, 68, 69, 70, 71, 72, 73, 74, /* AVX-512 registers 16-23 */
2163 75, 76, 77, 78, 79, 80, 81, 82, /* AVX-512 registers 24-31 */
2164 118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2165 };
2166
2167 /* Define the register numbers to be used in Dwarf debugging information.
2168 The SVR4 reference port C compiler uses the following register numbers
2169 in its Dwarf output code:
2170 0 for %eax (gcc regno = 0)
2171 1 for %ecx (gcc regno = 2)
2172 2 for %edx (gcc regno = 1)
2173 3 for %ebx (gcc regno = 3)
2174 4 for %esp (gcc regno = 7)
2175 5 for %ebp (gcc regno = 6)
2176 6 for %esi (gcc regno = 4)
2177 7 for %edi (gcc regno = 5)
2178 The following three DWARF register numbers are never generated by
2179 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2180 believes these numbers have these meanings.
2181 8 for %eip (no gcc equivalent)
2182 9 for %eflags (gcc regno = 17)
2183 10 for %trapno (no gcc equivalent)
2184 It is not at all clear how we should number the FP stack registers
2185 for the x86 architecture. If the version of SDB on x86/svr4 were
2186 a bit less brain dead with respect to floating-point then we would
2187 have a precedent to follow with respect to DWARF register numbers
2188 for x86 FP registers, but the SDB on x86/svr4 is so completely
2189 broken with respect to FP registers that it is hardly worth thinking
2190 of it as something to strive for compatibility with.
2191 The version of x86/svr4 SDB I have at the moment does (partially)
2192 seem to believe that DWARF register number 11 is associated with
2193 the x86 register %st(0), but that's about all. Higher DWARF
2194 register numbers don't seem to be associated with anything in
2195 particular, and even for DWARF regno 11, SDB only seems to under-
2196 stand that it should say that a variable lives in %st(0) (when
2197 asked via an `=' command) if we said it was in DWARF regno 11,
2198 but SDB still prints garbage when asked for the value of the
2199 variable in question (via a `/' command).
2200 (Also note that the labels SDB prints for various FP stack regs
2201 when doing an `x' command are all wrong.)
2202 Note that these problems generally don't affect the native SVR4
2203 C compiler because it doesn't allow the use of -O with -g and
2204 because when it is *not* optimizing, it allocates a memory
2205 location for each floating-point variable, and the memory
2206 location is what gets described in the DWARF AT_location
2207 attribute for the variable in question.
2208 Regardless of the severe mental illness of the x86/svr4 SDB, we
2209 do something sensible here and we use the following DWARF
2210 register numbers. Note that these are all stack-top-relative
2211 numbers.
2212 11 for %st(0) (gcc regno = 8)
2213 12 for %st(1) (gcc regno = 9)
2214 13 for %st(2) (gcc regno = 10)
2215 14 for %st(3) (gcc regno = 11)
2216 15 for %st(4) (gcc regno = 12)
2217 16 for %st(5) (gcc regno = 13)
2218 17 for %st(6) (gcc regno = 14)
2219 18 for %st(7) (gcc regno = 15)
2220 */
2221 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2222 {
2223 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
2224 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
2225 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
2226 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
2227 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
2228 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
2229 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
2230 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 16-23*/
2231 -1, -1, -1, -1, -1, -1, -1, -1, /* AVX-512 registers 24-31*/
2232 93, 94, 95, 96, 97, 98, 99, 100, /* Mask registers */
2233 };
2234
2235 /* Define parameter passing and return registers. */
2236
2237 static int const x86_64_int_parameter_registers[6] =
2238 {
2239 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2240 };
2241
2242 static int const x86_64_ms_abi_int_parameter_registers[4] =
2243 {
2244 CX_REG, DX_REG, R8_REG, R9_REG
2245 };
2246
2247 static int const x86_64_int_return_registers[4] =
2248 {
2249 AX_REG, DX_REG, DI_REG, SI_REG
2250 };
2251
2252 /* Additional registers that are clobbered by SYSV calls. */
2253
2254 int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2255 {
2256 SI_REG, DI_REG,
2257 XMM6_REG, XMM7_REG,
2258 XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2259 XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2260 };
2261
2262 /* Define the structure for the machine field in struct function. */
2263
2264 struct GTY(()) stack_local_entry {
2265 unsigned short mode;
2266 unsigned short n;
2267 rtx rtl;
2268 struct stack_local_entry *next;
2269 };
2270
2271 /* Structure describing stack frame layout.
2272 Stack grows downward:
2273
2274 [arguments]
2275 <- ARG_POINTER
2276 saved pc
2277
2278 saved static chain if ix86_static_chain_on_stack
2279
2280 saved frame pointer if frame_pointer_needed
2281 <- HARD_FRAME_POINTER
2282 [saved regs]
2283 <- regs_save_offset
2284 [padding0]
2285
2286 [saved SSE regs]
2287 <- sse_regs_save_offset
2288 [padding1] |
2289 | <- FRAME_POINTER
2290 [va_arg registers] |
2291 |
2292 [frame] |
2293 |
2294 [padding2] | = to_allocate
2295 <- STACK_POINTER
2296 */
2297 struct ix86_frame
2298 {
2299 int nsseregs;
2300 int nregs;
2301 int va_arg_size;
2302 int red_zone_size;
2303 int outgoing_arguments_size;
2304
2305 /* The offsets relative to ARG_POINTER. */
2306 HOST_WIDE_INT frame_pointer_offset;
2307 HOST_WIDE_INT hard_frame_pointer_offset;
2308 HOST_WIDE_INT stack_pointer_offset;
2309 HOST_WIDE_INT hfp_save_offset;
2310 HOST_WIDE_INT reg_save_offset;
2311 HOST_WIDE_INT sse_reg_save_offset;
2312
2313 /* When save_regs_using_mov is set, emit prologue using
2314 move instead of push instructions. */
2315 bool save_regs_using_mov;
2316 };
2317
2318 /* Which cpu are we scheduling for. */
2319 enum attr_cpu ix86_schedule;
2320
2321 /* Which cpu are we optimizing for. */
2322 enum processor_type ix86_tune;
2323
2324 /* Which instruction set architecture to use. */
2325 enum processor_type ix86_arch;
2326
2327 /* True if processor has SSE prefetch instruction. */
2328 unsigned char x86_prefetch_sse;
2329
2330 /* -mstackrealign option */
2331 static const char ix86_force_align_arg_pointer_string[]
2332 = "force_align_arg_pointer";
2333
2334 static rtx (*ix86_gen_leave) (void);
2335 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2336 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2337 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2338 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2339 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2340 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2341 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2342 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2343 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2344 static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2345 static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2346
2347 /* Preferred alignment for stack boundary in bits. */
2348 unsigned int ix86_preferred_stack_boundary;
2349
2350 /* Alignment for incoming stack boundary in bits specified at
2351 command line. */
2352 static unsigned int ix86_user_incoming_stack_boundary;
2353
2354 /* Default alignment for incoming stack boundary in bits. */
2355 static unsigned int ix86_default_incoming_stack_boundary;
2356
2357 /* Alignment for incoming stack boundary in bits. */
2358 unsigned int ix86_incoming_stack_boundary;
2359
2360 /* Calling abi specific va_list type nodes. */
2361 static GTY(()) tree sysv_va_list_type_node;
2362 static GTY(()) tree ms_va_list_type_node;
2363
2364 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
2365 char internal_label_prefix[16];
2366 int internal_label_prefix_len;
2367
2368 /* Fence to use after loop using movnt. */
2369 tree x86_mfence;
2370
2371 /* Register class used for passing a given 64-bit part of an argument.
2372    These represent the classes documented by the psABI, with the exception
2373    of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
2374    uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
2375 
2376    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
2377    whenever possible (the upper half contains only padding).  */
2378 enum x86_64_reg_class
2379 {
2380 X86_64_NO_CLASS,
2381 X86_64_INTEGER_CLASS,
2382 X86_64_INTEGERSI_CLASS,
2383 X86_64_SSE_CLASS,
2384 X86_64_SSESF_CLASS,
2385 X86_64_SSEDF_CLASS,
2386 X86_64_SSEUP_CLASS,
2387 X86_64_X87_CLASS,
2388 X86_64_X87UP_CLASS,
2389 X86_64_COMPLEX_X87_CLASS,
2390 X86_64_MEMORY_CLASS
2391 };
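/* A worked example of this classification (illustrative only; the
   authoritative logic is in classify_argument later in this file).
   For a hypothetical 16-byte aggregate

     struct s { double d; int i; };

   passed under the 64-bit SysV ABI, the first eightbyte holds only the
   double and would typically classify as X86_64_SSEDF_CLASS, while the
   second eightbyte holds the int plus padding and would typically
   classify as X86_64_INTEGERSI_CLASS, so the argument ends up split
   between one SSE register and one integer register.  */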
2392
2393 #define MAX_CLASSES 8
2394
2395 /* Table of constants used by fldpi, fldln2, etc.... */
2396 static REAL_VALUE_TYPE ext_80387_constants_table [5];
2397 static bool ext_80387_constants_init = 0;
2398
2399 \f
2400 static struct machine_function * ix86_init_machine_status (void);
2401 static rtx ix86_function_value (const_tree, const_tree, bool);
2402 static bool ix86_function_value_regno_p (const unsigned int);
2403 static unsigned int ix86_function_arg_boundary (enum machine_mode,
2404 const_tree);
2405 static rtx ix86_static_chain (const_tree, bool);
2406 static int ix86_function_regparm (const_tree, const_tree);
2407 static void ix86_compute_frame_layout (struct ix86_frame *);
2408 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
2409 rtx, rtx, int);
2410 static void ix86_add_new_builtins (HOST_WIDE_INT);
2411 static tree ix86_canonical_va_list_type (tree);
2412 static void predict_jump (int);
2413 static unsigned int split_stack_prologue_scratch_regno (void);
2414 static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2415
2416 enum ix86_function_specific_strings
2417 {
2418 IX86_FUNCTION_SPECIFIC_ARCH,
2419 IX86_FUNCTION_SPECIFIC_TUNE,
2420 IX86_FUNCTION_SPECIFIC_MAX
2421 };
2422
2423 static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2424 const char *, enum fpmath_unit, bool);
2425 static void ix86_function_specific_save (struct cl_target_option *,
2426 struct gcc_options *opts);
2427 static void ix86_function_specific_restore (struct gcc_options *opts,
2428 struct cl_target_option *);
2429 static void ix86_function_specific_print (FILE *, int,
2430 struct cl_target_option *);
2431 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2432 static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2433 struct gcc_options *,
2434 struct gcc_options *,
2435 struct gcc_options *);
2436 static bool ix86_can_inline_p (tree, tree);
2437 static void ix86_set_current_function (tree);
2438 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2439
2440 static enum calling_abi ix86_function_abi (const_tree);
2441
2442 \f
2443 #ifndef SUBTARGET32_DEFAULT_CPU
2444 #define SUBTARGET32_DEFAULT_CPU "i386"
2445 #endif
2446
2447 /* Whether -mtune= or -march= were specified */
2448 static int ix86_tune_defaulted;
2449 static int ix86_arch_specified;
2450
2451 /* Vectorization library interface and handlers. */
2452 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2453
2454 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2455 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2456
2457 /* Processor target table, indexed by processor number */
2458 struct ptt
2459 {
2460 const char *const name; /* processor name */
2461 const struct processor_costs *cost; /* Processor costs */
2462 const int align_loop; /* Default alignments. */
2463 const int align_loop_max_skip;
2464 const int align_jump;
2465 const int align_jump_max_skip;
2466 const int align_func;
2467 };
2468
2469 /* This table must be in sync with enum processor_type in i386.h. */
2470 static const struct ptt processor_target_table[PROCESSOR_max] =
2471 {
2472 {"generic", &generic_cost, 16, 10, 16, 10, 16},
2473 {"i386", &i386_cost, 4, 3, 4, 3, 4},
2474 {"i486", &i486_cost, 16, 15, 16, 15, 16},
2475 {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2476 {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2477 {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2478 {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2479 {"core2", &core_cost, 16, 10, 16, 10, 16},
2480 {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2481 {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2482 {"haswell", &core_cost, 16, 10, 16, 10, 16},
2483 {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2484 {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2485 {"intel", &intel_cost, 16, 15, 16, 7, 16},
2486 {"geode", &geode_cost, 0, 0, 0, 0, 0},
2487 {"k6", &k6_cost, 32, 7, 32, 7, 32},
2488 {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2489 {"k8", &k8_cost, 16, 7, 16, 7, 16},
2490 {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2491 {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2492 {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2493 {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2494 {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2495 {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2496 {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2497 };
2498 \f
2499 static unsigned int
2500 rest_of_handle_insert_vzeroupper (void)
2501 {
2502 int i;
2503
2504   /* vzeroupper instructions are inserted immediately after reload to
2505      account for possible spills from 256-bit registers.  The pass
2506      reuses the mode switching infrastructure by re-running the mode
2507      insertion pass, so disable entities that have already been processed.  */
2508 for (i = 0; i < MAX_386_ENTITIES; i++)
2509 ix86_optimize_mode_switching[i] = 0;
2510
2511 ix86_optimize_mode_switching[AVX_U128] = 1;
2512
2513 /* Call optimize_mode_switching. */
2514 g->get_passes ()->execute_pass_mode_switching ();
2515 return 0;
2516 }
2517
2518 namespace {
2519
2520 const pass_data pass_data_insert_vzeroupper =
2521 {
2522 RTL_PASS, /* type */
2523 "vzeroupper", /* name */
2524 OPTGROUP_NONE, /* optinfo_flags */
2525 true, /* has_execute */
2526 TV_NONE, /* tv_id */
2527 0, /* properties_required */
2528 0, /* properties_provided */
2529 0, /* properties_destroyed */
2530 0, /* todo_flags_start */
2531 TODO_df_finish, /* todo_flags_finish */
2532 };
2533
2534 class pass_insert_vzeroupper : public rtl_opt_pass
2535 {
2536 public:
2537 pass_insert_vzeroupper(gcc::context *ctxt)
2538 : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2539 {}
2540
2541 /* opt_pass methods: */
2542 virtual bool gate (function *)
2543 {
2544 return TARGET_AVX && !TARGET_AVX512F && TARGET_VZEROUPPER;
2545 }
2546
2547 virtual unsigned int execute (function *)
2548 {
2549 return rest_of_handle_insert_vzeroupper ();
2550 }
2551
2552 }; // class pass_insert_vzeroupper
2553
2554 } // anon namespace
2555
2556 rtl_opt_pass *
2557 make_pass_insert_vzeroupper (gcc::context *ctxt)
2558 {
2559 return new pass_insert_vzeroupper (ctxt);
2560 }
2561
2562 /* Return true if a red-zone is in use. */
2563
2564 static inline bool
2565 ix86_using_red_zone (void)
2566 {
2567 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2568 }
2569 \f
2570 /* Return a string that documents the current -m options. The caller is
2571 responsible for freeing the string. */
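/* A sketch of the kind of string this builds (the exact contents depend
   on the isa and flag masks handled below and are not guaranteed):

     "-march=<arch> -mtune=<tune> -m64 -msse4.2 -msse4.1 ... -mfpmath=sse"

   Options are space separated; when ADD_NL_P is true, lines longer than
   about 70 columns are continued with a backslash-newline.  */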
2572
2573 static char *
2574 ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2575 const char *tune, enum fpmath_unit fpmath,
2576 bool add_nl_p)
2577 {
2578 struct ix86_target_opts
2579 {
2580 const char *option; /* option string */
2581 HOST_WIDE_INT mask; /* isa mask options */
2582 };
2583
2584   /* This table is ordered so that options like -msse4.2 that imply other
2585      options are listed before the options they imply, so they match first.  */
2586 static struct ix86_target_opts isa_opts[] =
2587 {
2588 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2589 { "-mfma", OPTION_MASK_ISA_FMA },
2590 { "-mxop", OPTION_MASK_ISA_XOP },
2591 { "-mlwp", OPTION_MASK_ISA_LWP },
2592 { "-mavx512f", OPTION_MASK_ISA_AVX512F },
2593 { "-mavx512er", OPTION_MASK_ISA_AVX512ER },
2594 { "-mavx512cd", OPTION_MASK_ISA_AVX512CD },
2595 { "-mavx512pf", OPTION_MASK_ISA_AVX512PF },
2596 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2597 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2598 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2599 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2600 { "-msse3", OPTION_MASK_ISA_SSE3 },
2601 { "-msse2", OPTION_MASK_ISA_SSE2 },
2602 { "-msse", OPTION_MASK_ISA_SSE },
2603 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2604 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2605 { "-mmmx", OPTION_MASK_ISA_MMX },
2606 { "-mabm", OPTION_MASK_ISA_ABM },
2607 { "-mbmi", OPTION_MASK_ISA_BMI },
2608 { "-mbmi2", OPTION_MASK_ISA_BMI2 },
2609 { "-mlzcnt", OPTION_MASK_ISA_LZCNT },
2610 { "-mhle", OPTION_MASK_ISA_HLE },
2611 { "-mfxsr", OPTION_MASK_ISA_FXSR },
2612 { "-mrdseed", OPTION_MASK_ISA_RDSEED },
2613 { "-mprfchw", OPTION_MASK_ISA_PRFCHW },
2614 { "-madx", OPTION_MASK_ISA_ADX },
2615 { "-mtbm", OPTION_MASK_ISA_TBM },
2616 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2617 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2618 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2619 { "-maes", OPTION_MASK_ISA_AES },
2620 { "-msha", OPTION_MASK_ISA_SHA },
2621 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2622 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2623 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2624 { "-mf16c", OPTION_MASK_ISA_F16C },
2625 { "-mrtm", OPTION_MASK_ISA_RTM },
2626 { "-mxsave", OPTION_MASK_ISA_XSAVE },
2627 { "-mxsaveopt", OPTION_MASK_ISA_XSAVEOPT },
2628 { "-mprefetchwt1", OPTION_MASK_ISA_PREFETCHWT1 },
2629 };
2630
2631 /* Flag options. */
2632 static struct ix86_target_opts flag_opts[] =
2633 {
2634 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2635 { "-mlong-double-128", MASK_LONG_DOUBLE_128 },
2636 { "-mlong-double-64", MASK_LONG_DOUBLE_64 },
2637 { "-m80387", MASK_80387 },
2638 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2639 { "-malign-double", MASK_ALIGN_DOUBLE },
2640 { "-mcld", MASK_CLD },
2641 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2642 { "-mieee-fp", MASK_IEEE_FP },
2643 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2644 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2645 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2646 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2647 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2648 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2649 { "-mno-red-zone", MASK_NO_RED_ZONE },
2650 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2651 { "-mrecip", MASK_RECIP },
2652 { "-mrtd", MASK_RTD },
2653 { "-msseregparm", MASK_SSEREGPARM },
2654 { "-mstack-arg-probe", MASK_STACK_PROBE },
2655 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2656 { "-mvect8-ret-in-mem", MASK_VECT8_RETURNS },
2657 { "-m8bit-idiv", MASK_USE_8BIT_IDIV },
2658 { "-mvzeroupper", MASK_VZEROUPPER },
2659 { "-mavx256-split-unaligned-load", MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2660 { "-mavx256-split-unaligned-store", MASK_AVX256_SPLIT_UNALIGNED_STORE},
2661 { "-mprefer-avx128", MASK_PREFER_AVX128},
2662 };
2663
2664 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2665
2666 char isa_other[40];
2667 char target_other[40];
2668 unsigned num = 0;
2669 unsigned i, j;
2670 char *ret;
2671 char *ptr;
2672 size_t len;
2673 size_t line_len;
2674 size_t sep_len;
2675 const char *abi;
2676
2677 memset (opts, '\0', sizeof (opts));
2678
2679 /* Add -march= option. */
2680 if (arch)
2681 {
2682 opts[num][0] = "-march=";
2683 opts[num++][1] = arch;
2684 }
2685
2686 /* Add -mtune= option. */
2687 if (tune)
2688 {
2689 opts[num][0] = "-mtune=";
2690 opts[num++][1] = tune;
2691 }
2692
2693 /* Add -m32/-m64/-mx32. */
2694 if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2695 {
2696 if ((isa & OPTION_MASK_ABI_64) != 0)
2697 abi = "-m64";
2698 else
2699 abi = "-mx32";
2700 isa &= ~ (OPTION_MASK_ISA_64BIT
2701 | OPTION_MASK_ABI_64
2702 | OPTION_MASK_ABI_X32);
2703 }
2704 else
2705 abi = "-m32";
2706 opts[num++][0] = abi;
2707
2708 /* Pick out the options in isa options. */
2709 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2710 {
2711 if ((isa & isa_opts[i].mask) != 0)
2712 {
2713 opts[num++][0] = isa_opts[i].option;
2714 isa &= ~ isa_opts[i].mask;
2715 }
2716 }
2717
2718 if (isa && add_nl_p)
2719 {
2720 opts[num++][0] = isa_other;
2721 sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2722 isa);
2723 }
2724
2725 /* Add flag options. */
2726 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2727 {
2728 if ((flags & flag_opts[i].mask) != 0)
2729 {
2730 opts[num++][0] = flag_opts[i].option;
2731 flags &= ~ flag_opts[i].mask;
2732 }
2733 }
2734
2735 if (flags && add_nl_p)
2736 {
2737 opts[num++][0] = target_other;
2738 sprintf (target_other, "(other flags: %#x)", flags);
2739 }
2740
2741 /* Add -fpmath= option. */
2742 if (fpmath)
2743 {
2744 opts[num][0] = "-mfpmath=";
2745 switch ((int) fpmath)
2746 {
2747 case FPMATH_387:
2748 opts[num++][1] = "387";
2749 break;
2750
2751 case FPMATH_SSE:
2752 opts[num++][1] = "sse";
2753 break;
2754
2755 case FPMATH_387 | FPMATH_SSE:
2756 opts[num++][1] = "sse+387";
2757 break;
2758
2759 default:
2760 gcc_unreachable ();
2761 }
2762 }
2763
2764 /* Any options? */
2765 if (num == 0)
2766 return NULL;
2767
2768 gcc_assert (num < ARRAY_SIZE (opts));
2769
2770 /* Size the string. */
2771 len = 0;
2772 sep_len = (add_nl_p) ? 3 : 1;
2773 for (i = 0; i < num; i++)
2774 {
2775 len += sep_len;
2776 for (j = 0; j < 2; j++)
2777 if (opts[i][j])
2778 len += strlen (opts[i][j]);
2779 }
2780
2781 /* Build the string. */
2782 ret = ptr = (char *) xmalloc (len);
2783 line_len = 0;
2784
2785 for (i = 0; i < num; i++)
2786 {
2787 size_t len2[2];
2788
2789 for (j = 0; j < 2; j++)
2790 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2791
2792 if (i != 0)
2793 {
2794 *ptr++ = ' ';
2795 line_len++;
2796
2797 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2798 {
2799 *ptr++ = '\\';
2800 *ptr++ = '\n';
2801 line_len = 0;
2802 }
2803 }
2804
2805 for (j = 0; j < 2; j++)
2806 if (opts[i][j])
2807 {
2808 memcpy (ptr, opts[i][j], len2[j]);
2809 ptr += len2[j];
2810 line_len += len2[j];
2811 }
2812 }
2813
2814 *ptr = '\0';
2815 gcc_assert (ret + len >= ptr);
2816
2817 return ret;
2818 }
2819
2820 /* Return true if profiling code should be emitted before the
2821    prologue; otherwise return false.
2822    Note: for x86 with "hotfix" this is sorried (not supported).  */
2823 static bool
2824 ix86_profile_before_prologue (void)
2825 {
2826 return flag_fentry != 0;
2827 }
2828
2829 /* Function that is callable from the debugger to print the current
2830 options. */
2831 void ATTRIBUTE_UNUSED
2832 ix86_debug_options (void)
2833 {
2834 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2835 ix86_arch_string, ix86_tune_string,
2836 ix86_fpmath, true);
2837
2838 if (opts)
2839 {
2840 fprintf (stderr, "%s\n\n", opts);
2841 free (opts);
2842 }
2843 else
2844 fputs ("<no options>\n\n", stderr);
2845
2846 return;
2847 }
2848
2849 static const char *stringop_alg_names[] = {
2850 #define DEF_ENUM
2851 #define DEF_ALG(alg, name) #name,
2852 #include "stringop.def"
2853 #undef DEF_ENUM
2854 #undef DEF_ALG
2855 };
2856
2857 /* Parse the parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2858    The string has the following form (or is a comma-separated list of such entries):
2859
2860 strategy_alg:max_size:[align|noalign]
2861
2862 where the full size range for the strategy is either [0, max_size] or
2863 [min_size, max_size], in which min_size is the max_size + 1 of the
2864 preceding range. The last size range must have max_size == -1.
2865
2866 Examples:
2867
2868 1.
2869 -mmemcpy-strategy=libcall:-1:noalign
2870
2871 this is equivalent to (for known size memcpy) -mstringop-strategy=libcall
2872
2873
2874 2.
2875 -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2876
2877 This is to tell the compiler to use the following strategy for memset
2878 1) when the expected size is between [1, 16], use rep_8byte strategy;
2879 2) when the size is between [17, 2048], use vector_loop;
2880 3) when the size is > 2048, use libcall. */
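/* As a concrete illustration of the parsing below (assuming stringop.def
   maps the name "rep_8byte" to rep_prefix_8_byte), the second example
   above would fill input_ranges with three entries:

     { 16,   rep_prefix_8_byte, true  }    -- noalign
     { 2048, vector_loop,       false }    -- align
     { -1,   libcall,           true  }    -- noalign

   which are then copied over the default_algs size table.  */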
2881
2882 struct stringop_size_range
2883 {
2884 int max;
2885 stringop_alg alg;
2886 bool noalign;
2887 };
2888
2889 static void
2890 ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2891 {
2892 const struct stringop_algs *default_algs;
2893 stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2894 char *curr_range_str, *next_range_str;
2895 int i = 0, n = 0;
2896
2897 if (is_memset)
2898 default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2899 else
2900 default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2901
2902 curr_range_str = strategy_str;
2903
2904 do
2905 {
2906 int maxs;
2907 char alg_name[128];
2908 char align[16];
2909 next_range_str = strchr (curr_range_str, ',');
2910 if (next_range_str)
2911 *next_range_str++ = '\0';
2912
2913 if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2914 alg_name, &maxs, align))
2915 {
2916 error ("wrong arg %s to option %s", curr_range_str,
2917 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2918 return;
2919 }
2920
2921 if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2922 {
2923 error ("size ranges of option %s should be increasing",
2924 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2925 return;
2926 }
2927
2928 for (i = 0; i < last_alg; i++)
2929 if (!strcmp (alg_name, stringop_alg_names[i]))
2930 break;
2931
2932 if (i == last_alg)
2933 {
2934 error ("wrong stringop strategy name %s specified for option %s",
2935 alg_name,
2936 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2937 return;
2938 }
2939
/* Check the bound before writing input_ranges[n]; checking only after
   the loop would let an over-long option string overflow the array.  */
if (n == MAX_STRINGOP_ALGS)
{
error ("too many size ranges specified in option %s",
is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
return;
}

2940 input_ranges[n].max = maxs;
2941 input_ranges[n].alg = (stringop_alg) i;
2942 if (!strcmp (align, "align"))
2943 input_ranges[n].noalign = false;
2944 else if (!strcmp (align, "noalign"))
2945 input_ranges[n].noalign = true;
2946 else
2947 {
2948 error ("unknown alignment %s specified for option %s",
2949 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2950 return;
2951 }
2952 n++;
2953 curr_range_str = next_range_str;
2954 }
2955 while (curr_range_str);
2956
2957 if (input_ranges[n - 1].max != -1)
2958 {
2959 error ("the max value for the last size range should be -1"
2960 " for option %s",
2961 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2962 return;
2963 }
2971
2972 /* Now override the default algs array. */
2973 for (i = 0; i < n; i++)
2974 {
2975 *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
2976 *const_cast<stringop_alg *>(&default_algs->size[i].alg)
2977 = input_ranges[i].alg;
2978 *const_cast<int *>(&default_algs->size[i].noalign)
2979 = input_ranges[i].noalign;
2980 }
2981 }
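/* Illustrative only: for the example string from the comment above,

     -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign

   the parsing loop fills input_ranges as if it had been written

     { {   16, rep_8byte,   true  },     sizes [1, 16]
       { 2048, vector_loop, false },     sizes [17, 2048]
       {   -1, libcall,     true  } };   sizes > 2048

   and the final loop then copies each entry over the const members of the
   default stringop_algs table through the const_casts above.  */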
2982
2983 \f
2984 /* Parse the -mtune-ctrl= option.  When DUMP is true,
2985 print the features that are explicitly set. */
2986
2987 static void
2988 parse_mtune_ctrl_str (bool dump)
2989 {
2990 if (!ix86_tune_ctrl_string)
2991 return;
2992
2993 char *next_feature_string = NULL;
2994 char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
2995 char *orig = curr_feature_string;
2996 int i;
2997 do
2998 {
2999 bool clear = false;
3000
3001 next_feature_string = strchr (curr_feature_string, ',');
3002 if (next_feature_string)
3003 *next_feature_string++ = '\0';
3004 if (*curr_feature_string == '^')
3005 {
3006 curr_feature_string++;
3007 clear = true;
3008 }
3009 for (i = 0; i < X86_TUNE_LAST; i++)
3010 {
3011 if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3012 {
3013 ix86_tune_features[i] = !clear;
3014 if (dump)
3015 fprintf (stderr, "Explicitly %s feature %s\n",
3016 clear ? "clear" : "set", ix86_tune_feature_names[i]);
3017 break;
3018 }
3019 }
3020 if (i == X86_TUNE_LAST)
3021 error ("unknown parameter to option -mtune-ctrl: %s",
3022 clear ? curr_feature_string - 1 : curr_feature_string);
3023 curr_feature_string = next_feature_string;
3024 }
3025 while (curr_feature_string);
3026 free (orig);
3027 }
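/* Illustrative only: assuming the feature names are the lower-case forms of
   the X86_TUNE_* identifiers from x86-tune.def (e.g. "use_leave" for
   X86_TUNE_USE_LEAVE), a command line such as

     gcc -O2 -mtune-ctrl=use_leave,^accumulate_outgoing_args ...

   is split on ',' by the loop above; the first entry sets its feature and
   the leading '^' on the second entry clears it, overriding the defaults
   chosen by set_ix86_tune_features below.  */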
3028
3029 /* Helper function to set ix86_tune_features. IX86_TUNE is the
3030 processor type. */
3031
3032 static void
3033 set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3034 {
3035 unsigned int ix86_tune_mask = 1u << ix86_tune;
3036 int i;
3037
3038 for (i = 0; i < X86_TUNE_LAST; ++i)
3039 {
3040 if (ix86_tune_no_default)
3041 ix86_tune_features[i] = 0;
3042 else
3043 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3044 }
3045
3046 if (dump)
3047 {
3048 fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3049 for (i = 0; i < X86_TUNE_LAST; i++)
3050 fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3051 ix86_tune_features[i] ? "on" : "off");
3052 }
3053
3054 parse_mtune_ctrl_str (dump);
3055 }
3056
3057
3058 /* Override various settings based on options. If MAIN_ARGS_P, the
3059 options are from the command line, otherwise they are from
3060 attributes. */
3061
3062 static void
3063 ix86_option_override_internal (bool main_args_p,
3064 struct gcc_options *opts,
3065 struct gcc_options *opts_set)
3066 {
3067 int i;
3068 unsigned int ix86_arch_mask;
3069 const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3070 const char *prefix;
3071 const char *suffix;
3072 const char *sw;
3073
3074 #define PTA_3DNOW (HOST_WIDE_INT_1 << 0)
3075 #define PTA_3DNOW_A (HOST_WIDE_INT_1 << 1)
3076 #define PTA_64BIT (HOST_WIDE_INT_1 << 2)
3077 #define PTA_ABM (HOST_WIDE_INT_1 << 3)
3078 #define PTA_AES (HOST_WIDE_INT_1 << 4)
3079 #define PTA_AVX (HOST_WIDE_INT_1 << 5)
3080 #define PTA_BMI (HOST_WIDE_INT_1 << 6)
3081 #define PTA_CX16 (HOST_WIDE_INT_1 << 7)
3082 #define PTA_F16C (HOST_WIDE_INT_1 << 8)
3083 #define PTA_FMA (HOST_WIDE_INT_1 << 9)
3084 #define PTA_FMA4 (HOST_WIDE_INT_1 << 10)
3085 #define PTA_FSGSBASE (HOST_WIDE_INT_1 << 11)
3086 #define PTA_LWP (HOST_WIDE_INT_1 << 12)
3087 #define PTA_LZCNT (HOST_WIDE_INT_1 << 13)
3088 #define PTA_MMX (HOST_WIDE_INT_1 << 14)
3089 #define PTA_MOVBE (HOST_WIDE_INT_1 << 15)
3090 #define PTA_NO_SAHF (HOST_WIDE_INT_1 << 16)
3091 #define PTA_PCLMUL (HOST_WIDE_INT_1 << 17)
3092 #define PTA_POPCNT (HOST_WIDE_INT_1 << 18)
3093 #define PTA_PREFETCH_SSE (HOST_WIDE_INT_1 << 19)
3094 #define PTA_RDRND (HOST_WIDE_INT_1 << 20)
3095 #define PTA_SSE (HOST_WIDE_INT_1 << 21)
3096 #define PTA_SSE2 (HOST_WIDE_INT_1 << 22)
3097 #define PTA_SSE3 (HOST_WIDE_INT_1 << 23)
3098 #define PTA_SSE4_1 (HOST_WIDE_INT_1 << 24)
3099 #define PTA_SSE4_2 (HOST_WIDE_INT_1 << 25)
3100 #define PTA_SSE4A (HOST_WIDE_INT_1 << 26)
3101 #define PTA_SSSE3 (HOST_WIDE_INT_1 << 27)
3102 #define PTA_TBM (HOST_WIDE_INT_1 << 28)
3103 #define PTA_XOP (HOST_WIDE_INT_1 << 29)
3104 #define PTA_AVX2 (HOST_WIDE_INT_1 << 30)
3105 #define PTA_BMI2 (HOST_WIDE_INT_1 << 31)
3106 #define PTA_RTM (HOST_WIDE_INT_1 << 32)
3107 #define PTA_HLE (HOST_WIDE_INT_1 << 33)
3108 #define PTA_PRFCHW (HOST_WIDE_INT_1 << 34)
3109 #define PTA_RDSEED (HOST_WIDE_INT_1 << 35)
3110 #define PTA_ADX (HOST_WIDE_INT_1 << 36)
3111 #define PTA_FXSR (HOST_WIDE_INT_1 << 37)
3112 #define PTA_XSAVE (HOST_WIDE_INT_1 << 38)
3113 #define PTA_XSAVEOPT (HOST_WIDE_INT_1 << 39)
3114 #define PTA_AVX512F (HOST_WIDE_INT_1 << 40)
3115 #define PTA_AVX512ER (HOST_WIDE_INT_1 << 41)
3116 #define PTA_AVX512PF (HOST_WIDE_INT_1 << 42)
3117 #define PTA_AVX512CD (HOST_WIDE_INT_1 << 43)
3118 #define PTA_SHA (HOST_WIDE_INT_1 << 45)
3119 #define PTA_PREFETCHWT1 (HOST_WIDE_INT_1 << 46)
3120
3121 #define PTA_CORE2 \
3122 (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3123 | PTA_CX16 | PTA_FXSR)
3124 #define PTA_NEHALEM \
3125 (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3126 #define PTA_WESTMERE \
3127 (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3128 #define PTA_SANDYBRIDGE \
3129 (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3130 #define PTA_IVYBRIDGE \
3131 (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3132 #define PTA_HASWELL \
3133 (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3134 | PTA_FMA | PTA_MOVBE | PTA_HLE)
3135 #define PTA_BROADWELL \
3136 (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3137 #define PTA_BONNELL \
3138 (PTA_CORE2 | PTA_MOVBE)
3139 #define PTA_SILVERMONT \
3140 (PTA_WESTMERE | PTA_MOVBE)
3141
3142 /* If this reaches 64, the flags field of struct pta below needs to be widened. */
3143
3144 static struct pta
3145 {
3146 const char *const name; /* processor name or nickname. */
3147 const enum processor_type processor;
3148 const enum attr_cpu schedule;
3149 const unsigned HOST_WIDE_INT flags;
3150 }
3151 const processor_alias_table[] =
3152 {
3153 {"i386", PROCESSOR_I386, CPU_NONE, 0},
3154 {"i486", PROCESSOR_I486, CPU_NONE, 0},
3155 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3156 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3157 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3158 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3159 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3160 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3161 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3162 PTA_MMX | PTA_SSE | PTA_FXSR},
3163 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3164 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3165 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3166 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3167 PTA_MMX | PTA_SSE | PTA_FXSR},
3168 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3169 PTA_MMX | PTA_SSE | PTA_FXSR},
3170 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3171 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3172 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3173 PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3174 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3175 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3176 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3177 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3178 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3179 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3180 | PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3181 {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3182 {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3183 {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3184 {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3185 {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3186 PTA_SANDYBRIDGE},
3187 {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3188 PTA_SANDYBRIDGE},
3189 {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3190 PTA_IVYBRIDGE},
3191 {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3192 PTA_IVYBRIDGE},
3193 {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3194 {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3195 {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3196 {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3197 {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3198 {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3199 {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3200 {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3201 {"geode", PROCESSOR_GEODE, CPU_GEODE,
3202 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3203 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3204 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3205 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3206 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3207 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3208 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3209 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3210 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3211 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3212 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3213 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3214 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3215 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3216 {"x86-64", PROCESSOR_K8, CPU_K8,
3217 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3218 {"k8", PROCESSOR_K8, CPU_K8,
3219 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3220 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3221 {"k8-sse3", PROCESSOR_K8, CPU_K8,
3222 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3223 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3224 {"opteron", PROCESSOR_K8, CPU_K8,
3225 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3226 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3227 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3228 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3229 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3230 {"athlon64", PROCESSOR_K8, CPU_K8,
3231 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3232 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3233 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3234 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3235 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3236 {"athlon-fx", PROCESSOR_K8, CPU_K8,
3237 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3238 | PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3239 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3240 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3241 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3242 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3243 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3244 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3245 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3246 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3247 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3248 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3249 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3250 {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3251 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3252 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3253 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3254 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3255 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3256 {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3257 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3258 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3259 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3260 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3261 | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3262 | PTA_XSAVEOPT | PTA_FSGSBASE},
3263 {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3264 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3265 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3266 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3267 | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3268 | PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3269 | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE},
3270 {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3271 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3272 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3273 | PTA_FXSR | PTA_XSAVE},
3274 {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3275 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3276 | PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3277 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3278 | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3279 | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3280
3281 {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3282 PTA_64BIT
3283 | PTA_HLE /* flags are only used for -march switch. */ },
3284 };
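/* Illustrative only: with -march=haswell, the lookup loop further below
   matches the {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL}
   entry, so ix86_arch becomes PROCESSOR_HASWELL and each PTA_* bit in
   PTA_HASWELL (for example PTA_AVX2 and PTA_BMI2) ORs the matching
   OPTION_MASK_ISA_* bit into opts->x_ix86_isa_flags unless the user set
   that ISA flag explicitly.  */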
3285
3286 /* -mrecip options. */
3287 static struct
3288 {
3289 const char *string; /* option name */
3290 unsigned int mask; /* mask bits to set */
3291 }
3292 const recip_options[] =
3293 {
3294 { "all", RECIP_MASK_ALL },
3295 { "none", RECIP_MASK_NONE },
3296 { "div", RECIP_MASK_DIV },
3297 { "sqrt", RECIP_MASK_SQRT },
3298 { "vec-div", RECIP_MASK_VEC_DIV },
3299 { "vec-sqrt", RECIP_MASK_VEC_SQRT },
3300 };
3301
3302 int const pta_size = ARRAY_SIZE (processor_alias_table);
3303
3304 /* Set up prefix/suffix so the error messages refer to either the command
3305 line argument, or the attribute(target). */
3306 if (main_args_p)
3307 {
3308 prefix = "-m";
3309 suffix = "";
3310 sw = "switch";
3311 }
3312 else
3313 {
3314 prefix = "option(\"";
3315 suffix = "\")";
3316 sw = "attribute";
3317 }
3318
3319 /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3320 TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false. */
3321 if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3322 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3323 #ifdef TARGET_BI_ARCH
3324 else
3325 {
3326 #if TARGET_BI_ARCH == 1
3327 /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3328 is on and OPTION_MASK_ABI_X32 is off. We turn off
3329 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3330 -mx32. */
3331 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3332 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3333 #else
3334 /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3335 on and OPTION_MASK_ABI_64 is off. We turn off
3336 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3337 -m64. */
3338 if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3339 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3340 #endif
3341 }
3342 #endif
3343
3344 if (TARGET_X32_P (opts->x_ix86_isa_flags))
3345 {
3346 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3347 OPTION_MASK_ABI_64 for TARGET_X32. */
3348 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3349 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3350 }
3351 else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3352 opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3353 | OPTION_MASK_ABI_X32
3354 | OPTION_MASK_ABI_64);
3355 else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3356 {
3357 /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3358 OPTION_MASK_ABI_X32 for TARGET_LP64. */
3359 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3360 opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3361 }
3362
3363 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3364 SUBTARGET_OVERRIDE_OPTIONS;
3365 #endif
3366
3367 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3368 SUBSUBTARGET_OVERRIDE_OPTIONS;
3369 #endif
3370
3371 /* -fPIC is the default for x86_64. */
3372 if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3373 opts->x_flag_pic = 2;
3374
3375 /* Need to check -mtune=generic first. */
3376 if (opts->x_ix86_tune_string)
3377 {
3378 /* As special support for cross compilers we read -mtune=native
3379 as -mtune=generic. With native compilers we won't see the
3380 -mtune=native, as it was changed by the driver. */
3381 if (!strcmp (opts->x_ix86_tune_string, "native"))
3382 {
3383 opts->x_ix86_tune_string = "generic";
3384 }
3385 else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3386 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3387 "%stune=k8%s or %stune=generic%s instead as appropriate",
3388 prefix, suffix, prefix, suffix, prefix, suffix);
3389 }
3390 else
3391 {
3392 if (opts->x_ix86_arch_string)
3393 opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3394 if (!opts->x_ix86_tune_string)
3395 {
3396 opts->x_ix86_tune_string
3397 = processor_target_table[TARGET_CPU_DEFAULT].name;
3398 ix86_tune_defaulted = 1;
3399 }
3400
3401 /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3402 or defaulted. We need to use a sensible tune option. */
3403 if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3404 {
3405 opts->x_ix86_tune_string = "generic";
3406 }
3407 }
3408
3409 if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3410 && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3411 {
3412 /* rep; movq isn't available in 32-bit code. */
3413 error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3414 opts->x_ix86_stringop_alg = no_stringop;
3415 }
3416
3417 if (!opts->x_ix86_arch_string)
3418 opts->x_ix86_arch_string
3419 = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3420 ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3421 else
3422 ix86_arch_specified = 1;
3423
3424 if (opts_set->x_ix86_pmode)
3425 {
3426 if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3427 && opts->x_ix86_pmode == PMODE_SI)
3428 || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3429 && opts->x_ix86_pmode == PMODE_DI))
3430 error ("address mode %qs not supported in the %s bit mode",
3431 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3432 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3433 }
3434 else
3435 opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3436 ? PMODE_DI : PMODE_SI;
3437
3438 if (!opts_set->x_ix86_abi)
3439 opts->x_ix86_abi = DEFAULT_ABI;
3440
3441 /* For targets using the MS ABI, enable MS extensions unless they were
3442 explicitly turned off.  For non-MS ABI targets we turn this
3443 option off.  */
3444 if (!opts_set->x_flag_ms_extensions)
3445 opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3446
3447 if (opts_set->x_ix86_cmodel)
3448 {
3449 switch (opts->x_ix86_cmodel)
3450 {
3451 case CM_SMALL:
3452 case CM_SMALL_PIC:
3453 if (opts->x_flag_pic)
3454 opts->x_ix86_cmodel = CM_SMALL_PIC;
3455 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3456 error ("code model %qs not supported in the %s bit mode",
3457 "small", "32");
3458 break;
3459
3460 case CM_MEDIUM:
3461 case CM_MEDIUM_PIC:
3462 if (opts->x_flag_pic)
3463 opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3464 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3465 error ("code model %qs not supported in the %s bit mode",
3466 "medium", "32");
3467 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3468 error ("code model %qs not supported in x32 mode",
3469 "medium");
3470 break;
3471
3472 case CM_LARGE:
3473 case CM_LARGE_PIC:
3474 if (opts->x_flag_pic)
3475 opts->x_ix86_cmodel = CM_LARGE_PIC;
3476 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3477 error ("code model %qs not supported in the %s bit mode",
3478 "large", "32");
3479 else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3480 error ("code model %qs not supported in x32 mode",
3481 "large");
3482 break;
3483
3484 case CM_32:
3485 if (opts->x_flag_pic)
3486 error ("code model %s does not support PIC mode", "32");
3487 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3488 error ("code model %qs not supported in the %s bit mode",
3489 "32", "64");
3490 break;
3491
3492 case CM_KERNEL:
3493 if (opts->x_flag_pic)
3494 {
3495 error ("code model %s does not support PIC mode", "kernel");
3496 opts->x_ix86_cmodel = CM_32;
3497 }
3498 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3499 error ("code model %qs not supported in the %s bit mode",
3500 "kernel", "32");
3501 break;
3502
3503 default:
3504 gcc_unreachable ();
3505 }
3506 }
3507 else
3508 {
3509 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3510 use of rip-relative addressing. This eliminates fixups that
3511 would otherwise be needed if this object is to be placed in a
3512 DLL, and is essentially just as efficient as direct addressing. */
3513 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3514 && (TARGET_RDOS || TARGET_PECOFF))
3515 opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3516 else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3517 opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3518 else
3519 opts->x_ix86_cmodel = CM_32;
3520 }
3521 if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3522 {
3523 error ("-masm=intel not supported in this configuration");
3524 opts->x_ix86_asm_dialect = ASM_ATT;
3525 }
3526 if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3527 != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3528 sorry ("%i-bit mode not compiled in",
3529 (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3530
3531 for (i = 0; i < pta_size; i++)
3532 if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3533 {
3534 ix86_schedule = processor_alias_table[i].schedule;
3535 ix86_arch = processor_alias_table[i].processor;
3536 /* Default cpu tuning to the architecture. */
3537 ix86_tune = ix86_arch;
3538
3539 if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3540 && !(processor_alias_table[i].flags & PTA_64BIT))
3541 error ("CPU you selected does not support x86-64 "
3542 "instruction set");
3543
3544 if (processor_alias_table[i].flags & PTA_MMX
3545 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3546 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3547 if (processor_alias_table[i].flags & PTA_3DNOW
3548 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3549 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3550 if (processor_alias_table[i].flags & PTA_3DNOW_A
3551 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3552 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3553 if (processor_alias_table[i].flags & PTA_SSE
3554 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3555 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3556 if (processor_alias_table[i].flags & PTA_SSE2
3557 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3558 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3559 if (processor_alias_table[i].flags & PTA_SSE3
3560 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3561 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3562 if (processor_alias_table[i].flags & PTA_SSSE3
3563 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3564 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3565 if (processor_alias_table[i].flags & PTA_SSE4_1
3566 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3567 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3568 if (processor_alias_table[i].flags & PTA_SSE4_2
3569 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3570 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3571 if (processor_alias_table[i].flags & PTA_AVX
3572 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3573 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3574 if (processor_alias_table[i].flags & PTA_AVX2
3575 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3576 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3577 if (processor_alias_table[i].flags & PTA_FMA
3578 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3579 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3580 if (processor_alias_table[i].flags & PTA_SSE4A
3581 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3582 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3583 if (processor_alias_table[i].flags & PTA_FMA4
3584 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3585 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3586 if (processor_alias_table[i].flags & PTA_XOP
3587 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3588 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3589 if (processor_alias_table[i].flags & PTA_LWP
3590 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3591 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3592 if (processor_alias_table[i].flags & PTA_ABM
3593 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3594 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3595 if (processor_alias_table[i].flags & PTA_BMI
3596 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3597 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3598 if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3599 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3600 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3601 if (processor_alias_table[i].flags & PTA_TBM
3602 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3603 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3604 if (processor_alias_table[i].flags & PTA_BMI2
3605 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3606 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3607 if (processor_alias_table[i].flags & PTA_CX16
3608 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3609 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3610 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3611 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3612 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3613 if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3614 && (processor_alias_table[i].flags & PTA_NO_SAHF))
3615 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3616 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3617 if (processor_alias_table[i].flags & PTA_MOVBE
3618 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3619 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3620 if (processor_alias_table[i].flags & PTA_AES
3621 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3622 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3623 if (processor_alias_table[i].flags & PTA_SHA
3624 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3625 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3626 if (processor_alias_table[i].flags & PTA_PCLMUL
3627 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3628 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3629 if (processor_alias_table[i].flags & PTA_FSGSBASE
3630 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3631 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3632 if (processor_alias_table[i].flags & PTA_RDRND
3633 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3634 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3635 if (processor_alias_table[i].flags & PTA_F16C
3636 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3637 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3638 if (processor_alias_table[i].flags & PTA_RTM
3639 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3640 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3641 if (processor_alias_table[i].flags & PTA_HLE
3642 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3643 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3644 if (processor_alias_table[i].flags & PTA_PRFCHW
3645 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3646 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3647 if (processor_alias_table[i].flags & PTA_RDSEED
3648 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3649 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3650 if (processor_alias_table[i].flags & PTA_ADX
3651 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3652 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3653 if (processor_alias_table[i].flags & PTA_FXSR
3654 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3655 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3656 if (processor_alias_table[i].flags & PTA_XSAVE
3657 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3658 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3659 if (processor_alias_table[i].flags & PTA_XSAVEOPT
3660 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3661 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3662 if (processor_alias_table[i].flags & PTA_AVX512F
3663 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3664 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3665 if (processor_alias_table[i].flags & PTA_AVX512ER
3666 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3667 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3668 if (processor_alias_table[i].flags & PTA_AVX512PF
3669 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3670 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3671 if (processor_alias_table[i].flags & PTA_AVX512CD
3672 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3673 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3674 if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3675 && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3676 opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3677 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3678 x86_prefetch_sse = true;
3679
3680 break;
3681 }
3682
3683 if (!strcmp (opts->x_ix86_arch_string, "generic"))
3684 error ("generic CPU can be used only for %stune=%s %s",
3685 prefix, suffix, sw);
3686 else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3687 error ("intel CPU can be used only for %stune=%s %s",
3688 prefix, suffix, sw);
3689 else if (i == pta_size)
3690 error ("bad value (%s) for %sarch=%s %s",
3691 opts->x_ix86_arch_string, prefix, suffix, sw);
3692
3693 ix86_arch_mask = 1u << ix86_arch;
3694 for (i = 0; i < X86_ARCH_LAST; ++i)
3695 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3696
3697 for (i = 0; i < pta_size; i++)
3698 if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3699 {
3700 ix86_schedule = processor_alias_table[i].schedule;
3701 ix86_tune = processor_alias_table[i].processor;
3702 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3703 {
3704 if (!(processor_alias_table[i].flags & PTA_64BIT))
3705 {
3706 if (ix86_tune_defaulted)
3707 {
3708 opts->x_ix86_tune_string = "x86-64";
3709 for (i = 0; i < pta_size; i++)
3710 if (! strcmp (opts->x_ix86_tune_string,
3711 processor_alias_table[i].name))
3712 break;
3713 ix86_schedule = processor_alias_table[i].schedule;
3714 ix86_tune = processor_alias_table[i].processor;
3715 }
3716 else
3717 error ("CPU you selected does not support x86-64 "
3718 "instruction set");
3719 }
3720 }
3721 /* Intel CPUs have always interpreted SSE prefetch instructions as
3722 NOPs; so, we can enable SSE prefetch instructions even when
3723 -mtune (rather than -march) points us to a processor that has them.
3724 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3725 higher processors. */
3726 if (TARGET_CMOV
3727 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3728 x86_prefetch_sse = true;
3729 break;
3730 }
3731
3732 if (ix86_tune_specified && i == pta_size)
3733 error ("bad value (%s) for %stune=%s %s",
3734 opts->x_ix86_tune_string, prefix, suffix, sw);
3735
3736 set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3737
3738 #ifndef USE_IX86_FRAME_POINTER
3739 #define USE_IX86_FRAME_POINTER 0
3740 #endif
3741
3742 #ifndef USE_X86_64_FRAME_POINTER
3743 #define USE_X86_64_FRAME_POINTER 0
3744 #endif
3745
3746 /* Set the default values for switches whose default depends on TARGET_64BIT
3747 in case they weren't overwritten by command line options. */
3748 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3749 {
3750 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3751 opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3752 if (opts->x_flag_asynchronous_unwind_tables
3753 && !opts_set->x_flag_unwind_tables
3754 && TARGET_64BIT_MS_ABI)
3755 opts->x_flag_unwind_tables = 1;
3756 if (opts->x_flag_asynchronous_unwind_tables == 2)
3757 opts->x_flag_unwind_tables
3758 = opts->x_flag_asynchronous_unwind_tables = 1;
3759 if (opts->x_flag_pcc_struct_return == 2)
3760 opts->x_flag_pcc_struct_return = 0;
3761 }
3762 else
3763 {
3764 if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3765 opts->x_flag_omit_frame_pointer
3766 = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3767 if (opts->x_flag_asynchronous_unwind_tables == 2)
3768 opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3769 if (opts->x_flag_pcc_struct_return == 2)
3770 opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3771 }
3772
3773 ix86_tune_cost = processor_target_table[ix86_tune].cost;
3774 if (opts->x_optimize_size)
3775 ix86_cost = &ix86_size_cost;
3776 else
3777 ix86_cost = ix86_tune_cost;
3778
3779 /* Arrange to set up i386_stack_locals for all functions. */
3780 init_machine_status = ix86_init_machine_status;
3781
3782 /* Validate -mregparm= value. */
3783 if (opts_set->x_ix86_regparm)
3784 {
3785 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3786 warning (0, "-mregparm is ignored in 64-bit mode");
3787 if (opts->x_ix86_regparm > REGPARM_MAX)
3788 {
3789 error ("-mregparm=%d is not between 0 and %d",
3790 opts->x_ix86_regparm, REGPARM_MAX);
3791 opts->x_ix86_regparm = 0;
3792 }
3793 }
3794 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3795 opts->x_ix86_regparm = REGPARM_MAX;
3796
3797 /* Default align_* from the processor table. */
3798 if (opts->x_align_loops == 0)
3799 {
3800 opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3801 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3802 }
3803 if (opts->x_align_jumps == 0)
3804 {
3805 opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3806 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3807 }
3808 if (opts->x_align_functions == 0)
3809 {
3810 opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3811 }
3812
3813 /* Provide default for -mbranch-cost= value. */
3814 if (!opts_set->x_ix86_branch_cost)
3815 opts->x_ix86_branch_cost = ix86_cost->branch_cost;
3816
3817 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3818 {
3819 opts->x_target_flags
3820 |= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3821
3822 /* Enable by default the SSE and MMX builtins. Do allow the user to
3823 explicitly disable any of these. In particular, disabling SSE and
3824 MMX for kernel code is extremely useful. */
3825 if (!ix86_arch_specified)
3826 opts->x_ix86_isa_flags
3827 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3828 | TARGET_SUBTARGET64_ISA_DEFAULT)
3829 & ~opts->x_ix86_isa_flags_explicit);
3830
3831 if (TARGET_RTD_P (opts->x_target_flags))
3832 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3833 }
3834 else
3835 {
3836 opts->x_target_flags
3837 |= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3838
3839 if (!ix86_arch_specified)
3840 opts->x_ix86_isa_flags
3841 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3842
3843 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
3844 when the programmer takes care to keep the stack from being destroyed. */
3845 if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3846 opts->x_target_flags |= MASK_NO_RED_ZONE;
3847 }
3848
3849 /* Keep nonleaf frame pointers. */
3850 if (opts->x_flag_omit_frame_pointer)
3851 opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3852 else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3853 opts->x_flag_omit_frame_pointer = 1;
3854
3855 /* If we're doing fast math, we don't care about comparison order
3856 wrt NaNs. This lets us use a shorter comparison sequence. */
3857 if (opts->x_flag_finite_math_only)
3858 opts->x_target_flags &= ~MASK_IEEE_FP;
3859
3860 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3861 since the insns won't need emulation. */
3862 if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3863 opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3864
3865 /* Likewise, if the target doesn't have a 387, or we've specified
3866 software floating point, don't use 387 inline intrinsics. */
3867 if (!TARGET_80387_P (opts->x_target_flags))
3868 opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3869
3870 /* Turn on MMX builtins for -msse. */
3871 if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3872 opts->x_ix86_isa_flags
3873 |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
3874
3875 /* Enable SSE prefetch. */
3876 if (TARGET_SSE_P (opts->x_ix86_isa_flags)
3877 || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
3878 x86_prefetch_sse = true;
3879
3880 /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
3881 if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
3882 || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
3883 opts->x_ix86_isa_flags
3884 |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
3885
3886 /* Enable popcnt instruction for -msse4.2 or -mabm. */
3887 if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
3888 || TARGET_ABM_P (opts->x_ix86_isa_flags))
3889 opts->x_ix86_isa_flags
3890 |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
3891
3892 /* Enable lzcnt instruction for -mabm. */
3893 if (TARGET_ABM_P(opts->x_ix86_isa_flags))
3894 opts->x_ix86_isa_flags
3895 |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
3896
3897 /* Validate -mpreferred-stack-boundary= value or default it to
3898 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3899 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3900 if (opts_set->x_ix86_preferred_stack_boundary_arg)
3901 {
3902 int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3903 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
3904 int max = (TARGET_SEH ? 4 : 12);
3905
3906 if (opts->x_ix86_preferred_stack_boundary_arg < min
3907 || opts->x_ix86_preferred_stack_boundary_arg > max)
3908 {
3909 if (min == max)
3910 error ("-mpreferred-stack-boundary is not supported "
3911 "for this target");
3912 else
3913 error ("-mpreferred-stack-boundary=%d is not between %d and %d",
3914 opts->x_ix86_preferred_stack_boundary_arg, min, max);
3915 }
3916 else
3917 ix86_preferred_stack_boundary
3918 = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
3919 }
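/* Worked example (illustrative only): the option argument is the log2 of the
   boundary in bytes, so -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte aligned
   stack, which is also the smallest value accepted in 64-bit mode when SSE
   is enabled (min == 4 above).  */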
3920
3921 /* Set the default value for -mstackrealign. */
3922 if (opts->x_ix86_force_align_arg_pointer == -1)
3923 opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3924
3925 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3926
3927 /* Validate -mincoming-stack-boundary= value or default it to
3928 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3929 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3930 if (opts_set->x_ix86_incoming_stack_boundary_arg)
3931 {
3932 if (opts->x_ix86_incoming_stack_boundary_arg
3933 < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2)
3934 || opts->x_ix86_incoming_stack_boundary_arg > 12)
3935 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3936 opts->x_ix86_incoming_stack_boundary_arg,
3937 TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 4 : 2);
3938 else
3939 {
3940 ix86_user_incoming_stack_boundary
3941 = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
3942 ix86_incoming_stack_boundary
3943 = ix86_user_incoming_stack_boundary;
3944 }
3945 }
3946
3947 /* Accept -msseregparm only if at least SSE support is enabled. */
3948 if (TARGET_SSEREGPARM_P (opts->x_target_flags)
3949 && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
3950 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3951
3952 if (opts_set->x_ix86_fpmath)
3953 {
3954 if (opts->x_ix86_fpmath & FPMATH_SSE)
3955 {
3956 if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
3957 {
3958 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3959 opts->x_ix86_fpmath = FPMATH_387;
3960 }
3961 else if ((opts->x_ix86_fpmath & FPMATH_387)
3962 && !TARGET_80387_P (opts->x_target_flags))
3963 {
3964 warning (0, "387 instruction set disabled, using SSE arithmetics");
3965 opts->x_ix86_fpmath = FPMATH_SSE;
3966 }
3967 }
3968 }
3969 /* For all chips supporting SSE2, -mfpmath=sse performs better than
3970 -mfpmath=387.  The latter is nevertheless the default on many targets,
3971 since the extra 80-bit precision of temporaries is considered part of the ABI.
3972 Overwrite the default at least for -ffast-math.
3973 TODO: -mfpmath=both seems to produce similarly performing code with
3974 slightly smaller binaries.  It is however not clear if register allocation is
3975 ready for this setting.
3976 Also -mfpmath=387 is overall a lot more compact (about 4-5%) than SSE
3977 codegen.  We may switch to 387 with -ffast-math for size-optimized
3978 functions. */
3979 else if (fast_math_flags_set_p (&global_options)
3980 && TARGET_SSE2_P (opts->x_ix86_isa_flags))
3981 opts->x_ix86_fpmath = FPMATH_SSE;
3982 else
3983 opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
3984
3985 /* If the i387 is disabled, then do not return values in it. */
3986 if (!TARGET_80387_P (opts->x_target_flags))
3987 opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
3988
3989 /* Use an external vectorized library for vectorizing intrinsics. */
3990 if (opts_set->x_ix86_veclibabi_type)
3991 switch (opts->x_ix86_veclibabi_type)
3992 {
3993 case ix86_veclibabi_type_svml:
3994 ix86_veclib_handler = ix86_veclibabi_svml;
3995 break;
3996
3997 case ix86_veclibabi_type_acml:
3998 ix86_veclib_handler = ix86_veclibabi_acml;
3999 break;
4000
4001 default:
4002 gcc_unreachable ();
4003 }
4004
4005 if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4006 && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4007 && !opts->x_optimize_size)
4008 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4009
4010 /* If stack probes are required, the space used for large function
4011 arguments on the stack must also be probed, so enable
4012 -maccumulate-outgoing-args so this happens in the prologue. */
4013 if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4014 && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4015 {
4016 if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4017 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4018 "for correctness", prefix, suffix);
4019 opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4020 }
4021
4022 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
4023 {
4024 char *p;
4025 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4026 p = strchr (internal_label_prefix, 'X');
4027 internal_label_prefix_len = p - internal_label_prefix;
4028 *p = '\0';
4029 }
4030
4031 /* When scheduling description is not available, disable scheduler pass
4032 so it won't slow down the compilation and make x87 code slower. */
4033 if (!TARGET_SCHEDULE)
4034 opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4035
4036 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4037 ix86_tune_cost->simultaneous_prefetches,
4038 opts->x_param_values,
4039 opts_set->x_param_values);
4040 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4041 ix86_tune_cost->prefetch_block,
4042 opts->x_param_values,
4043 opts_set->x_param_values);
4044 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4045 ix86_tune_cost->l1_cache_size,
4046 opts->x_param_values,
4047 opts_set->x_param_values);
4048 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4049 ix86_tune_cost->l2_cache_size,
4050 opts->x_param_values,
4051 opts_set->x_param_values);
4052
4053 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
4054 if (opts->x_flag_prefetch_loop_arrays < 0
4055 && HAVE_prefetch
4056 && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4057 && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4058 opts->x_flag_prefetch_loop_arrays = 1;
4059
4060 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4061 can be optimized to ap = __builtin_next_arg (0). */
4062 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4063 targetm.expand_builtin_va_start = NULL;
4064
4065 if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4066 {
4067 ix86_gen_leave = gen_leave_rex64;
4068 if (Pmode == DImode)
4069 {
4070 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4071 ix86_gen_tls_local_dynamic_base_64
4072 = gen_tls_local_dynamic_base_64_di;
4073 }
4074 else
4075 {
4076 ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4077 ix86_gen_tls_local_dynamic_base_64
4078 = gen_tls_local_dynamic_base_64_si;
4079 }
4080 }
4081 else
4082 ix86_gen_leave = gen_leave;
4083
4084 if (Pmode == DImode)
4085 {
4086 ix86_gen_add3 = gen_adddi3;
4087 ix86_gen_sub3 = gen_subdi3;
4088 ix86_gen_sub3_carry = gen_subdi3_carry;
4089 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4090 ix86_gen_andsp = gen_anddi3;
4091 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4092 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4093 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4094 ix86_gen_monitor = gen_sse3_monitor_di;
4095 }
4096 else
4097 {
4098 ix86_gen_add3 = gen_addsi3;
4099 ix86_gen_sub3 = gen_subsi3;
4100 ix86_gen_sub3_carry = gen_subsi3_carry;
4101 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4102 ix86_gen_andsp = gen_andsi3;
4103 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4104 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4105 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4106 ix86_gen_monitor = gen_sse3_monitor_si;
4107 }
4108
4109 #ifdef USE_IX86_CLD
4110 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
4111 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4112 opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4113 #endif
4114
4115 if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4116 {
4117 if (opts->x_flag_fentry > 0)
4118 sorry ("-mfentry isn%'t supported for 32-bit in combination "
4119 "with -fpic");
4120 opts->x_flag_fentry = 0;
4121 }
4122 else if (TARGET_SEH)
4123 {
4124 if (opts->x_flag_fentry == 0)
4125 sorry ("-mno-fentry isn%'t compatible with SEH");
4126 opts->x_flag_fentry = 1;
4127 }
4128 else if (opts->x_flag_fentry < 0)
4129 {
4130 #if defined(PROFILE_BEFORE_PROLOGUE)
4131 opts->x_flag_fentry = 1;
4132 #else
4133 opts->x_flag_fentry = 0;
4134 #endif
4135 }
4136
4137 /* When not optimizing for size, enable vzeroupper optimization for
4138 TARGET_AVX with -fexpensive-optimizations and split 32-byte
4139 AVX unaligned loads/stores. */
4140 if (!opts->x_optimize_size)
4141 {
4142 if (flag_expensive_optimizations
4143 && !(opts_set->x_target_flags & MASK_VZEROUPPER))
4144 opts->x_target_flags |= MASK_VZEROUPPER;
4145 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4146 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4147 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4148 if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4149 && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4150 opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4151 /* Enable 128-bit AVX instruction generation
4152 for the auto-vectorizer. */
4153 if (TARGET_AVX128_OPTIMAL
4154 && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4155 opts->x_target_flags |= MASK_PREFER_AVX128;
4156 }
4157
4158 if (opts->x_ix86_recip_name)
4159 {
4160 char *p = ASTRDUP (opts->x_ix86_recip_name);
4161 char *q;
4162 unsigned int mask, i;
4163 bool invert;
4164
4165 while ((q = strtok (p, ",")) != NULL)
4166 {
4167 p = NULL;
4168 if (*q == '!')
4169 {
4170 invert = true;
4171 q++;
4172 }
4173 else
4174 invert = false;
4175
4176 if (!strcmp (q, "default"))
4177 mask = RECIP_MASK_ALL;
4178 else
4179 {
4180 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4181 if (!strcmp (q, recip_options[i].string))
4182 {
4183 mask = recip_options[i].mask;
4184 break;
4185 }
4186
4187 if (i == ARRAY_SIZE (recip_options))
4188 {
4189 error ("unknown option for -mrecip=%s", q);
4190 invert = false;
4191 mask = RECIP_MASK_NONE;
4192 }
4193 }
4194
4195 opts->x_recip_mask_explicit |= mask;
4196 if (invert)
4197 opts->x_recip_mask &= ~mask;
4198 else
4199 opts->x_recip_mask |= mask;
4200 }
4201 }
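/* Illustrative only: for -mrecip=all,!sqrt the loop above first ORs
   RECIP_MASK_ALL into opts->x_recip_mask, then the "!sqrt" token sets
   invert and clears RECIP_MASK_SQRT again, so reciprocal approximations
   stay enabled for everything except scalar square roots.  */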
4202
4203 if (TARGET_RECIP_P (opts->x_target_flags))
4204 opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4205 else if (opts_set->x_target_flags & MASK_RECIP)
4206 opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4207
4208 /* Default long double to 64-bit for 32-bit Bionic and to __float128
4209 for 64-bit Bionic. */
4210 if (TARGET_HAS_BIONIC
4211 && !(opts_set->x_target_flags
4212 & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4213 opts->x_target_flags |= (TARGET_64BIT
4214 ? MASK_LONG_DOUBLE_128
4215 : MASK_LONG_DOUBLE_64);
4216
4217 /* Only one of them can be active. */
4218 gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4219 || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4220
4221 /* Save the initial options in case the user does function specific
4222 options. */
4223 if (main_args_p)
4224 target_option_default_node = target_option_current_node
4225 = build_target_option_node (opts);
4226
4227 /* Handle stack protector */
4228 if (!opts_set->x_ix86_stack_protector_guard)
4229 opts->x_ix86_stack_protector_guard
4230 = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4231
4232 /* Handle -mmemcpy-strategy= and -mmemset-strategy= */
4233 if (opts->x_ix86_tune_memcpy_strategy)
4234 {
4235 char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4236 ix86_parse_stringop_strategy_string (str, false);
4237 free (str);
4238 }
4239
4240 if (opts->x_ix86_tune_memset_strategy)
4241 {
4242 char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4243 ix86_parse_stringop_strategy_string (str, true);
4244 free (str);
4245 }
4246 }
4247
4248 /* Implement the TARGET_OPTION_OVERRIDE hook. */
4249
4250 static void
4251 ix86_option_override (void)
4252 {
4253 opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4254 static struct register_pass_info insert_vzeroupper_info
4255 = { pass_insert_vzeroupper, "reload",
4256 1, PASS_POS_INSERT_AFTER
4257 };
4258
4259 ix86_option_override_internal (true, &global_options, &global_options_set);
4260
4261
4262 /* This needs to be done at start up. It's convenient to do it here. */
4263 register_pass (&insert_vzeroupper_info);
4264 }
4265
4266 /* Update register usage after having seen the compiler flags. */
4267
4268 static void
4269 ix86_conditional_register_usage (void)
4270 {
4271 int i, c_mask;
4272 unsigned int j;
4273
4274 /* The PIC register, if it exists, is fixed. */
4275 j = PIC_OFFSET_TABLE_REGNUM;
4276 if (j != INVALID_REGNUM)
4277 fixed_regs[j] = call_used_regs[j] = 1;
4278
4279 /* For 32-bit targets, squash the REX registers. */
4280 if (! TARGET_64BIT)
4281 {
4282 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4283 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4284 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4285 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4286 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4287 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4288 }
4289
4290 /* See the definition of CALL_USED_REGISTERS in i386.h. */
4291 c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4292 : TARGET_64BIT ? (1 << 2)
4293 : (1 << 1));
4294
4295 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4296
4297 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4298 {
4299 /* Set/reset conditionally defined registers from
4300 CALL_USED_REGISTERS initializer. */
4301 if (call_used_regs[i] > 1)
4302 call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4303
4304 /* Calculate registers of CLOBBERED_REGS register set
4305 as call used registers from GENERAL_REGS register set. */
4306 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4307 && call_used_regs[i])
4308 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4309 }
4310
4311 /* If MMX is disabled, squash the registers. */
4312 if (! TARGET_MMX)
4313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4314 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4315 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4316
4317 /* If SSE is disabled, squash the registers. */
4318 if (! TARGET_SSE)
4319 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4320 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4321 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4322
4323 /* If the FPU is disabled, squash the registers. */
4324 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4325 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4326 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4327 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4328
4329 /* If AVX512F is disabled, squash the registers. */
4330 if (! TARGET_AVX512F)
4331 {
4332 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4333 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4334
4335 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4336 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4337 }
4338 }
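/* Illustrative only: for a 32-bit (-m32) compilation the loops above clear
   reg_names[] and mark as fixed every REX integer register (r8-r15), the
   upper SSE registers (xmm8-xmm15) and the AVX-512 extension registers
   (xmm16-xmm31), so the register allocator never considers them.  */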
4339
4340 \f
4341 /* Save the current options */
4342
4343 static void
4344 ix86_function_specific_save (struct cl_target_option *ptr,
4345 struct gcc_options *opts)
4346 {
4347 ptr->arch = ix86_arch;
4348 ptr->schedule = ix86_schedule;
4349 ptr->tune = ix86_tune;
4350 ptr->branch_cost = ix86_branch_cost;
4351 ptr->tune_defaulted = ix86_tune_defaulted;
4352 ptr->arch_specified = ix86_arch_specified;
4353 ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4354 ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4355 ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4356 ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4357 ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4358 ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4359 ptr->x_ix86_abi = opts->x_ix86_abi;
4360 ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4361 ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4362 ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4363 ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4364 ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4365 ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4366 ptr->x_ix86_pmode = opts->x_ix86_pmode;
4367 ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4368 ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4369 ptr->x_ix86_regparm = opts->x_ix86_regparm;
4370 ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4371 ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4372 ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4373 ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4374 ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4375 ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4376 ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4377 ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4378 ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4379 ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4380
4381 /* The fields are char but the variables are not; make sure the
4382 values fit in the fields. */
4383 gcc_assert (ptr->arch == ix86_arch);
4384 gcc_assert (ptr->schedule == ix86_schedule);
4385 gcc_assert (ptr->tune == ix86_tune);
4386 gcc_assert (ptr->branch_cost == ix86_branch_cost);
4387 }
4388
4389 /* Restore the current options */
4390
4391 static void
4392 ix86_function_specific_restore (struct gcc_options *opts,
4393 struct cl_target_option *ptr)
4394 {
4395 enum processor_type old_tune = ix86_tune;
4396 enum processor_type old_arch = ix86_arch;
4397 unsigned int ix86_arch_mask;
4398 int i;
4399
4400 /* We don't change -fPIC. */
4401 opts->x_flag_pic = flag_pic;
4402
4403 ix86_arch = (enum processor_type) ptr->arch;
4404 ix86_schedule = (enum attr_cpu) ptr->schedule;
4405 ix86_tune = (enum processor_type) ptr->tune;
4406 opts->x_ix86_branch_cost = ptr->branch_cost;
4407 ix86_tune_defaulted = ptr->tune_defaulted;
4408 ix86_arch_specified = ptr->arch_specified;
4409 opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4410 opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4411 opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4412 opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4413 opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4414 opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4415 opts->x_ix86_abi = ptr->x_ix86_abi;
4416 opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4417 opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4418 opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4419 opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4420 opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4421 opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4422 opts->x_ix86_pmode = ptr->x_ix86_pmode;
4423 opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4424 opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4425 opts->x_ix86_regparm = ptr->x_ix86_regparm;
4426 opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4427 opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4428 opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4429 opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4430 opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4431 opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4432 opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4433 opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4434 opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4435 opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4436
4437 /* Recreate the arch feature tests if the arch changed */
4438 if (old_arch != ix86_arch)
4439 {
4440 ix86_arch_mask = 1u << ix86_arch;
4441 for (i = 0; i < X86_ARCH_LAST; ++i)
4442 ix86_arch_features[i]
4443 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4444 }
4445
4446 /* Recreate the tune optimization tests */
4447 if (old_tune != ix86_tune)
4448 set_ix86_tune_features (ix86_tune, false);
4449 }
4450
4451 /* Print the current options */
4452
4453 static void
4454 ix86_function_specific_print (FILE *file, int indent,
4455 struct cl_target_option *ptr)
4456 {
4457 char *target_string
4458 = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4459 NULL, NULL, ptr->x_ix86_fpmath, false);
4460
4461 gcc_assert (ptr->arch < PROCESSOR_max);
4462 fprintf (file, "%*sarch = %d (%s)\n",
4463 indent, "",
4464 ptr->arch, processor_target_table[ptr->arch].name);
4465
4466 gcc_assert (ptr->tune < PROCESSOR_max);
4467 fprintf (file, "%*stune = %d (%s)\n",
4468 indent, "",
4469 ptr->tune, processor_target_table[ptr->tune].name);
4470
4471 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4472
4473 if (target_string)
4474 {
4475 fprintf (file, "%*s%s\n", indent, "", target_string);
4476 free (target_string);
4477 }
4478 }
4479
4480 \f
4481 /* Inner function to process the attribute((target(...))): take an argument and
4482 set the current options from it. If we have a list, recursively go over
4483 the list. */
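/* As an illustrative sketch (hypothetical user code, not part of this
   file), an attribute such as

     __attribute__((target ("avx2,no-fancy-math-387,arch=core2")))
     int foo (void);

   arrives here as the string "avx2,no-fancy-math-387,arch=core2":
   "avx2" matches an IX86_ATTR_ISA entry, "no-fancy-math-387" is handled
   through the "no-" prefix recognition below, and "arch=core2" matches
   the IX86_ATTR_STR "arch=" entry and is stashed in p_strings[].  */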
4484
4485 static bool
4486 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4487 struct gcc_options *opts,
4488 struct gcc_options *opts_set,
4489 struct gcc_options *enum_opts_set)
4490 {
4491 char *next_optstr;
4492 bool ret = true;
4493
4494 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4495 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4496 #define IX86_ATTR_ENUM(S,O) { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4497 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4498 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
4499
4500 enum ix86_opt_type
4501 {
4502 ix86_opt_unknown,
4503 ix86_opt_yes,
4504 ix86_opt_no,
4505 ix86_opt_str,
4506 ix86_opt_enum,
4507 ix86_opt_isa
4508 };
4509
4510 static const struct
4511 {
4512 const char *string;
4513 size_t len;
4514 enum ix86_opt_type type;
4515 int opt;
4516 int mask;
4517 } attrs[] = {
4518 /* isa options */
4519 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
4520 IX86_ATTR_ISA ("abm", OPT_mabm),
4521 IX86_ATTR_ISA ("bmi", OPT_mbmi),
4522 IX86_ATTR_ISA ("bmi2", OPT_mbmi2),
4523 IX86_ATTR_ISA ("lzcnt", OPT_mlzcnt),
4524 IX86_ATTR_ISA ("tbm", OPT_mtbm),
4525 IX86_ATTR_ISA ("aes", OPT_maes),
4526 IX86_ATTR_ISA ("sha", OPT_msha),
4527 IX86_ATTR_ISA ("avx", OPT_mavx),
4528 IX86_ATTR_ISA ("avx2", OPT_mavx2),
4529 IX86_ATTR_ISA ("avx512f", OPT_mavx512f),
4530 IX86_ATTR_ISA ("avx512pf", OPT_mavx512pf),
4531 IX86_ATTR_ISA ("avx512er", OPT_mavx512er),
4532 IX86_ATTR_ISA ("avx512cd", OPT_mavx512cd),
4533 IX86_ATTR_ISA ("mmx", OPT_mmmx),
4534 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
4535 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
4536 IX86_ATTR_ISA ("sse", OPT_msse),
4537 IX86_ATTR_ISA ("sse2", OPT_msse2),
4538 IX86_ATTR_ISA ("sse3", OPT_msse3),
4539 IX86_ATTR_ISA ("sse4", OPT_msse4),
4540 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
4541 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
4542 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
4543 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
4544 IX86_ATTR_ISA ("fma4", OPT_mfma4),
4545 IX86_ATTR_ISA ("fma", OPT_mfma),
4546 IX86_ATTR_ISA ("xop", OPT_mxop),
4547 IX86_ATTR_ISA ("lwp", OPT_mlwp),
4548 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
4549 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
4550 IX86_ATTR_ISA ("f16c", OPT_mf16c),
4551 IX86_ATTR_ISA ("rtm", OPT_mrtm),
4552 IX86_ATTR_ISA ("hle", OPT_mhle),
4553 IX86_ATTR_ISA ("prfchw", OPT_mprfchw),
4554 IX86_ATTR_ISA ("rdseed", OPT_mrdseed),
4555 IX86_ATTR_ISA ("adx", OPT_madx),
4556 IX86_ATTR_ISA ("fxsr", OPT_mfxsr),
4557 IX86_ATTR_ISA ("xsave", OPT_mxsave),
4558 IX86_ATTR_ISA ("xsaveopt", OPT_mxsaveopt),
4559 IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4560
4561 /* enum options */
4562 IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
4563
4564 /* string options */
4565 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
4566 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
4567
4568 /* flag options */
4569 IX86_ATTR_YES ("cld",
4570 OPT_mcld,
4571 MASK_CLD),
4572
4573 IX86_ATTR_NO ("fancy-math-387",
4574 OPT_mfancy_math_387,
4575 MASK_NO_FANCY_MATH_387),
4576
4577 IX86_ATTR_YES ("ieee-fp",
4578 OPT_mieee_fp,
4579 MASK_IEEE_FP),
4580
4581 IX86_ATTR_YES ("inline-all-stringops",
4582 OPT_minline_all_stringops,
4583 MASK_INLINE_ALL_STRINGOPS),
4584
4585 IX86_ATTR_YES ("inline-stringops-dynamically",
4586 OPT_minline_stringops_dynamically,
4587 MASK_INLINE_STRINGOPS_DYNAMICALLY),
4588
4589 IX86_ATTR_NO ("align-stringops",
4590 OPT_mno_align_stringops,
4591 MASK_NO_ALIGN_STRINGOPS),
4592
4593 IX86_ATTR_YES ("recip",
4594 OPT_mrecip,
4595 MASK_RECIP),
4596
4597 };
4598
4599 /* If this is a list, recurse to get the options. */
4600 if (TREE_CODE (args) == TREE_LIST)
4601 {
4602 bool ret = true;
4603
4604 for (; args; args = TREE_CHAIN (args))
4605 if (TREE_VALUE (args)
4606 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4607 p_strings, opts, opts_set,
4608 enum_opts_set))
4609 ret = false;
4610
4611 return ret;
4612 }
4613
4614 else if (TREE_CODE (args) != STRING_CST)
4615 {
4616 error ("attribute %<target%> argument not a string");
4617 return false;
4618 }
4619
4620 /* Handle multiple arguments separated by commas. */
4621 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4622
4623 while (next_optstr && *next_optstr != '\0')
4624 {
4625 char *p = next_optstr;
4626 char *orig_p = p;
4627 char *comma = strchr (next_optstr, ',');
4628 const char *opt_string;
4629 size_t len, opt_len;
4630 int opt;
4631 bool opt_set_p;
4632 char ch;
4633 unsigned i;
4634 enum ix86_opt_type type = ix86_opt_unknown;
4635 int mask = 0;
4636
4637 if (comma)
4638 {
4639 *comma = '\0';
4640 len = comma - next_optstr;
4641 next_optstr = comma + 1;
4642 }
4643 else
4644 {
4645 len = strlen (p);
4646 next_optstr = NULL;
4647 }
4648
4649 /* Recognize no-xxx. */
4650 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4651 {
4652 opt_set_p = false;
4653 p += 3;
4654 len -= 3;
4655 }
4656 else
4657 opt_set_p = true;
4658
4659 /* Find the option. */
4660 ch = *p;
4661 opt = N_OPTS;
4662 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4663 {
4664 type = attrs[i].type;
4665 opt_len = attrs[i].len;
4666 if (ch == attrs[i].string[0]
4667 && ((type != ix86_opt_str && type != ix86_opt_enum)
4668 ? len == opt_len
4669 : len > opt_len)
4670 && memcmp (p, attrs[i].string, opt_len) == 0)
4671 {
4672 opt = attrs[i].opt;
4673 mask = attrs[i].mask;
4674 opt_string = attrs[i].string;
4675 break;
4676 }
4677 }
4678
4679 /* Process the option. */
4680 if (opt == N_OPTS)
4681 {
4682 error ("attribute(target(\"%s\")) is unknown", orig_p);
4683 ret = false;
4684 }
4685
4686 else if (type == ix86_opt_isa)
4687 {
4688 struct cl_decoded_option decoded;
4689
4690 generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4691 ix86_handle_option (opts, opts_set,
4692 &decoded, input_location);
4693 }
4694
4695 else if (type == ix86_opt_yes || type == ix86_opt_no)
4696 {
4697 if (type == ix86_opt_no)
4698 opt_set_p = !opt_set_p;
4699
4700 if (opt_set_p)
4701 opts->x_target_flags |= mask;
4702 else
4703 opts->x_target_flags &= ~mask;
4704 }
4705
4706 else if (type == ix86_opt_str)
4707 {
4708 if (p_strings[opt])
4709 {
4710 error ("option(\"%s\") was already specified", opt_string);
4711 ret = false;
4712 }
4713 else
4714 p_strings[opt] = xstrdup (p + opt_len);
4715 }
4716
4717 else if (type == ix86_opt_enum)
4718 {
4719 bool arg_ok;
4720 int value;
4721
4722 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4723 if (arg_ok)
4724 set_option (opts, enum_opts_set, opt, value,
4725 p + opt_len, DK_UNSPECIFIED, input_location,
4726 global_dc);
4727 else
4728 {
4729 error ("attribute(target(\"%s\")) is unknown", orig_p);
4730 ret = false;
4731 }
4732 }
4733
4734 else
4735 gcc_unreachable ();
4736 }
4737
4738 return ret;
4739 }
4740
4741 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4742
4743 tree
4744 ix86_valid_target_attribute_tree (tree args,
4745 struct gcc_options *opts,
4746 struct gcc_options *opts_set)
4747 {
4748 const char *orig_arch_string = opts->x_ix86_arch_string;
4749 const char *orig_tune_string = opts->x_ix86_tune_string;
4750 enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4751 int orig_tune_defaulted = ix86_tune_defaulted;
4752 int orig_arch_specified = ix86_arch_specified;
4753 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4754 tree t = NULL_TREE;
4755 int i;
4756 struct cl_target_option *def
4757 = TREE_TARGET_OPTION (target_option_default_node);
4758 struct gcc_options enum_opts_set;
4759
4760 memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4761
4762 /* Process each of the options on the chain. */
4763 if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4764 opts_set, &enum_opts_set))
4765 return error_mark_node;
4766
4767 /* If the changed options are different from the default, rerun
4768 ix86_option_override_internal, and then save the options away.
4769 The string options are attribute options, and will be undone
4770 when we copy the save structure. */
4771 if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4772 || opts->x_target_flags != def->x_target_flags
4773 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4774 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4775 || enum_opts_set.x_ix86_fpmath)
4776 {
4777 /* If we are using the default tune= or arch=, undo the string assigned,
4778 and use the default. */
4779 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4780 opts->x_ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4781 else if (!orig_arch_specified)
4782 opts->x_ix86_arch_string = NULL;
4783
4784 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4785 opts->x_ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4786 else if (orig_tune_defaulted)
4787 opts->x_ix86_tune_string = NULL;
4788
4789 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4790 if (enum_opts_set.x_ix86_fpmath)
4791 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4792 else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
4793 && TARGET_SSE_P (opts->x_ix86_isa_flags))
4794 {
4795 opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
4796 opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
4797 }
4798
4799 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4800 ix86_option_override_internal (false, opts, opts_set);
4801
4802 /* Add any builtin functions with the new isa if any. */
4803 ix86_add_new_builtins (opts->x_ix86_isa_flags);
4804
4805 /* Save the current options unless we are validating options for
4806 #pragma. */
4807 t = build_target_option_node (opts);
4808
4809 opts->x_ix86_arch_string = orig_arch_string;
4810 opts->x_ix86_tune_string = orig_tune_string;
4811 opts_set->x_ix86_fpmath = orig_fpmath_set;
4812
4813 /* Free up memory allocated to hold the strings */
4814 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4815 free (option_strings[i]);
4816 }
4817
4818 return t;
4819 }
4820
4821 /* Hook to validate attribute((target("string"))). */
4822
4823 static bool
4824 ix86_valid_target_attribute_p (tree fndecl,
4825 tree ARG_UNUSED (name),
4826 tree args,
4827 int ARG_UNUSED (flags))
4828 {
4829 struct gcc_options func_options;
4830 tree new_target, new_optimize;
4831 bool ret = true;
4832
4833 /* attribute((target("default"))) does nothing, beyond
4834 affecting multi-versioning. */
4835 if (TREE_VALUE (args)
4836 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
4837 && TREE_CHAIN (args) == NULL_TREE
4838 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
4839 return true;
4840
4841 tree old_optimize = build_optimization_node (&global_options);
4842
4843 /* Get the optimization options of the current function. */
4844 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4845
4846 if (!func_optimize)
4847 func_optimize = old_optimize;
4848
4849 /* Init func_options. */
4850 memset (&func_options, 0, sizeof (func_options));
4851 init_options_struct (&func_options, NULL);
4852 lang_hooks.init_options_struct (&func_options);
4853
4854 cl_optimization_restore (&func_options,
4855 TREE_OPTIMIZATION (func_optimize));
4856
4857 /* Initialize func_options to the default before its target options can
4858 be set. */
4859 cl_target_option_restore (&func_options,
4860 TREE_TARGET_OPTION (target_option_default_node));
4861
4862 new_target = ix86_valid_target_attribute_tree (args, &func_options,
4863 &global_options_set);
4864
4865 new_optimize = build_optimization_node (&func_options);
4866
4867 if (new_target == error_mark_node)
4868 ret = false;
4869
4870 else if (fndecl && new_target)
4871 {
4872 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4873
4874 if (old_optimize != new_optimize)
4875 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4876 }
4877
4878 return ret;
4879 }
4880
4881 \f
4882 /* Hook to determine if one function can safely inline another. */
4883
4884 static bool
4885 ix86_can_inline_p (tree caller, tree callee)
4886 {
4887 bool ret = false;
4888 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4889 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4890
4891 /* If callee has no option attributes, then it is ok to inline. */
4892 if (!callee_tree)
4893 ret = true;
4894
4895 /* If caller has no option attributes, but callee does then it is not ok to
4896 inline. */
4897 else if (!caller_tree)
4898 ret = false;
4899
4900 else
4901 {
4902 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4903 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4904
4905 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4
4906 function can inline an SSE2 function but an SSE2 function can't inline
4907 an SSE4 function. */
4908 if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
4909 != callee_opts->x_ix86_isa_flags)
4910 ret = false;
4911
4912 /* See if we have the same non-isa options. */
4913 else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
4914 ret = false;
4915
4916 /* See if arch, tune, etc. are the same. */
4917 else if (caller_opts->arch != callee_opts->arch)
4918 ret = false;
4919
4920 else if (caller_opts->tune != callee_opts->tune)
4921 ret = false;
4922
4923 else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
4924 ret = false;
4925
4926 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4927 ret = false;
4928
4929 else
4930 ret = true;
4931 }
4932
4933 return ret;
4934 }
4935
4936 \f
4937 /* Remember the last target of ix86_set_current_function. */
4938 static GTY(()) tree ix86_previous_fndecl;
4939
4940 /* Invalidate ix86_previous_fndecl cache. */
4941 void
4942 ix86_reset_previous_fndecl (void)
4943 {
4944 ix86_previous_fndecl = NULL_TREE;
4945 }
4946
4947 /* Establish appropriate back-end context for processing the function
4948 FNDECL. The argument might be NULL to indicate processing at top
4949 level, outside of any function scope. */
4950 static void
4951 ix86_set_current_function (tree fndecl)
4952 {
4953 /* Only change the context if the function changes. This hook is called
4954 several times in the course of compiling a function, and we don't want to
4955 slow things down too much or call target_reinit when it isn't safe. */
4956 if (fndecl && fndecl != ix86_previous_fndecl)
4957 {
4958 tree old_tree = (ix86_previous_fndecl
4959 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4960 : NULL_TREE);
4961
4962 tree new_tree = (fndecl
4963 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4964 : NULL_TREE);
4965
4966 ix86_previous_fndecl = fndecl;
4967 if (old_tree == new_tree)
4968 ;
4969
4970 else if (new_tree)
4971 {
4972 cl_target_option_restore (&global_options,
4973 TREE_TARGET_OPTION (new_tree));
4974 if (TREE_TARGET_GLOBALS (new_tree))
4975 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4976 else
4977 TREE_TARGET_GLOBALS (new_tree)
4978 = save_target_globals_default_opts ();
4979 }
4980
4981 else if (old_tree)
4982 {
4983 new_tree = target_option_current_node;
4984 cl_target_option_restore (&global_options,
4985 TREE_TARGET_OPTION (new_tree));
4986 if (TREE_TARGET_GLOBALS (new_tree))
4987 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
4988 else if (new_tree == target_option_default_node)
4989 restore_target_globals (&default_target_globals);
4990 else
4991 TREE_TARGET_GLOBALS (new_tree)
4992 = save_target_globals_default_opts ();
4993 }
4994 }
4995 }
4996
4997 \f
4998 /* Return true if this goes in large data/bss. */
4999
5000 static bool
5001 ix86_in_large_data_p (tree exp)
5002 {
5003 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5004 return false;
5005
5006 /* Functions are never large data. */
5007 if (TREE_CODE (exp) == FUNCTION_DECL)
5008 return false;
5009
5010 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5011 {
5012 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
5013 if (strcmp (section, ".ldata") == 0
5014 || strcmp (section, ".lbss") == 0)
5015 return true;
5016 return false;
5017 }
5018 else
5019 {
5020 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5021
5022 /* If this is an incomplete type with size 0, then we can't put it
5023 in data because it might be too big when completed. */
5024 if (!size || size > ix86_section_threshold)
5025 return true;
5026 }
5027
5028 return false;
5029 }
5030
5031 /* Switch to the appropriate section for output of DECL.
5032 DECL is either a `VAR_DECL' node or a constant of some sort.
5033 RELOC indicates whether forming the initial value of DECL requires
5034 link-time relocations. */
5035
5036 ATTRIBUTE_UNUSED static section *
5037 x86_64_elf_select_section (tree decl, int reloc,
5038 unsigned HOST_WIDE_INT align)
5039 {
5040 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5041 && ix86_in_large_data_p (decl))
5042 {
5043 const char *sname = NULL;
5044 unsigned int flags = SECTION_WRITE;
5045 switch (categorize_decl_for_section (decl, reloc))
5046 {
5047 case SECCAT_DATA:
5048 sname = ".ldata";
5049 break;
5050 case SECCAT_DATA_REL:
5051 sname = ".ldata.rel";
5052 break;
5053 case SECCAT_DATA_REL_LOCAL:
5054 sname = ".ldata.rel.local";
5055 break;
5056 case SECCAT_DATA_REL_RO:
5057 sname = ".ldata.rel.ro";
5058 break;
5059 case SECCAT_DATA_REL_RO_LOCAL:
5060 sname = ".ldata.rel.ro.local";
5061 break;
5062 case SECCAT_BSS:
5063 sname = ".lbss";
5064 flags |= SECTION_BSS;
5065 break;
5066 case SECCAT_RODATA:
5067 case SECCAT_RODATA_MERGE_STR:
5068 case SECCAT_RODATA_MERGE_STR_INIT:
5069 case SECCAT_RODATA_MERGE_CONST:
5070 sname = ".lrodata";
5071 flags = 0;
5072 break;
5073 case SECCAT_SRODATA:
5074 case SECCAT_SDATA:
5075 case SECCAT_SBSS:
5076 gcc_unreachable ();
5077 case SECCAT_TEXT:
5078 case SECCAT_TDATA:
5079 case SECCAT_TBSS:
5080 /* We don't split these for the medium model. Place them into
5081 default sections and hope for the best. */
5082 break;
5083 }
5084 if (sname)
5085 {
5086 /* We might get called with string constants, but get_named_section
5087 doesn't like them as they are not DECLs. Also, we need to set
5088 flags in that case. */
5089 if (!DECL_P (decl))
5090 return get_section (sname, flags, NULL);
5091 return get_named_section (decl, sname, reloc);
5092 }
5093 }
5094 return default_elf_select_section (decl, reloc, align);
5095 }
5096
5097 /* Select a set of attributes for section NAME based on the properties
5098 of DECL and whether or not RELOC indicates that DECL's initializer
5099 might contain runtime relocations. */
5100
5101 static unsigned int ATTRIBUTE_UNUSED
5102 x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5103 {
5104 unsigned int flags = default_section_type_flags (decl, name, reloc);
5105
5106 if (decl == NULL_TREE
5107 && (strcmp (name, ".ldata.rel.ro") == 0
5108 || strcmp (name, ".ldata.rel.ro.local") == 0))
5109 flags |= SECTION_RELRO;
5110
5111 if (strcmp (name, ".lbss") == 0
5112 || strncmp (name, ".lbss.", 6) == 0
5113 || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5114 flags |= SECTION_BSS;
5115
5116 return flags;
5117 }
5118
5119 /* Build up a unique section name, expressed as a
5120 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5121 RELOC indicates whether the initial value of DECL requires
5122 link-time relocations. */
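/* For instance (a sketch with a hypothetical variable), a large object
   "big_table" categorized as SECCAT_DATA would be placed in a section
   named ".ldata.big_table", or ".gnu.linkonce.ld.big_table" when
   one_only is in effect; the names are built by the ACONCAT call
   below.  */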
5123
5124 static void ATTRIBUTE_UNUSED
5125 x86_64_elf_unique_section (tree decl, int reloc)
5126 {
5127 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5128 && ix86_in_large_data_p (decl))
5129 {
5130 const char *prefix = NULL;
5131 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
5132 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
5133
5134 switch (categorize_decl_for_section (decl, reloc))
5135 {
5136 case SECCAT_DATA:
5137 case SECCAT_DATA_REL:
5138 case SECCAT_DATA_REL_LOCAL:
5139 case SECCAT_DATA_REL_RO:
5140 case SECCAT_DATA_REL_RO_LOCAL:
5141 prefix = one_only ? ".ld" : ".ldata";
5142 break;
5143 case SECCAT_BSS:
5144 prefix = one_only ? ".lb" : ".lbss";
5145 break;
5146 case SECCAT_RODATA:
5147 case SECCAT_RODATA_MERGE_STR:
5148 case SECCAT_RODATA_MERGE_STR_INIT:
5149 case SECCAT_RODATA_MERGE_CONST:
5150 prefix = one_only ? ".lr" : ".lrodata";
5151 break;
5152 case SECCAT_SRODATA:
5153 case SECCAT_SDATA:
5154 case SECCAT_SBSS:
5155 gcc_unreachable ();
5156 case SECCAT_TEXT:
5157 case SECCAT_TDATA:
5158 case SECCAT_TBSS:
5159 /* We don't split these for the medium model. Place them into
5160 default sections and hope for the best. */
5161 break;
5162 }
5163 if (prefix)
5164 {
5165 const char *name, *linkonce;
5166 char *string;
5167
5168 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5169 name = targetm.strip_name_encoding (name);
5170
5171 /* If we're using one_only, then there needs to be a .gnu.linkonce
5172 prefix to the section name. */
5173 linkonce = one_only ? ".gnu.linkonce" : "";
5174
5175 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
5176
5177 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
5178 return;
5179 }
5180 }
5181 default_unique_section (decl, reloc);
5182 }
5183
5184 #ifdef COMMON_ASM_OP
5185 /* This says how to output assembler code to declare an
5186 uninitialized external linkage data object.
5187
5188 For medium model x86-64 we need to use the .largecomm directive for
5189 large objects. */
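/* Sketch of the resulting assembly, assuming a hypothetical 4096-byte
   object "buf" above the section threshold and aligned to 32 bytes:

     .largecomm	buf,4096,32

   Smaller objects fall back to the regular COMMON_ASM_OP (typically
   ".comm").  */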
5190 void
5191 x86_elf_aligned_common (FILE *file,
5192 const char *name, unsigned HOST_WIDE_INT size,
5193 int align)
5194 {
5195 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5196 && size > (unsigned int)ix86_section_threshold)
5197 fputs (".largecomm\t", file);
5198 else
5199 fputs (COMMON_ASM_OP, file);
5200 assemble_name (file, name);
5201 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5202 size, align / BITS_PER_UNIT);
5203 }
5204 #endif
5205
5206 /* Utility function for targets to use in implementing
5207 ASM_OUTPUT_ALIGNED_BSS. */
5208
5209 void
5210 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
5211 const char *name, unsigned HOST_WIDE_INT size,
5212 int align)
5213 {
5214 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5215 && size > (unsigned int)ix86_section_threshold)
5216 switch_to_section (get_named_section (decl, ".lbss", 0));
5217 else
5218 switch_to_section (bss_section);
5219 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5220 #ifdef ASM_DECLARE_OBJECT_NAME
5221 last_assemble_variable_decl = decl;
5222 ASM_DECLARE_OBJECT_NAME (file, name, decl);
5223 #else
5224 /* Standard thing is just output label for the object. */
5225 ASM_OUTPUT_LABEL (file, name);
5226 #endif /* ASM_DECLARE_OBJECT_NAME */
5227 ASM_OUTPUT_SKIP (file, size ? size : 1);
5228 }
5229 \f
5230 /* Decide whether we must probe the stack before any space allocation
5231 on this target. It's essentially TARGET_STACK_PROBE except when
5232 -fstack-check causes the stack to be already probed differently. */
5233
5234 bool
5235 ix86_target_stack_probe (void)
5236 {
5237 /* Do not probe the stack twice if static stack checking is enabled. */
5238 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5239 return false;
5240
5241 return TARGET_STACK_PROBE;
5242 }
5243 \f
5244 /* Decide whether we can make a sibling call to a function. DECL is the
5245 declaration of the function being targeted by the call and EXP is the
5246 CALL_EXPR representing the call. */
5247
5248 static bool
5249 ix86_function_ok_for_sibcall (tree decl, tree exp)
5250 {
5251 tree type, decl_or_type;
5252 rtx a, b;
5253
5254 /* If we are generating position-independent code, we cannot sibcall
5255 optimize any indirect call, or a direct call to a global function,
5256 as the PLT requires %ebx be live. (Darwin does not have a PLT.) */
5257 if (!TARGET_MACHO
5258 && !TARGET_64BIT
5259 && flag_pic
5260 && (!decl || !targetm.binds_local_p (decl)))
5261 return false;
5262
5263 /* If we need to align the outgoing stack, then sibcalling would
5264 unalign the stack, which may break the called function. */
5265 if (ix86_minimum_incoming_stack_boundary (true)
5266 < PREFERRED_STACK_BOUNDARY)
5267 return false;
5268
5269 if (decl)
5270 {
5271 decl_or_type = decl;
5272 type = TREE_TYPE (decl);
5273 }
5274 else
5275 {
5276 /* We're looking at the CALL_EXPR, we need the type of the function. */
5277 type = CALL_EXPR_FN (exp); /* pointer expression */
5278 type = TREE_TYPE (type); /* pointer type */
5279 type = TREE_TYPE (type); /* function type */
5280 decl_or_type = type;
5281 }
5282
5283 /* Check that the return value locations are the same. For example,
5284 if we are returning floats on the 80387 register stack, we cannot
5285 make a sibcall from a function that doesn't return a float to a
5286 function that does or, conversely, from a function that does return
5287 a float to a function that doesn't; the necessary stack adjustment
5288 would not be executed. This is also the place we notice
5289 differences in the return value ABI. Note that it is ok for one
5290 of the functions to have void return type as long as the return
5291 value of the other is passed in a register. */
5292 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5293 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5294 cfun->decl, false);
5295 if (STACK_REG_P (a) || STACK_REG_P (b))
5296 {
5297 if (!rtx_equal_p (a, b))
5298 return false;
5299 }
5300 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5301 ;
5302 else if (!rtx_equal_p (a, b))
5303 return false;
5304
5305 if (TARGET_64BIT)
5306 {
5307 /* The SYSV ABI has more call-clobbered registers;
5308 disallow sibcalls from MS to SYSV. */
5309 if (cfun->machine->call_abi == MS_ABI
5310 && ix86_function_type_abi (type) == SYSV_ABI)
5311 return false;
5312 }
5313 else
5314 {
5315 /* If this call is indirect, we'll need to be able to use a
5316 call-clobbered register for the address of the target function.
5317 Make sure that all such registers are not used for passing
5318 parameters. Note that DLLIMPORT functions are indirect. */
5319 if (!decl
5320 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5321 {
5322 if (ix86_function_regparm (type, NULL) >= 3)
5323 {
5324 /* ??? Need to count the actual number of registers to be used,
5325 not the possible number of registers. Fix later. */
5326 return false;
5327 }
5328 }
5329 }
5330
5331 /* Otherwise okay. That also includes certain types of indirect calls. */
5332 return true;
5333 }
5334
5335 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5336 and "sseregparm" calling convention attributes;
5337 arguments as in struct attribute_spec.handler. */
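/* A hypothetical declaration exercising this handler:

     int __attribute__((regparm (2), sseregparm)) f (int, int);

   is accepted, since regparm may be combined with sseregparm, whereas
   combining e.g. fastcall with regparm is rejected by the checks
   below.  */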
5338
5339 static tree
5340 ix86_handle_cconv_attribute (tree *node, tree name,
5341 tree args,
5342 int flags ATTRIBUTE_UNUSED,
5343 bool *no_add_attrs)
5344 {
5345 if (TREE_CODE (*node) != FUNCTION_TYPE
5346 && TREE_CODE (*node) != METHOD_TYPE
5347 && TREE_CODE (*node) != FIELD_DECL
5348 && TREE_CODE (*node) != TYPE_DECL)
5349 {
5350 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5351 name);
5352 *no_add_attrs = true;
5353 return NULL_TREE;
5354 }
5355
5356 /* Can combine regparm with all attributes but fastcall, and thiscall. */
5357 if (is_attribute_p ("regparm", name))
5358 {
5359 tree cst;
5360
5361 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5362 {
5363 error ("fastcall and regparm attributes are not compatible");
5364 }
5365
5366 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5367 {
5368 error ("regparam and thiscall attributes are not compatible");
5369 }
5370
5371 cst = TREE_VALUE (args);
5372 if (TREE_CODE (cst) != INTEGER_CST)
5373 {
5374 warning (OPT_Wattributes,
5375 "%qE attribute requires an integer constant argument",
5376 name);
5377 *no_add_attrs = true;
5378 }
5379 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5380 {
5381 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5382 name, REGPARM_MAX);
5383 *no_add_attrs = true;
5384 }
5385
5386 return NULL_TREE;
5387 }
5388
5389 if (TARGET_64BIT)
5390 {
5391 /* Do not warn when emulating the MS ABI. */
5392 if ((TREE_CODE (*node) != FUNCTION_TYPE
5393 && TREE_CODE (*node) != METHOD_TYPE)
5394 || ix86_function_type_abi (*node) != MS_ABI)
5395 warning (OPT_Wattributes, "%qE attribute ignored",
5396 name);
5397 *no_add_attrs = true;
5398 return NULL_TREE;
5399 }
5400
5401 /* Can combine fastcall only with sseregparm; the other calling-convention attributes conflict with it. */
5402 if (is_attribute_p ("fastcall", name))
5403 {
5404 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5405 {
5406 error ("fastcall and cdecl attributes are not compatible");
5407 }
5408 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5409 {
5410 error ("fastcall and stdcall attributes are not compatible");
5411 }
5412 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5413 {
5414 error ("fastcall and regparm attributes are not compatible");
5415 }
5416 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5417 {
5418 error ("fastcall and thiscall attributes are not compatible");
5419 }
5420 }
5421
5422 /* Can combine stdcall with regparm and sseregparm; combining it with
5423 cdecl, fastcall or thiscall is rejected below. */
5424 else if (is_attribute_p ("stdcall", name))
5425 {
5426 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5427 {
5428 error ("stdcall and cdecl attributes are not compatible");
5429 }
5430 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5431 {
5432 error ("stdcall and fastcall attributes are not compatible");
5433 }
5434 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5435 {
5436 error ("stdcall and thiscall attributes are not compatible");
5437 }
5438 }
5439
5440 /* Can combine cdecl with regparm and sseregparm. */
5441 else if (is_attribute_p ("cdecl", name))
5442 {
5443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5444 {
5445 error ("stdcall and cdecl attributes are not compatible");
5446 }
5447 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5448 {
5449 error ("fastcall and cdecl attributes are not compatible");
5450 }
5451 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5452 {
5453 error ("cdecl and thiscall attributes are not compatible");
5454 }
5455 }
5456 else if (is_attribute_p ("thiscall", name))
5457 {
5458 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5459 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5460 name);
5461 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5462 {
5463 error ("stdcall and thiscall attributes are not compatible");
5464 }
5465 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5466 {
5467 error ("fastcall and thiscall attributes are not compatible");
5468 }
5469 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5470 {
5471 error ("cdecl and thiscall attributes are not compatible");
5472 }
5473 }
5474
5475 /* Can combine sseregparm with all attributes. */
5476
5477 return NULL_TREE;
5478 }
5479
5480 /* The transactional memory builtins are implicitly regparm or fastcall
5481 depending on the ABI. Override the generic do-nothing attribute that
5482 these builtins were declared with, and replace it with one of the two
5483 attributes that we expect elsewhere. */
5484
5485 static tree
5486 ix86_handle_tm_regparm_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
5487 tree args ATTRIBUTE_UNUSED,
5488 int flags, bool *no_add_attrs)
5489 {
5490 tree alt;
5491
5492 /* In no case do we want to add the placeholder attribute. */
5493 *no_add_attrs = true;
5494
5495 /* The 64-bit ABI is unchanged for transactional memory. */
5496 if (TARGET_64BIT)
5497 return NULL_TREE;
5498
5499 /* ??? Is there a better way to validate 32-bit windows? We have
5500 cfun->machine->call_abi, but that seems to be set only for 64-bit. */
5501 if (CHECK_STACK_LIMIT > 0)
5502 alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5503 else
5504 {
5505 alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5506 alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5507 }
5508 decl_attributes (node, alt, flags);
5509
5510 return NULL_TREE;
5511 }
5512
5513 /* This function determines the calling convention from TYPE. */
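/* For example (a sketch), a 32-bit function type carrying only a
   "stdcall" attribute yields IX86_CALLCVT_STDCALL, and adding "regparm"
   would OR in IX86_CALLCVT_REGPARM as well; on 64-bit targets the
   result is always IX86_CALLCVT_CDECL.  */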
5514
5515 unsigned int
5516 ix86_get_callcvt (const_tree type)
5517 {
5518 unsigned int ret = 0;
5519 bool is_stdarg;
5520 tree attrs;
5521
5522 if (TARGET_64BIT)
5523 return IX86_CALLCVT_CDECL;
5524
5525 attrs = TYPE_ATTRIBUTES (type);
5526 if (attrs != NULL_TREE)
5527 {
5528 if (lookup_attribute ("cdecl", attrs))
5529 ret |= IX86_CALLCVT_CDECL;
5530 else if (lookup_attribute ("stdcall", attrs))
5531 ret |= IX86_CALLCVT_STDCALL;
5532 else if (lookup_attribute ("fastcall", attrs))
5533 ret |= IX86_CALLCVT_FASTCALL;
5534 else if (lookup_attribute ("thiscall", attrs))
5535 ret |= IX86_CALLCVT_THISCALL;
5536
5537 /* Regparm isn't allowed for thiscall and fastcall. */
5538 if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5539 {
5540 if (lookup_attribute ("regparm", attrs))
5541 ret |= IX86_CALLCVT_REGPARM;
5542 if (lookup_attribute ("sseregparm", attrs))
5543 ret |= IX86_CALLCVT_SSEREGPARM;
5544 }
5545
5546 if (IX86_BASE_CALLCVT(ret) != 0)
5547 return ret;
5548 }
5549
5550 is_stdarg = stdarg_p (type);
5551 if (TARGET_RTD && !is_stdarg)
5552 return IX86_CALLCVT_STDCALL | ret;
5553
5554 if (ret != 0
5555 || is_stdarg
5556 || TREE_CODE (type) != METHOD_TYPE
5557 || ix86_function_type_abi (type) != MS_ABI)
5558 return IX86_CALLCVT_CDECL | ret;
5559
5560 return IX86_CALLCVT_THISCALL;
5561 }
5562
5563 /* Return 0 if the attributes for two types are incompatible, 1 if they
5564 are compatible, and 2 if they are nearly compatible (which causes a
5565 warning to be generated). */
5566
5567 static int
5568 ix86_comp_type_attributes (const_tree type1, const_tree type2)
5569 {
5570 unsigned int ccvt1, ccvt2;
5571
5572 if (TREE_CODE (type1) != FUNCTION_TYPE
5573 && TREE_CODE (type1) != METHOD_TYPE)
5574 return 1;
5575
5576 ccvt1 = ix86_get_callcvt (type1);
5577 ccvt2 = ix86_get_callcvt (type2);
5578 if (ccvt1 != ccvt2)
5579 return 0;
5580 if (ix86_function_regparm (type1, NULL)
5581 != ix86_function_regparm (type2, NULL))
5582 return 0;
5583
5584 return 1;
5585 }
5586 \f
5587 /* Return the regparm value for a function with the indicated TYPE and DECL.
5588 DECL may be NULL when calling function indirectly
5589 or considering a libcall. */
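/* Rough examples of the result in 32-bit code: a type declared
   __attribute__((regparm (3))) yields 3, a fastcall type yields 2, a
   thiscall type yields 1, and otherwise the -mregparm default applies,
   possibly raised for local functions by the logic below.  */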
5590
5591 static int
5592 ix86_function_regparm (const_tree type, const_tree decl)
5593 {
5594 tree attr;
5595 int regparm;
5596 unsigned int ccvt;
5597
5598 if (TARGET_64BIT)
5599 return (ix86_function_type_abi (type) == SYSV_ABI
5600 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5601 ccvt = ix86_get_callcvt (type);
5602 regparm = ix86_regparm;
5603
5604 if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5605 {
5606 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5607 if (attr)
5608 {
5609 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5610 return regparm;
5611 }
5612 }
5613 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5614 return 2;
5615 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5616 return 1;
5617
5618 /* Use register calling convention for local functions when possible. */
5619 if (decl
5620 && TREE_CODE (decl) == FUNCTION_DECL
5621 /* Caller and callee must agree on the calling convention, so
5622 checking here just optimize means that with
5623 __attribute__((optimize (...))) caller could use regparm convention
5624 and callee not, or vice versa. Instead look at whether the callee
5625 is optimized or not. */
5626 && opt_for_fn (decl, optimize)
5627 && !(profile_flag && !flag_fentry))
5628 {
5629 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5630 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
5631 if (i && i->local && i->can_change_signature)
5632 {
5633 int local_regparm, globals = 0, regno;
5634
5635 /* Make sure no regparm register is taken by a
5636 fixed register variable. */
5637 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
5638 if (fixed_regs[local_regparm])
5639 break;
5640
5641 /* We don't want to use regparm(3) for nested functions as
5642 these use a static chain pointer in the third argument. */
5643 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
5644 local_regparm = 2;
5645
5646 /* In 32-bit mode save a register for the split stack. */
5647 if (!TARGET_64BIT && local_regparm == 3 && flag_split_stack)
5648 local_regparm = 2;
5649
5650 /* Each fixed register usage increases register pressure,
5651 so fewer registers should be used for argument passing.
5652 This functionality can be overridden by an explicit
5653 regparm value. */
5654 for (regno = AX_REG; regno <= DI_REG; regno++)
5655 if (fixed_regs[regno])
5656 globals++;
5657
5658 local_regparm
5659 = globals < local_regparm ? local_regparm - globals : 0;
5660
5661 if (local_regparm > regparm)
5662 regparm = local_regparm;
5663 }
5664 }
5665
5666 return regparm;
5667 }
5668
5669 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
5670 DFmode (2) arguments in SSE registers for a function with the
5671 indicated TYPE and DECL. DECL may be NULL when calling function
5672 indirectly or considering a libcall. Otherwise return 0. */
5673
5674 static int
5675 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5676 {
5677 gcc_assert (!TARGET_64BIT);
5678
5679 /* Use SSE registers to pass SFmode and DFmode arguments if requested
5680 by the sseregparm attribute. */
5681 if (TARGET_SSEREGPARM
5682 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5683 {
5684 if (!TARGET_SSE)
5685 {
5686 if (warn)
5687 {
5688 if (decl)
5689 error ("calling %qD with attribute sseregparm without "
5690 "SSE/SSE2 enabled", decl);
5691 else
5692 error ("calling %qT with attribute sseregparm without "
5693 "SSE/SSE2 enabled", type);
5694 }
5695 return 0;
5696 }
5697
5698 return 2;
5699 }
5700
5701 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5702 (and DFmode for SSE2) arguments in SSE registers. */
5703 if (decl && TARGET_SSE_MATH && optimize
5704 && !(profile_flag && !flag_fentry))
5705 {
5706 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
5707 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
5708 if (i && i->local && i->can_change_signature)
5709 return TARGET_SSE2 ? 2 : 1;
5710 }
5711
5712 return 0;
5713 }
5714
5715 /* Return true if EAX is live at the start of the function. Used by
5716 ix86_expand_prologue to determine if we need special help before
5717 calling allocate_stack_worker. */
5718
5719 static bool
5720 ix86_eax_live_at_start_p (void)
5721 {
5722 /* Cheat. Don't bother working forward from ix86_function_regparm
5723 to the function type to whether an actual argument is located in
5724 eax. Instead just look at cfg info, which is still close enough
5725 to correct at this point. This gives false positives for broken
5726 functions that might use uninitialized data that happens to be
5727 allocated in eax, but who cares? */
5728 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5729 }
5730
5731 static bool
5732 ix86_keep_aggregate_return_pointer (tree fntype)
5733 {
5734 tree attr;
5735
5736 if (!TARGET_64BIT)
5737 {
5738 attr = lookup_attribute ("callee_pop_aggregate_return",
5739 TYPE_ATTRIBUTES (fntype));
5740 if (attr)
5741 return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
5742
5743 /* For 32-bit MS-ABI the default is to keep aggregate
5744 return pointer. */
5745 if (ix86_function_type_abi (fntype) == MS_ABI)
5746 return true;
5747 }
5748 return KEEP_AGGREGATE_RETURN_POINTER != 0;
5749 }
5750
5751 /* Value is the number of bytes of arguments automatically
5752 popped when returning from a subroutine call.
5753 FUNDECL is the declaration node of the function (as a tree),
5754 FUNTYPE is the data type of the function (as a tree),
5755 or for a library call it is an identifier node for the subroutine name.
5756 SIZE is the number of bytes of arguments passed on the stack.
5757
5758 On the 80386, the RTD insn may be used to pop them if the number
5759 of args is fixed, but if the number is variable then the caller
5760 must pop them all. RTD can't be used for library calls now
5761 because the library is compiled with the Unix compiler.
5762 Use of RTD is a selectable option, since it is incompatible with
5763 standard Unix calling sequences. If the option is not selected,
5764 the caller must always pop the args.
5765
5766 The attribute stdcall is equivalent to RTD on a per module basis. */
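/* As a sketch (hypothetical user code), a 32-bit function declared

     void __attribute__((stdcall)) g (int a, int b);

   makes this hook return SIZE (8 here), so the callee pops its own
   arguments (e.g. via "ret $8"), while a plain cdecl function returns 0
   and leaves the cleanup to the caller.  */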
5767
5768 static int
5769 ix86_return_pops_args (tree fundecl, tree funtype, int size)
5770 {
5771 unsigned int ccvt;
5772
5773 /* None of the 64-bit ABIs pop arguments. */
5774 if (TARGET_64BIT)
5775 return 0;
5776
5777 ccvt = ix86_get_callcvt (funtype);
5778
5779 if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
5780 | IX86_CALLCVT_THISCALL)) != 0
5781 && ! stdarg_p (funtype))
5782 return size;
5783
5784 /* Lose any fake structure return argument if it is passed on the stack. */
5785 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5786 && !ix86_keep_aggregate_return_pointer (funtype))
5787 {
5788 int nregs = ix86_function_regparm (funtype, fundecl);
5789 if (nregs == 0)
5790 return GET_MODE_SIZE (Pmode);
5791 }
5792
5793 return 0;
5794 }
5795
5796 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
5797
5798 static bool
5799 ix86_legitimate_combined_insn (rtx insn)
5800 {
5801 /* Check operand constraints in case hard registers were propagated
5802 into insn pattern. This check prevents combine pass from
5803 generating insn patterns with invalid hard register operands.
5804 These invalid insns can eventually confuse reload to error out
5805 with a spill failure. See also PRs 46829 and 46843. */
5806 if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
5807 {
5808 int i;
5809
5810 extract_insn (insn);
5811 preprocess_constraints ();
5812
5813 for (i = 0; i < recog_data.n_operands; i++)
5814 {
5815 rtx op = recog_data.operand[i];
5816 enum machine_mode mode = GET_MODE (op);
5817 struct operand_alternative *op_alt;
5818 int offset = 0;
5819 bool win;
5820 int j;
5821
5822 /* For pre-AVX disallow unaligned loads/stores where the
5823 instructions don't support it. */
5824 if (!TARGET_AVX
5825 && VECTOR_MODE_P (GET_MODE (op))
5826 && misaligned_operand (op, GET_MODE (op)))
5827 {
5828 int min_align = get_attr_ssememalign (insn);
5829 if (min_align == 0)
5830 return false;
5831 }
5832
5833 /* A unary operator may be accepted by the predicate, but it
5834 is irrelevant for matching constraints. */
5835 if (UNARY_P (op))
5836 op = XEXP (op, 0);
5837
5838 if (GET_CODE (op) == SUBREG)
5839 {
5840 if (REG_P (SUBREG_REG (op))
5841 && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
5842 offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
5843 GET_MODE (SUBREG_REG (op)),
5844 SUBREG_BYTE (op),
5845 GET_MODE (op));
5846 op = SUBREG_REG (op);
5847 }
5848
5849 if (!(REG_P (op) && HARD_REGISTER_P (op)))
5850 continue;
5851
5852 op_alt = recog_op_alt[i];
5853
5854 /* Operand has no constraints, anything is OK. */
5855 win = !recog_data.n_alternatives;
5856
5857 for (j = 0; j < recog_data.n_alternatives; j++)
5858 {
5859 if (op_alt[j].anything_ok
5860 || (op_alt[j].matches != -1
5861 && operands_match_p
5862 (recog_data.operand[i],
5863 recog_data.operand[op_alt[j].matches]))
5864 || reg_fits_class_p (op, op_alt[j].cl, offset, mode))
5865 {
5866 win = true;
5867 break;
5868 }
5869 }
5870
5871 if (!win)
5872 return false;
5873 }
5874 }
5875
5876 return true;
5877 }
5878 \f
5879 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
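/* The value returned below is the constant offset in AddressSanitizer's
   usual shadow mapping, roughly Shadow = (Addr >> 3) + Offset with a
   1/8 shadow scale: 0x7fff8000 for non-Mach-O LP64 targets, 1 << 44 for
   LP64 Mach-O, and 1 << 29 for 32-bit targets.  */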
5880
5881 static unsigned HOST_WIDE_INT
5882 ix86_asan_shadow_offset (void)
5883 {
5884 return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
5885 : HOST_WIDE_INT_C (0x7fff8000))
5886 : (HOST_WIDE_INT_1 << 29);
5887 }
5888 \f
5889 /* Argument support functions. */
5890
5891 /* Return true when register may be used to pass function parameters. */
5892 bool
5893 ix86_function_arg_regno_p (int regno)
5894 {
5895 int i;
5896 const int *parm_regs;
5897
5898 if (!TARGET_64BIT)
5899 {
5900 if (TARGET_MACHO)
5901 return (regno < REGPARM_MAX
5902 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5903 else
5904 return (regno < REGPARM_MAX
5905 || (TARGET_MMX && MMX_REGNO_P (regno)
5906 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5907 || (TARGET_SSE && SSE_REGNO_P (regno)
5908 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5909 }
5910
5911 if (TARGET_SSE && SSE_REGNO_P (regno)
5912 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5913 return true;
5914
5915 /* TODO: The function should depend on the current function's ABI, but
5916 builtins.c would need updating then. Therefore we use the
5917 default ABI. */
5918
5919 /* RAX is used as hidden argument to va_arg functions. */
5920 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5921 return true;
5922
5923 if (ix86_abi == MS_ABI)
5924 parm_regs = x86_64_ms_abi_int_parameter_registers;
5925 else
5926 parm_regs = x86_64_int_parameter_registers;
5927 for (i = 0; i < (ix86_abi == MS_ABI
5928 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5929 if (regno == parm_regs[i])
5930 return true;
5931 return false;
5932 }
5933
5934 /* Return true if we do not know how to pass TYPE solely in registers. */
5935
5936 static bool
5937 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5938 {
5939 if (must_pass_in_stack_var_size_or_pad (mode, type))
5940 return true;
5941
5942 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5943 The layout_type routine is crafty and tries to trick us into passing
5944 currently unsupported vector types on the stack by using TImode. */
5945 return (!TARGET_64BIT && mode == TImode
5946 && type && TREE_CODE (type) != VECTOR_TYPE);
5947 }
5948
5949 /* Return the size, in bytes, of the area reserved for arguments passed
5950 in registers for the function represented by FNDECL, depending on the
5951 ABI format used. */
5952 int
5953 ix86_reg_parm_stack_space (const_tree fndecl)
5954 {
5955 enum calling_abi call_abi = SYSV_ABI;
5956 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5957 call_abi = ix86_function_abi (fndecl);
5958 else
5959 call_abi = ix86_function_type_abi (fndecl);
5960 if (TARGET_64BIT && call_abi == MS_ABI)
5961 return 32;
5962 return 0;
5963 }
5964
5965 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
5966 call ABI used. */
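/* A hypothetical example: with a SYSV_ABI default,

     void w32_callback (void) __attribute__((ms_abi));

   makes this function return MS_ABI for that type, while an explicit
   "sysv_abi" attribute forces SYSV_ABI when the default is MS_ABI.  */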
5967 enum calling_abi
5968 ix86_function_type_abi (const_tree fntype)
5969 {
5970 if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
5971 {
5972 enum calling_abi abi = ix86_abi;
5973 if (abi == SYSV_ABI)
5974 {
5975 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5976 abi = MS_ABI;
5977 }
5978 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5979 abi = SYSV_ABI;
5980 return abi;
5981 }
5982 return ix86_abi;
5983 }
5984
5985 /* We add this as a workaround in order to use libc_has_function
5986 hook in i386.md. */
5987 bool
5988 ix86_libc_has_function (enum function_class fn_class)
5989 {
5990 return targetm.libc_has_function (fn_class);
5991 }
5992
5993 static bool
5994 ix86_function_ms_hook_prologue (const_tree fn)
5995 {
5996 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5997 {
5998 if (decl_function_context (fn) != NULL_TREE)
5999 error_at (DECL_SOURCE_LOCATION (fn),
6000 "ms_hook_prologue is not compatible with nested function");
6001 else
6002 return true;
6003 }
6004 return false;
6005 }
6006
6007 static enum calling_abi
6008 ix86_function_abi (const_tree fndecl)
6009 {
6010 if (! fndecl)
6011 return ix86_abi;
6012 return ix86_function_type_abi (TREE_TYPE (fndecl));
6013 }
6014
6015 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
6016 call ABI used. */
6017 enum calling_abi
6018 ix86_cfun_abi (void)
6019 {
6020 if (! cfun)
6021 return ix86_abi;
6022 return cfun->machine->call_abi;
6023 }
6024
6025 /* Write the extra assembler code needed to declare a function properly. */
6026
6027 void
6028 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6029 tree decl)
6030 {
6031 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6032
6033 if (is_ms_hook)
6034 {
6035 int i, filler_count = (TARGET_64BIT ? 32 : 16);
6036 unsigned int filler_cc = 0xcccccccc;
6037
6038 for (i = 0; i < filler_count; i += 4)
6039 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6040 }
6041
6042 #ifdef SUBTARGET_ASM_UNWIND_INIT
6043 SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6044 #endif
6045
6046 ASM_OUTPUT_LABEL (asm_out_file, fname);
6047
6048 /* Output magic byte marker, if hot-patch attribute is set. */
6049 if (is_ms_hook)
6050 {
6051 if (TARGET_64BIT)
6052 {
6053 /* leaq [%rsp + 0], %rsp */
6054 asm_fprintf (asm_out_file, ASM_BYTE
6055 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6056 }
6057 else
6058 {
6059 /* movl.s %edi, %edi
6060 push %ebp
6061 movl.s %esp, %ebp */
6062 asm_fprintf (asm_out_file, ASM_BYTE
6063 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6064 }
6065 }
6066 }
6067
6068 /* regclass.c */
6069 extern void init_regs (void);
6070
6071 /* Implementation of the call ABI switching target hook. The call
6072 register sets specific to FNDECL are set up here. See also
6073 ix86_conditional_register_usage for more details. */
6074 void
6075 ix86_call_abi_override (const_tree fndecl)
6076 {
6077 if (fndecl == NULL_TREE)
6078 cfun->machine->call_abi = ix86_abi;
6079 else
6080 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6081 }
6082
6083 /* The 64-bit MS and SYSV ABIs have different sets of call-used registers. Avoid
6084 the expensive re-initialization of init_regs each time we switch function context,
6085 since this is needed only during RTL expansion. */
6086 static void
6087 ix86_maybe_switch_abi (void)
6088 {
6089 if (TARGET_64BIT &&
6090 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
6091 reinit_regs ();
6092 }
6093
6094 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6095 for a call to a function whose data type is FNTYPE.
6096 For a library call, FNTYPE is 0. */
6097
6098 void
6099 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
6100 tree fntype, /* tree ptr for function decl */
6101 rtx libname, /* SYMBOL_REF of library name or 0 */
6102 tree fndecl,
6103 int caller)
6104 {
6105 struct cgraph_local_info *i;
6106
6107 memset (cum, 0, sizeof (*cum));
6108
6109 if (fndecl)
6110 {
6111 i = cgraph_local_info (fndecl);
6112 cum->call_abi = ix86_function_abi (fndecl);
6113 }
6114 else
6115 {
6116 i = NULL;
6117 cum->call_abi = ix86_function_type_abi (fntype);
6118 }
6119
6120 cum->caller = caller;
6121
6122 /* Set up the number of registers to use for passing arguments. */
6123 cum->nregs = ix86_regparm;
6124 if (TARGET_64BIT)
6125 {
6126 cum->nregs = (cum->call_abi == SYSV_ABI
6127 ? X86_64_REGPARM_MAX
6128 : X86_64_MS_REGPARM_MAX);
6129 }
6130 if (TARGET_SSE)
6131 {
6132 cum->sse_nregs = SSE_REGPARM_MAX;
6133 if (TARGET_64BIT)
6134 {
6135 cum->sse_nregs = (cum->call_abi == SYSV_ABI
6136 ? X86_64_SSE_REGPARM_MAX
6137 : X86_64_MS_SSE_REGPARM_MAX);
6138 }
6139 }
6140 if (TARGET_MMX)
6141 cum->mmx_nregs = MMX_REGPARM_MAX;
6142 cum->warn_avx512f = true;
6143 cum->warn_avx = true;
6144 cum->warn_sse = true;
6145 cum->warn_mmx = true;
6146
6147 /* Because the type might mismatch between caller and callee, we need to
6148 use the actual type of the function for local calls.
6149 FIXME: cgraph_analyze can be told to actually record if the function uses
6150 va_start, so for local functions maybe_vaarg can be made more aggressive,
6151 helping K&R code.
6152 FIXME: once the type system is fixed, we won't need this code anymore. */
6153 if (i && i->local && i->can_change_signature)
6154 fntype = TREE_TYPE (fndecl);
6155 cum->maybe_vaarg = (fntype
6156 ? (!prototype_p (fntype) || stdarg_p (fntype))
6157 : !libname);
6158
6159 if (!TARGET_64BIT)
6160 {
6161 /* If there are variable arguments, then we won't pass anything
6162 in registers in 32-bit mode. */
6163 if (stdarg_p (fntype))
6164 {
6165 cum->nregs = 0;
6166 cum->sse_nregs = 0;
6167 cum->mmx_nregs = 0;
6168 cum->warn_avx512f = false;
6169 cum->warn_avx = false;
6170 cum->warn_sse = false;
6171 cum->warn_mmx = false;
6172 return;
6173 }
6174
6175 /* Use ecx and edx registers if function has fastcall attribute,
6176 else look for regparm information. */
6177 if (fntype)
6178 {
6179 unsigned int ccvt = ix86_get_callcvt (fntype);
6180 if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6181 {
6182 cum->nregs = 1;
6183 cum->fastcall = 1; /* Same first register as in fastcall. */
6184 }
6185 else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6186 {
6187 cum->nregs = 2;
6188 cum->fastcall = 1;
6189 }
6190 else
6191 cum->nregs = ix86_function_regparm (fntype, fndecl);
6192 }
6193
6194 /* Set up the number of SSE registers used for passing SFmode
6195 and DFmode arguments. Warn for mismatching ABI. */
6196 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6197 }
6198 }
6199
6200 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
6201 But in the case of vector types, it is some vector mode.
6202
6203 When we have only some of our vector isa extensions enabled, then there
6204 are some modes for which vector_mode_supported_p is false. For these
6205 modes, the generic vector support in gcc will choose some non-vector mode
6206 in order to implement the type. By computing the natural mode, we'll
6207 select the proper ABI location for the operand and not depend on whatever
6208 the middle-end decides to do with these vector types.
6209
6210 The middle-end can't deal with vector types larger than 16 bytes. In
6211 this case, we return the original mode and warn about the ABI change if
6212 CUM isn't NULL.
6213
6214 If IN_RETURN is true, warn about the ABI change if the vector mode isn't
6215 available for the function return value. */
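
/* For example, with "typedef float v4sf __attribute__ ((vector_size (16)));"
   and SSE disabled, TYPE_MODE for v4sf is some non-vector fallback mode chosen
   by the generic vector support, but the natural mode computed below is still
   V4SFmode, so the argument keeps its proper ABI slot (possibly after a
   -Wpsabi warning about the ABI change).  */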
6216
6217 static enum machine_mode
6218 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6219 bool in_return)
6220 {
6221 enum machine_mode mode = TYPE_MODE (type);
6222
6223 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6224 {
6225 HOST_WIDE_INT size = int_size_in_bytes (type);
6226 if ((size == 8 || size == 16 || size == 32 || size == 64)
6227 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
6228 && TYPE_VECTOR_SUBPARTS (type) > 1)
6229 {
6230 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6231
6232 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6233 mode = MIN_MODE_VECTOR_FLOAT;
6234 else
6235 mode = MIN_MODE_VECTOR_INT;
6236
6237 /* Get the mode which has this inner mode and number of units. */
6238 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6239 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6240 && GET_MODE_INNER (mode) == innermode)
6241 {
6242 if (size == 64 && !TARGET_AVX512F)
6243 {
6244 static bool warnedavx512f;
6245 static bool warnedavx512f_ret;
6246
6247 if (cum && cum->warn_avx512f && !warnedavx512f)
6248 {
6249 if (warning (OPT_Wpsabi, "AVX512F vector argument "
6250 "without AVX512F enabled changes the ABI"))
6251 warnedavx512f = true;
6252 }
6253 else if (in_return && !warnedavx512f_ret)
6254 {
6255 if (warning (OPT_Wpsabi, "AVX512F vector return "
6256 "without AVX512F enabled changes the ABI"))
6257 warnedavx512f_ret = true;
6258 }
6259
6260 return TYPE_MODE (type);
6261 }
6262 else if (size == 32 && !TARGET_AVX)
6263 {
6264 static bool warnedavx;
6265 static bool warnedavx_ret;
6266
6267 if (cum && cum->warn_avx && !warnedavx)
6268 {
6269 if (warning (OPT_Wpsabi, "AVX vector argument "
6270 "without AVX enabled changes the ABI"))
6271 warnedavx = true;
6272 }
6273 else if (in_return && !warnedavx_ret)
6274 {
6275 if (warning (OPT_Wpsabi, "AVX vector return "
6276 "without AVX enabled changes the ABI"))
6277 warnedavx_ret = true;
6278 }
6279
6280 return TYPE_MODE (type);
6281 }
6282 else if (((size == 8 && TARGET_64BIT) || size == 16)
6283 && !TARGET_SSE)
6284 {
6285 static bool warnedsse;
6286 static bool warnedsse_ret;
6287
6288 if (cum && cum->warn_sse && !warnedsse)
6289 {
6290 if (warning (OPT_Wpsabi, "SSE vector argument "
6291 "without SSE enabled changes the ABI"))
6292 warnedsse = true;
6293 }
6294 else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6295 {
6296 if (warning (OPT_Wpsabi, "SSE vector return "
6297 "without SSE enabled changes the ABI"))
6298 warnedsse_ret = true;
6299 }
6300 }
6301 else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6302 {
6303 static bool warnedmmx;
6304 static bool warnedmmx_ret;
6305
6306 if (cum && cum->warn_mmx && !warnedmmx)
6307 {
6308 if (warning (OPT_Wpsabi, "MMX vector argument "
6309 "without MMX enabled changes the ABI"))
6310 warnedmmx = true;
6311 }
6312 else if (in_return && !warnedmmx_ret)
6313 {
6314 if (warning (OPT_Wpsabi, "MMX vector return "
6315 "without MMX enabled changes the ABI"))
6316 warnedmmx_ret = true;
6317 }
6318 }
6319 return mode;
6320 }
6321
6322 gcc_unreachable ();
6323 }
6324 }
6325
6326 return mode;
6327 }
6328
6329 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
6330 this may not agree with the mode that the type system has chosen for the
6331 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
6332 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
6333
6334 static rtx
6335 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
6336 unsigned int regno)
6337 {
6338 rtx tmp;
6339
6340 if (orig_mode != BLKmode)
6341 tmp = gen_rtx_REG (orig_mode, regno);
6342 else
6343 {
6344 tmp = gen_rtx_REG (mode, regno);
6345 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6346 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6347 }
6348
6349 return tmp;
6350 }
6351
6352 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
6353 of this code is to classify each 8bytes of incoming argument by the register
6354 class and assign registers accordingly. */
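
/* For example, "struct s { double d; long l; };" occupies two eightbytes:
   the first is classified X86_64_SSEDF_CLASS and the second
   X86_64_INTEGER_CLASS, so the struct is passed in one SSE and one integer
   register rather than in memory.  */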
6355
6356 /* Return the union class of CLASS1 and CLASS2.
6357 See the x86-64 PS ABI for details. */
6358
6359 static enum x86_64_reg_class
6360 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6361 {
6362 /* Rule #1: If both classes are equal, this is the resulting class. */
6363 if (class1 == class2)
6364 return class1;
6365
6366 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6367 the other class. */
6368 if (class1 == X86_64_NO_CLASS)
6369 return class2;
6370 if (class2 == X86_64_NO_CLASS)
6371 return class1;
6372
6373 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
6374 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6375 return X86_64_MEMORY_CLASS;
6376
6377 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
6378 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6379 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6380 return X86_64_INTEGERSI_CLASS;
6381 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6382 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6383 return X86_64_INTEGER_CLASS;
6384
6385 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6386 MEMORY is used. */
6387 if (class1 == X86_64_X87_CLASS
6388 || class1 == X86_64_X87UP_CLASS
6389 || class1 == X86_64_COMPLEX_X87_CLASS
6390 || class2 == X86_64_X87_CLASS
6391 || class2 == X86_64_X87UP_CLASS
6392 || class2 == X86_64_COMPLEX_X87_CLASS)
6393 return X86_64_MEMORY_CLASS;
6394
6395 /* Rule #6: Otherwise class SSE is used. */
6396 return X86_64_SSE_CLASS;
6397 }
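
/* E.g. merge_classes (X86_64_NO_CLASS, X86_64_SSEUP_CLASS) yields
   X86_64_SSEUP_CLASS by rule #2, and merge_classes (X86_64_INTEGERSI_CLASS,
   X86_64_SSESF_CLASS) yields X86_64_INTEGERSI_CLASS by rule #4.  */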
6398
6399 /* Classify the argument of type TYPE and mode MODE.
6400 CLASSES will be filled by the register class used to pass each word
6401 of the operand. The number of words is returned. In case the parameter
6402 should be passed in memory, 0 is returned. As a special case for zero
6403 sized containers, classes[0] will be NO_CLASS and 1 is returned.
6404
6405 BIT_OFFSET is used internally for handling records; it specifies the
6406 offset in bits modulo 512 to avoid overflow cases.
6407
6408 See the x86-64 PS ABI for details.
6409 */
6410
6411 static int
6412 classify_argument (enum machine_mode mode, const_tree type,
6413 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6414 {
6415 HOST_WIDE_INT bytes =
6416 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6417 int words
6418 = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6419
6420 /* Variable sized entities are always passed/returned in memory. */
6421 if (bytes < 0)
6422 return 0;
6423
6424 if (mode != VOIDmode
6425 && targetm.calls.must_pass_in_stack (mode, type))
6426 return 0;
6427
6428 if (type && AGGREGATE_TYPE_P (type))
6429 {
6430 int i;
6431 tree field;
6432 enum x86_64_reg_class subclasses[MAX_CLASSES];
6433
6434 /* On x86-64 we pass structures larger than 64 bytes on the stack. */
6435 if (bytes > 64)
6436 return 0;
6437
6438 for (i = 0; i < words; i++)
6439 classes[i] = X86_64_NO_CLASS;
6440
6441 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
6442 signal the memory class, so handle this as a special case. */
6443 if (!words)
6444 {
6445 classes[0] = X86_64_NO_CLASS;
6446 return 1;
6447 }
6448
6449 /* Classify each field of record and merge classes. */
6450 switch (TREE_CODE (type))
6451 {
6452 case RECORD_TYPE:
6453 /* And now merge the fields of structure. */
6454 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6455 {
6456 if (TREE_CODE (field) == FIELD_DECL)
6457 {
6458 int num;
6459
6460 if (TREE_TYPE (field) == error_mark_node)
6461 continue;
6462
6463 /* Bitfields are always classified as integer. Handle them
6464 early, since later code would consider them to be
6465 misaligned integers. */
6466 if (DECL_BIT_FIELD (field))
6467 {
6468 for (i = (int_bit_position (field)
6469 + (bit_offset % 64)) / 8 / 8;
6470 i < ((int_bit_position (field) + (bit_offset % 64))
6471 + tree_to_shwi (DECL_SIZE (field))
6472 + 63) / 8 / 8; i++)
6473 classes[i] =
6474 merge_classes (X86_64_INTEGER_CLASS,
6475 classes[i]);
6476 }
6477 else
6478 {
6479 int pos;
6480
6481 type = TREE_TYPE (field);
6482
6483 /* Flexible array member is ignored. */
6484 if (TYPE_MODE (type) == BLKmode
6485 && TREE_CODE (type) == ARRAY_TYPE
6486 && TYPE_SIZE (type) == NULL_TREE
6487 && TYPE_DOMAIN (type) != NULL_TREE
6488 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6489 == NULL_TREE))
6490 {
6491 static bool warned;
6492
6493 if (!warned && warn_psabi)
6494 {
6495 warned = true;
6496 inform (input_location,
6497 "the ABI of passing struct with"
6498 " a flexible array member has"
6499 " changed in GCC 4.4");
6500 }
6501 continue;
6502 }
6503 num = classify_argument (TYPE_MODE (type), type,
6504 subclasses,
6505 (int_bit_position (field)
6506 + bit_offset) % 512);
6507 if (!num)
6508 return 0;
6509 pos = (int_bit_position (field)
6510 + (bit_offset % 64)) / 8 / 8;
6511 for (i = 0; i < num && (i + pos) < words; i++)
6512 classes[i + pos] =
6513 merge_classes (subclasses[i], classes[i + pos]);
6514 }
6515 }
6516 }
6517 break;
6518
6519 case ARRAY_TYPE:
6520 /* Arrays are handled as small records. */
6521 {
6522 int num;
6523 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6524 TREE_TYPE (type), subclasses, bit_offset);
6525 if (!num)
6526 return 0;
6527
6528 /* The partial classes are now full classes. */
6529 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6530 subclasses[0] = X86_64_SSE_CLASS;
6531 if (subclasses[0] == X86_64_INTEGERSI_CLASS
6532 && !((bit_offset % 64) == 0 && bytes == 4))
6533 subclasses[0] = X86_64_INTEGER_CLASS;
6534
6535 for (i = 0; i < words; i++)
6536 classes[i] = subclasses[i % num];
6537
6538 break;
6539 }
6540 case UNION_TYPE:
6541 case QUAL_UNION_TYPE:
6542 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
6544 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6545 {
6546 if (TREE_CODE (field) == FIELD_DECL)
6547 {
6548 int num;
6549
6550 if (TREE_TYPE (field) == error_mark_node)
6551 continue;
6552
6553 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6554 TREE_TYPE (field), subclasses,
6555 bit_offset);
6556 if (!num)
6557 return 0;
6558 for (i = 0; i < num; i++)
6559 classes[i] = merge_classes (subclasses[i], classes[i]);
6560 }
6561 }
6562 break;
6563
6564 default:
6565 gcc_unreachable ();
6566 }
6567
6568 if (words > 2)
6569 {
6570 /* When the size is larger than 16 bytes, the argument can only
6571 stay in registers if the first word is X86_64_SSE_CLASS and
6572 every remaining word is X86_64_SSEUP_CLASS; otherwise it is
6573 passed in memory. */
6574 if (classes[0] != X86_64_SSE_CLASS)
6575 return 0;
6576
6577 for (i = 1; i < words; i++)
6578 if (classes[i] != X86_64_SSEUP_CLASS)
6579 return 0;
6580 }
6581
6582 /* Final merger cleanup. */
6583 for (i = 0; i < words; i++)
6584 {
6585 /* If one class is MEMORY, everything should be passed in
6586 memory. */
6587 if (classes[i] == X86_64_MEMORY_CLASS)
6588 return 0;
6589
6590 /* X86_64_SSEUP_CLASS should always be preceded by
6591 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
6592 if (classes[i] == X86_64_SSEUP_CLASS
6593 && classes[i - 1] != X86_64_SSE_CLASS
6594 && classes[i - 1] != X86_64_SSEUP_CLASS)
6595 {
6596 /* The first one should never be X86_64_SSEUP_CLASS. */
6597 gcc_assert (i != 0);
6598 classes[i] = X86_64_SSE_CLASS;
6599 }
6600
6601 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6602 everything should be passed in memory. */
6603 if (classes[i] == X86_64_X87UP_CLASS
6604 && (classes[i - 1] != X86_64_X87_CLASS))
6605 {
6606 static bool warned;
6607
6608 /* The first one should never be X86_64_X87UP_CLASS. */
6609 gcc_assert (i != 0);
6610 if (!warned && warn_psabi)
6611 {
6612 warned = true;
6613 inform (input_location,
6614 "the ABI of passing union with long double"
6615 " has changed in GCC 4.4");
6616 }
6617 return 0;
6618 }
6619 }
6620 return words;
6621 }
6622
6623 /* Compute the alignment needed. We align all types to their natural
6624 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
6625 if (mode != VOIDmode && mode != BLKmode)
6626 {
6627 int mode_alignment = GET_MODE_BITSIZE (mode);
6628
6629 if (mode == XFmode)
6630 mode_alignment = 128;
6631 else if (mode == XCmode)
6632 mode_alignment = 256;
6633 if (COMPLEX_MODE_P (mode))
6634 mode_alignment /= 2;
6635 /* Misaligned fields are always returned in memory. */
6636 if (bit_offset % mode_alignment)
6637 return 0;
6638 }
6639
6640 /* For V1xx modes, just use the base mode.  */
6641 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
6642 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
6643 mode = GET_MODE_INNER (mode);
6644
6645 /* Classification of atomic types. */
6646 switch (mode)
6647 {
6648 case SDmode:
6649 case DDmode:
6650 classes[0] = X86_64_SSE_CLASS;
6651 return 1;
6652 case TDmode:
6653 classes[0] = X86_64_SSE_CLASS;
6654 classes[1] = X86_64_SSEUP_CLASS;
6655 return 2;
6656 case DImode:
6657 case SImode:
6658 case HImode:
6659 case QImode:
6660 case CSImode:
6661 case CHImode:
6662 case CQImode:
6663 {
6664 int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
6665
6666 /* Analyze last 128 bits only. */
6667 size = (size - 1) & 0x7f;
6668
6669 if (size < 32)
6670 {
6671 classes[0] = X86_64_INTEGERSI_CLASS;
6672 return 1;
6673 }
6674 else if (size < 64)
6675 {
6676 classes[0] = X86_64_INTEGER_CLASS;
6677 return 1;
6678 }
6679 else if (size < 64+32)
6680 {
6681 classes[0] = X86_64_INTEGER_CLASS;
6682 classes[1] = X86_64_INTEGERSI_CLASS;
6683 return 2;
6684 }
6685 else if (size < 64+64)
6686 {
6687 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6688 return 2;
6689 }
6690 else
6691 gcc_unreachable ();
6692 }
6693 case CDImode:
6694 case TImode:
6695 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
6696 return 2;
6697 case COImode:
6698 case OImode:
6699 /* OImode shouldn't be used directly. */
6700 gcc_unreachable ();
6701 case CTImode:
6702 return 0;
6703 case SFmode:
6704 if (!(bit_offset % 64))
6705 classes[0] = X86_64_SSESF_CLASS;
6706 else
6707 classes[0] = X86_64_SSE_CLASS;
6708 return 1;
6709 case DFmode:
6710 classes[0] = X86_64_SSEDF_CLASS;
6711 return 1;
6712 case XFmode:
6713 classes[0] = X86_64_X87_CLASS;
6714 classes[1] = X86_64_X87UP_CLASS;
6715 return 2;
6716 case TFmode:
6717 classes[0] = X86_64_SSE_CLASS;
6718 classes[1] = X86_64_SSEUP_CLASS;
6719 return 2;
6720 case SCmode:
6721 classes[0] = X86_64_SSE_CLASS;
6722 if (!(bit_offset % 64))
6723 return 1;
6724 else
6725 {
6726 static bool warned;
6727
6728 if (!warned && warn_psabi)
6729 {
6730 warned = true;
6731 inform (input_location,
6732 "the ABI of passing structure with complex float"
6733 " member has changed in GCC 4.4");
6734 }
6735 classes[1] = X86_64_SSESF_CLASS;
6736 return 2;
6737 }
6738 case DCmode:
6739 classes[0] = X86_64_SSEDF_CLASS;
6740 classes[1] = X86_64_SSEDF_CLASS;
6741 return 2;
6742 case XCmode:
6743 classes[0] = X86_64_COMPLEX_X87_CLASS;
6744 return 1;
6745 case TCmode:
6746 /* This mode is larger than 16 bytes. */
6747 return 0;
6748 case V8SFmode:
6749 case V8SImode:
6750 case V32QImode:
6751 case V16HImode:
6752 case V4DFmode:
6753 case V4DImode:
6754 classes[0] = X86_64_SSE_CLASS;
6755 classes[1] = X86_64_SSEUP_CLASS;
6756 classes[2] = X86_64_SSEUP_CLASS;
6757 classes[3] = X86_64_SSEUP_CLASS;
6758 return 4;
6759 case V8DFmode:
6760 case V16SFmode:
6761 case V8DImode:
6762 case V16SImode:
6763 case V32HImode:
6764 case V64QImode:
6765 classes[0] = X86_64_SSE_CLASS;
6766 classes[1] = X86_64_SSEUP_CLASS;
6767 classes[2] = X86_64_SSEUP_CLASS;
6768 classes[3] = X86_64_SSEUP_CLASS;
6769 classes[4] = X86_64_SSEUP_CLASS;
6770 classes[5] = X86_64_SSEUP_CLASS;
6771 classes[6] = X86_64_SSEUP_CLASS;
6772 classes[7] = X86_64_SSEUP_CLASS;
6773 return 8;
6774 case V4SFmode:
6775 case V4SImode:
6776 case V16QImode:
6777 case V8HImode:
6778 case V2DFmode:
6779 case V2DImode:
6780 classes[0] = X86_64_SSE_CLASS;
6781 classes[1] = X86_64_SSEUP_CLASS;
6782 return 2;
6783 case V1TImode:
6784 case V1DImode:
6785 case V2SFmode:
6786 case V2SImode:
6787 case V4HImode:
6788 case V8QImode:
6789 classes[0] = X86_64_SSE_CLASS;
6790 return 1;
6791 case BLKmode:
6792 case VOIDmode:
6793 return 0;
6794 default:
6795 gcc_assert (VECTOR_MODE_P (mode));
6796
6797 if (bytes > 16)
6798 return 0;
6799
6800 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
6801
6802 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
6803 classes[0] = X86_64_INTEGERSI_CLASS;
6804 else
6805 classes[0] = X86_64_INTEGER_CLASS;
6806 classes[1] = X86_64_INTEGER_CLASS;
6807 return 1 + (bytes > 8);
6808 }
6809 }
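
/* A few examples of the atomic classification above: TImode (__int128) is
   two X86_64_INTEGER_CLASS eightbytes, V8SFmode (__m256) is X86_64_SSE_CLASS
   followed by three X86_64_SSEUP_CLASS eightbytes, and XFmode (long double)
   is X86_64_X87_CLASS followed by X86_64_X87UP_CLASS.  */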
6810
6811 /* Examine the argument and set the number of registers required in each
6812 class. Return true iff the parameter should be passed in memory. */
6813
6814 static bool
6815 examine_argument (enum machine_mode mode, const_tree type, int in_return,
6816 int *int_nregs, int *sse_nregs)
6817 {
6818 enum x86_64_reg_class regclass[MAX_CLASSES];
6819 int n = classify_argument (mode, type, regclass, 0);
6820
6821 *int_nregs = 0;
6822 *sse_nregs = 0;
6823
6824 if (!n)
6825 return true;
6826 for (n--; n >= 0; n--)
6827 switch (regclass[n])
6828 {
6829 case X86_64_INTEGER_CLASS:
6830 case X86_64_INTEGERSI_CLASS:
6831 (*int_nregs)++;
6832 break;
6833 case X86_64_SSE_CLASS:
6834 case X86_64_SSESF_CLASS:
6835 case X86_64_SSEDF_CLASS:
6836 (*sse_nregs)++;
6837 break;
6838 case X86_64_NO_CLASS:
6839 case X86_64_SSEUP_CLASS:
6840 break;
6841 case X86_64_X87_CLASS:
6842 case X86_64_X87UP_CLASS:
6843 case X86_64_COMPLEX_X87_CLASS:
6844 if (!in_return)
6845 return true;
6846 break;
6847 case X86_64_MEMORY_CLASS:
6848 gcc_unreachable ();
6849 }
6850
6851 return false;
6852 }
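
/* For instance, "struct s { double d; long l; };" examined as an argument
   gives *sse_nregs == 1 and *int_nregs == 1 and returns false, whereas a
   long double argument returns true: the X87 classes force memory for
   arguments, though not for return values.  */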
6853
6854 /* Construct container for the argument used by GCC interface. See
6855 FUNCTION_ARG for the detailed description. */
6856
6857 static rtx
6858 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
6859 const_tree type, int in_return, int nintregs, int nsseregs,
6860 const int *intreg, int sse_regno)
6861 {
6862 /* The following variables hold the static issued_error state. */
6863 static bool issued_sse_arg_error;
6864 static bool issued_sse_ret_error;
6865 static bool issued_x87_ret_error;
6866
6867 enum machine_mode tmpmode;
6868 int bytes =
6869 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6870 enum x86_64_reg_class regclass[MAX_CLASSES];
6871 int n;
6872 int i;
6873 int nexps = 0;
6874 int needed_sseregs, needed_intregs;
6875 rtx exp[MAX_CLASSES];
6876 rtx ret;
6877
6878 n = classify_argument (mode, type, regclass, 0);
6879 if (!n)
6880 return NULL;
6881 if (examine_argument (mode, type, in_return, &needed_intregs,
6882 &needed_sseregs))
6883 return NULL;
6884 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
6885 return NULL;
6886
6887 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
6888 some less clueful developer tries to use floating-point anyway. */
6889 if (needed_sseregs && !TARGET_SSE)
6890 {
6891 if (in_return)
6892 {
6893 if (!issued_sse_ret_error)
6894 {
6895 error ("SSE register return with SSE disabled");
6896 issued_sse_ret_error = true;
6897 }
6898 }
6899 else if (!issued_sse_arg_error)
6900 {
6901 error ("SSE register argument with SSE disabled");
6902 issued_sse_arg_error = true;
6903 }
6904 return NULL;
6905 }
6906
6907 /* Likewise, error if the ABI requires us to return values in the
6908 x87 registers and the user specified -mno-80387. */
6909 if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
6910 for (i = 0; i < n; i++)
6911 if (regclass[i] == X86_64_X87_CLASS
6912 || regclass[i] == X86_64_X87UP_CLASS
6913 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
6914 {
6915 if (!issued_x87_ret_error)
6916 {
6917 error ("x87 register return with x87 disabled");
6918 issued_x87_ret_error = true;
6919 }
6920 return NULL;
6921 }
6922
6923 /* First construct simple cases. Avoid SCmode, since we want to use
6924 a single register to pass this type. */
6925 if (n == 1 && mode != SCmode)
6926 switch (regclass[0])
6927 {
6928 case X86_64_INTEGER_CLASS:
6929 case X86_64_INTEGERSI_CLASS:
6930 return gen_rtx_REG (mode, intreg[0]);
6931 case X86_64_SSE_CLASS:
6932 case X86_64_SSESF_CLASS:
6933 case X86_64_SSEDF_CLASS:
6934 if (mode != BLKmode)
6935 return gen_reg_or_parallel (mode, orig_mode,
6936 SSE_REGNO (sse_regno));
6937 break;
6938 case X86_64_X87_CLASS:
6939 case X86_64_COMPLEX_X87_CLASS:
6940 return gen_rtx_REG (mode, FIRST_STACK_REG);
6941 case X86_64_NO_CLASS:
6942 /* Zero sized array, struct or class. */
6943 return NULL;
6944 default:
6945 gcc_unreachable ();
6946 }
6947 if (n == 2
6948 && regclass[0] == X86_64_SSE_CLASS
6949 && regclass[1] == X86_64_SSEUP_CLASS
6950 && mode != BLKmode)
6951 return gen_reg_or_parallel (mode, orig_mode,
6952 SSE_REGNO (sse_regno));
6953 if (n == 4
6954 && regclass[0] == X86_64_SSE_CLASS
6955 && regclass[1] == X86_64_SSEUP_CLASS
6956 && regclass[2] == X86_64_SSEUP_CLASS
6957 && regclass[3] == X86_64_SSEUP_CLASS
6958 && mode != BLKmode)
6959 return gen_reg_or_parallel (mode, orig_mode,
6960 SSE_REGNO (sse_regno));
6961 if (n == 8
6962 && regclass[0] == X86_64_SSE_CLASS
6963 && regclass[1] == X86_64_SSEUP_CLASS
6964 && regclass[2] == X86_64_SSEUP_CLASS
6965 && regclass[3] == X86_64_SSEUP_CLASS
6966 && regclass[4] == X86_64_SSEUP_CLASS
6967 && regclass[5] == X86_64_SSEUP_CLASS
6968 && regclass[6] == X86_64_SSEUP_CLASS
6969 && regclass[7] == X86_64_SSEUP_CLASS
6970 && mode != BLKmode)
6971 return gen_reg_or_parallel (mode, orig_mode,
6972 SSE_REGNO (sse_regno));
6973 if (n == 2
6974 && regclass[0] == X86_64_X87_CLASS
6975 && regclass[1] == X86_64_X87UP_CLASS)
6976 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
6977
6978 if (n == 2
6979 && regclass[0] == X86_64_INTEGER_CLASS
6980 && regclass[1] == X86_64_INTEGER_CLASS
6981 && (mode == CDImode || mode == TImode)
6982 && intreg[0] + 1 == intreg[1])
6983 return gen_rtx_REG (mode, intreg[0]);
6984
6985 /* Otherwise figure out the entries of the PARALLEL. */
6986 for (i = 0; i < n; i++)
6987 {
6988 int pos;
6989
6990 switch (regclass[i])
6991 {
6992 case X86_64_NO_CLASS:
6993 break;
6994 case X86_64_INTEGER_CLASS:
6995 case X86_64_INTEGERSI_CLASS:
6996 /* Merge TImodes on aligned occasions here too. */
6997 if (i * 8 + 8 > bytes)
6998 tmpmode
6999 = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7000 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7001 tmpmode = SImode;
7002 else
7003 tmpmode = DImode;
7004 /* We've requested 24 bytes that we
7005 don't have a mode for. Use DImode. */
7006 if (tmpmode == BLKmode)
7007 tmpmode = DImode;
7008 exp [nexps++]
7009 = gen_rtx_EXPR_LIST (VOIDmode,
7010 gen_rtx_REG (tmpmode, *intreg),
7011 GEN_INT (i*8));
7012 intreg++;
7013 break;
7014 case X86_64_SSESF_CLASS:
7015 exp [nexps++]
7016 = gen_rtx_EXPR_LIST (VOIDmode,
7017 gen_rtx_REG (SFmode,
7018 SSE_REGNO (sse_regno)),
7019 GEN_INT (i*8));
7020 sse_regno++;
7021 break;
7022 case X86_64_SSEDF_CLASS:
7023 exp [nexps++]
7024 = gen_rtx_EXPR_LIST (VOIDmode,
7025 gen_rtx_REG (DFmode,
7026 SSE_REGNO (sse_regno)),
7027 GEN_INT (i*8));
7028 sse_regno++;
7029 break;
7030 case X86_64_SSE_CLASS:
7031 pos = i;
7032 switch (n)
7033 {
7034 case 1:
7035 tmpmode = DImode;
7036 break;
7037 case 2:
7038 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7039 {
7040 tmpmode = TImode;
7041 i++;
7042 }
7043 else
7044 tmpmode = DImode;
7045 break;
7046 case 4:
7047 gcc_assert (i == 0
7048 && regclass[1] == X86_64_SSEUP_CLASS
7049 && regclass[2] == X86_64_SSEUP_CLASS
7050 && regclass[3] == X86_64_SSEUP_CLASS);
7051 tmpmode = OImode;
7052 i += 3;
7053 break;
7054 case 8:
7055 gcc_assert (i == 0
7056 && regclass[1] == X86_64_SSEUP_CLASS
7057 && regclass[2] == X86_64_SSEUP_CLASS
7058 && regclass[3] == X86_64_SSEUP_CLASS
7059 && regclass[4] == X86_64_SSEUP_CLASS
7060 && regclass[5] == X86_64_SSEUP_CLASS
7061 && regclass[6] == X86_64_SSEUP_CLASS
7062 && regclass[7] == X86_64_SSEUP_CLASS);
7063 tmpmode = XImode;
7064 i += 7;
7065 break;
7066 default:
7067 gcc_unreachable ();
7068 }
7069 exp [nexps++]
7070 = gen_rtx_EXPR_LIST (VOIDmode,
7071 gen_rtx_REG (tmpmode,
7072 SSE_REGNO (sse_regno)),
7073 GEN_INT (pos*8));
7074 sse_regno++;
7075 break;
7076 default:
7077 gcc_unreachable ();
7078 }
7079 }
7080
7081 /* Empty aligned struct, union or class. */
7082 if (nexps == 0)
7083 return NULL;
7084
7085 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7086 for (i = 0; i < nexps; i++)
7087 XVECEXP (ret, 0, i) = exp [i];
7088 return ret;
7089 }
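
/* For "struct s { double d; long l; };" used as a return value, the loop
   above builds roughly
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI ax) (const_int 8))])
   i.e. the SSE eightbyte lands at offset 0 in %xmm0 and the integer
   eightbyte at offset 8 in %rax.  */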
7090
7091 /* Update the data in CUM to advance over an argument of mode MODE
7092 and data type TYPE. (TYPE is null for libcalls where that information
7093 may not be available.) */
7094
7095 static void
7096 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7097 const_tree type, HOST_WIDE_INT bytes,
7098 HOST_WIDE_INT words)
7099 {
7100 switch (mode)
7101 {
7102 default:
7103 break;
7104
7105 case BLKmode:
7106 if (bytes < 0)
7107 break;
7108 /* FALLTHRU */
7109
7110 case DImode:
7111 case SImode:
7112 case HImode:
7113 case QImode:
7114 cum->words += words;
7115 cum->nregs -= words;
7116 cum->regno += words;
7117
7118 if (cum->nregs <= 0)
7119 {
7120 cum->nregs = 0;
7121 cum->regno = 0;
7122 }
7123 break;
7124
7125 case OImode:
7126 /* OImode shouldn't be used directly. */
7127 gcc_unreachable ();
7128
7129 case DFmode:
7130 if (cum->float_in_sse < 2)
7131 break;
7132 case SFmode:
7133 if (cum->float_in_sse < 1)
7134 break;
7135 /* FALLTHRU */
7136
7137 case V8SFmode:
7138 case V8SImode:
7139 case V64QImode:
7140 case V32HImode:
7141 case V16SImode:
7142 case V8DImode:
7143 case V16SFmode:
7144 case V8DFmode:
7145 case V32QImode:
7146 case V16HImode:
7147 case V4DFmode:
7148 case V4DImode:
7149 case TImode:
7150 case V16QImode:
7151 case V8HImode:
7152 case V4SImode:
7153 case V2DImode:
7154 case V4SFmode:
7155 case V2DFmode:
7156 if (!type || !AGGREGATE_TYPE_P (type))
7157 {
7158 cum->sse_words += words;
7159 cum->sse_nregs -= 1;
7160 cum->sse_regno += 1;
7161 if (cum->sse_nregs <= 0)
7162 {
7163 cum->sse_nregs = 0;
7164 cum->sse_regno = 0;
7165 }
7166 }
7167 break;
7168
7169 case V8QImode:
7170 case V4HImode:
7171 case V2SImode:
7172 case V2SFmode:
7173 case V1TImode:
7174 case V1DImode:
7175 if (!type || !AGGREGATE_TYPE_P (type))
7176 {
7177 cum->mmx_words += words;
7178 cum->mmx_nregs -= 1;
7179 cum->mmx_regno += 1;
7180 if (cum->mmx_nregs <= 0)
7181 {
7182 cum->mmx_nregs = 0;
7183 cum->mmx_regno = 0;
7184 }
7185 }
7186 break;
7187 }
7188 }
7189
7190 static void
7191 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7192 const_tree type, HOST_WIDE_INT words, bool named)
7193 {
7194 int int_nregs, sse_nregs;
7195
7196 /* Unnamed 512-bit and 256-bit vector mode parameters are passed on the stack. */
7197 if (!named && (VALID_AVX512F_REG_MODE (mode)
7198 || VALID_AVX256_REG_MODE (mode)))
7199 return;
7200
7201 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7202 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7203 {
7204 cum->nregs -= int_nregs;
7205 cum->sse_nregs -= sse_nregs;
7206 cum->regno += int_nregs;
7207 cum->sse_regno += sse_nregs;
7208 }
7209 else
7210 {
7211 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7212 cum->words = (cum->words + align - 1) & ~(align - 1);
7213 cum->words += words;
7214 }
7215 }
7216
7217 static void
7218 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7219 HOST_WIDE_INT words)
7220 {
7221 /* Otherwise, this should have been passed indirectly. */
7222 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7223
7224 cum->words += words;
7225 if (cum->nregs > 0)
7226 {
7227 cum->nregs -= 1;
7228 cum->regno += 1;
7229 }
7230 }
7231
7232 /* Update the data in CUM to advance over an argument of mode MODE and
7233 data type TYPE. (TYPE is null for libcalls where that information
7234 may not be available.) */
7235
7236 static void
7237 ix86_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
7238 const_tree type, bool named)
7239 {
7240 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7241 HOST_WIDE_INT bytes, words;
7242
7243 if (mode == BLKmode)
7244 bytes = int_size_in_bytes (type);
7245 else
7246 bytes = GET_MODE_SIZE (mode);
7247 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7248
7249 if (type)
7250 mode = type_natural_mode (type, NULL, false);
7251
7252 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7253 function_arg_advance_ms_64 (cum, bytes, words);
7254 else if (TARGET_64BIT)
7255 function_arg_advance_64 (cum, mode, type, words, named);
7256 else
7257 function_arg_advance_32 (cum, mode, type, bytes, words);
7258 }
7259
7260 /* Define where to put the arguments to a function.
7261 Value is zero to push the argument on the stack,
7262 or a hard register in which to store the argument.
7263
7264 MODE is the argument's machine mode.
7265 TYPE is the data type of the argument (as a tree).
7266 This is null for libcalls where that information may
7267 not be available.
7268 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7269 the preceding args and about the function being called.
7270 NAMED is nonzero if this argument is a named parameter
7271 (otherwise it is an extra parameter matching an ellipsis). */
7272
7273 static rtx
7274 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7275 enum machine_mode orig_mode, const_tree type,
7276 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7277 {
7278 /* Avoid the AL settings for the Unix64 ABI. */
7279 if (mode == VOIDmode)
7280 return constm1_rtx;
7281
7282 switch (mode)
7283 {
7284 default:
7285 break;
7286
7287 case BLKmode:
7288 if (bytes < 0)
7289 break;
7290 /* FALLTHRU */
7291 case DImode:
7292 case SImode:
7293 case HImode:
7294 case QImode:
7295 if (words <= cum->nregs)
7296 {
7297 int regno = cum->regno;
7298
7299 /* Fastcall allocates the first two DWORD (SImode) or
7300 smaller arguments to ECX and EDX if they are not
7301 aggregate types. */
7302 if (cum->fastcall)
7303 {
7304 if (mode == BLKmode
7305 || mode == DImode
7306 || (type && AGGREGATE_TYPE_P (type)))
7307 break;
7308
7309 /* ECX, not EAX, is the first allocated register. */
7310 if (regno == AX_REG)
7311 regno = CX_REG;
7312 }
7313 return gen_rtx_REG (mode, regno);
7314 }
7315 break;
7316
7317 case DFmode:
7318 if (cum->float_in_sse < 2)
7319 break;
7320 case SFmode:
7321 if (cum->float_in_sse < 1)
7322 break;
7323 /* FALLTHRU */
7324 case TImode:
7325 /* In 32bit, we pass TImode in xmm registers. */
7326 case V16QImode:
7327 case V8HImode:
7328 case V4SImode:
7329 case V2DImode:
7330 case V4SFmode:
7331 case V2DFmode:
7332 if (!type || !AGGREGATE_TYPE_P (type))
7333 {
7334 if (cum->sse_nregs)
7335 return gen_reg_or_parallel (mode, orig_mode,
7336 cum->sse_regno + FIRST_SSE_REG);
7337 }
7338 break;
7339
7340 case OImode:
7341 case XImode:
7342 /* OImode and XImode shouldn't be used directly. */
7343 gcc_unreachable ();
7344
7345 case V64QImode:
7346 case V32HImode:
7347 case V16SImode:
7348 case V8DImode:
7349 case V16SFmode:
7350 case V8DFmode:
7351 case V8SFmode:
7352 case V8SImode:
7353 case V32QImode:
7354 case V16HImode:
7355 case V4DFmode:
7356 case V4DImode:
7357 if (!type || !AGGREGATE_TYPE_P (type))
7358 {
7359 if (cum->sse_nregs)
7360 return gen_reg_or_parallel (mode, orig_mode,
7361 cum->sse_regno + FIRST_SSE_REG);
7362 }
7363 break;
7364
7365 case V8QImode:
7366 case V4HImode:
7367 case V2SImode:
7368 case V2SFmode:
7369 case V1TImode:
7370 case V1DImode:
7371 if (!type || !AGGREGATE_TYPE_P (type))
7372 {
7373 if (cum->mmx_nregs)
7374 return gen_reg_or_parallel (mode, orig_mode,
7375 cum->mmx_regno + FIRST_MMX_REG);
7376 }
7377 break;
7378 }
7379
7380 return NULL_RTX;
7381 }
7382
7383 static rtx
7384 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7385 enum machine_mode orig_mode, const_tree type, bool named)
7386 {
7387 /* Handle a hidden AL argument containing the number of SSE registers
7388 used for varargs x86-64 functions. */
7389 if (mode == VOIDmode)
7390 return GEN_INT (cum->maybe_vaarg
7391 ? (cum->sse_nregs < 0
7392 ? X86_64_SSE_REGPARM_MAX
7393 : cum->sse_regno)
7394 : -1);
7395
7396 switch (mode)
7397 {
7398 default:
7399 break;
7400
7401 case V8SFmode:
7402 case V8SImode:
7403 case V32QImode:
7404 case V16HImode:
7405 case V4DFmode:
7406 case V4DImode:
7407 case V16SFmode:
7408 case V16SImode:
7409 case V64QImode:
7410 case V32HImode:
7411 case V8DFmode:
7412 case V8DImode:
7413 /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack. */
7414 if (!named)
7415 return NULL;
7416 break;
7417 }
7418
7419 return construct_container (mode, orig_mode, type, 0, cum->nregs,
7420 cum->sse_nregs,
7421 &x86_64_int_parameter_registers [cum->regno],
7422 cum->sse_regno);
7423 }
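
/* The VOIDmode case above implements the hidden %al argument of the SysV
   x86-64 varargs convention: before a call to an unprototyped or variadic
   function, %al is loaded with (an upper bound on) the number of SSE
   registers actually used for the call, so the callee's prologue can skip
   saving the xmm registers when none were passed.  */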
7424
7425 static rtx
7426 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
7427 enum machine_mode orig_mode, bool named,
7428 HOST_WIDE_INT bytes)
7429 {
7430 unsigned int regno;
7431
7432 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7433 We use the value -2 to specify that the current function call is MS ABI. */
7434 if (mode == VOIDmode)
7435 return GEN_INT (-2);
7436
7437 /* If we've run out of registers, it goes on the stack. */
7438 if (cum->nregs == 0)
7439 return NULL_RTX;
7440
7441 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7442
7443 /* Only floating point modes are passed in anything but integer regs. */
7444 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7445 {
7446 if (named)
7447 regno = cum->regno + FIRST_SSE_REG;
7448 else
7449 {
7450 rtx t1, t2;
7451
7452 /* Unnamed floating parameters are passed in both the
7453 SSE and integer registers. */
7454 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7455 t2 = gen_rtx_REG (mode, regno);
7456 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7457 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7458 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7459 }
7460 }
7461 /* Handle aggregate types passed in a register. */
7462 if (orig_mode == BLKmode)
7463 {
7464 if (bytes > 0 && bytes <= 8)
7465 mode = (bytes > 4 ? DImode : SImode);
7466 if (mode == BLKmode)
7467 mode = DImode;
7468 }
7469
7470 return gen_reg_or_parallel (mode, orig_mode, regno);
7471 }
7472
7473 /* Return where to put the arguments to a function.
7474 Return zero to push the argument on the stack, or a hard register in which to store the argument.
7475
7476 MODE is the argument's machine mode. TYPE is the data type of the
7477 argument. It is null for libcalls where that information may not be
7478 available. CUM gives information about the preceding args and about
7479 the function being called. NAMED is nonzero if this argument is a
7480 named parameter (otherwise it is an extra parameter matching an
7481 ellipsis). */
7482
7483 static rtx
7484 ix86_function_arg (cumulative_args_t cum_v, enum machine_mode omode,
7485 const_tree type, bool named)
7486 {
7487 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7488 enum machine_mode mode = omode;
7489 HOST_WIDE_INT bytes, words;
7490 rtx arg;
7491
7492 if (mode == BLKmode)
7493 bytes = int_size_in_bytes (type);
7494 else
7495 bytes = GET_MODE_SIZE (mode);
7496 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7497
7498 /* To simplify the code below, represent vector types with a vector mode
7499 even if MMX/SSE are not active. */
7500 if (type && TREE_CODE (type) == VECTOR_TYPE)
7501 mode = type_natural_mode (type, cum, false);
7502
7503 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7504 arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7505 else if (TARGET_64BIT)
7506 arg = function_arg_64 (cum, mode, omode, type, named);
7507 else
7508 arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7509
7510 return arg;
7511 }
7512
7513 /* A C expression that indicates when an argument must be passed by
7514 reference. If nonzero for an argument, a copy of that argument is
7515 made in memory and a pointer to the argument is passed instead of
7516 the argument itself. The pointer is passed in whatever way is
7517 appropriate for passing a pointer to that type. */
7518
7519 static bool
7520 ix86_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
7521 const_tree type, bool named ATTRIBUTE_UNUSED)
7522 {
7523 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7524
7525 /* See Windows x64 Software Convention. */
7526 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7527 {
7528 int msize = (int) GET_MODE_SIZE (mode);
7529 if (type)
7530 {
7531 /* Arrays are passed by reference. */
7532 if (TREE_CODE (type) == ARRAY_TYPE)
7533 return true;
7534
7535 if (AGGREGATE_TYPE_P (type))
7536 {
7537 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
7538 are passed by reference. */
7539 msize = int_size_in_bytes (type);
7540 }
7541 }
7542
7543 /* __m128 is passed by reference. */
7544 switch (msize) {
7545 case 1: case 2: case 4: case 8:
7546 break;
7547 default:
7548 return true;
7549 }
7550 }
7551 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
7552 return true;
7553
7554 return false;
7555 }
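
/* Under the MS x64 convention this means, for example, that __m128 values
   and any aggregate whose size is not 1, 2, 4 or 8 bytes are passed by
   reference, while on the SysV side only variable-sized types take the
   by-reference path here (large aggregates are handled via the memory
   classes instead).  */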
7556
7557 /* Return true when TYPE should be 128bit aligned for 32bit argument
7558 passing ABI. XXX: This function is obsolete and is only used for
7559 checking psABI compatibility with previous versions of GCC. */
7560
7561 static bool
7562 ix86_compat_aligned_value_p (const_tree type)
7563 {
7564 enum machine_mode mode = TYPE_MODE (type);
7565 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
7566 || mode == TDmode
7567 || mode == TFmode
7568 || mode == TCmode)
7569 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
7570 return true;
7571 if (TYPE_ALIGN (type) < 128)
7572 return false;
7573
7574 if (AGGREGATE_TYPE_P (type))
7575 {
7576 /* Walk the aggregates recursively. */
7577 switch (TREE_CODE (type))
7578 {
7579 case RECORD_TYPE:
7580 case UNION_TYPE:
7581 case QUAL_UNION_TYPE:
7582 {
7583 tree field;
7584
7585 /* Walk all the structure fields. */
7586 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7587 {
7588 if (TREE_CODE (field) == FIELD_DECL
7589 && ix86_compat_aligned_value_p (TREE_TYPE (field)))
7590 return true;
7591 }
7592 break;
7593 }
7594
7595 case ARRAY_TYPE:
7596 /* Just for use if some languages pass arrays by value. */
7597 if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
7598 return true;
7599 break;
7600
7601 default:
7602 gcc_unreachable ();
7603 }
7604 }
7605 return false;
7606 }
7607
7608 /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
7609 XXX: This function is obsolete and is only used for checking psABI
7610 compatibility with previous versions of GCC. */
7611
7612 static unsigned int
7613 ix86_compat_function_arg_boundary (enum machine_mode mode,
7614 const_tree type, unsigned int align)
7615 {
7616 /* In 32bit, only _Decimal128 and __float128 are aligned to their
7617 natural boundaries. */
7618 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
7619 {
7620 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
7621 make an exception for SSE modes since these require 128bit
7622 alignment.
7623
7624 The handling here differs from field_alignment. ICC aligns MMX
7625 arguments to 4 byte boundaries, while structure fields are aligned
7626 to 8 byte boundaries. */
7627 if (!type)
7628 {
7629 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
7630 align = PARM_BOUNDARY;
7631 }
7632 else
7633 {
7634 if (!ix86_compat_aligned_value_p (type))
7635 align = PARM_BOUNDARY;
7636 }
7637 }
7638 if (align > BIGGEST_ALIGNMENT)
7639 align = BIGGEST_ALIGNMENT;
7640 return align;
7641 }
7642
7643 /* Return true when TYPE should be 128bit aligned for 32bit argument
7644 passing ABI. */
7645
7646 static bool
7647 ix86_contains_aligned_value_p (const_tree type)
7648 {
7649 enum machine_mode mode = TYPE_MODE (type);
7650
7651 if (mode == XFmode || mode == XCmode)
7652 return false;
7653
7654 if (TYPE_ALIGN (type) < 128)
7655 return false;
7656
7657 if (AGGREGATE_TYPE_P (type))
7658 {
7659 /* Walk the aggregates recursively. */
7660 switch (TREE_CODE (type))
7661 {
7662 case RECORD_TYPE:
7663 case UNION_TYPE:
7664 case QUAL_UNION_TYPE:
7665 {
7666 tree field;
7667
7668 /* Walk all the structure fields. */
7669 for (field = TYPE_FIELDS (type);
7670 field;
7671 field = DECL_CHAIN (field))
7672 {
7673 if (TREE_CODE (field) == FIELD_DECL
7674 && ix86_contains_aligned_value_p (TREE_TYPE (field)))
7675 return true;
7676 }
7677 break;
7678 }
7679
7680 case ARRAY_TYPE:
7681 /* Just for use if some languages pass arrays by value. */
7682 if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
7683 return true;
7684 break;
7685
7686 default:
7687 gcc_unreachable ();
7688 }
7689 }
7690 else
7691 return TYPE_ALIGN (type) >= 128;
7692
7693 return false;
7694 }
7695
7696 /* Gives the alignment boundary, in bits, of an argument with the
7697 specified mode and type. */
7698
7699 static unsigned int
7700 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
7701 {
7702 unsigned int align;
7703 if (type)
7704 {
7705 /* Since the main variant type is used for the call, convert the
7706 type to its main variant. */
7707 type = TYPE_MAIN_VARIANT (type);
7708 align = TYPE_ALIGN (type);
7709 }
7710 else
7711 align = GET_MODE_ALIGNMENT (mode);
7712 if (align < PARM_BOUNDARY)
7713 align = PARM_BOUNDARY;
7714 else
7715 {
7716 static bool warned;
7717 unsigned int saved_align = align;
7718
7719 if (!TARGET_64BIT)
7720 {
7721 /* i386 ABI defines XFmode arguments to be 4 byte aligned. */
7722 if (!type)
7723 {
7724 if (mode == XFmode || mode == XCmode)
7725 align = PARM_BOUNDARY;
7726 }
7727 else if (!ix86_contains_aligned_value_p (type))
7728 align = PARM_BOUNDARY;
7729
7730 if (align < 128)
7731 align = PARM_BOUNDARY;
7732 }
7733
7734 if (warn_psabi
7735 && !warned
7736 && align != ix86_compat_function_arg_boundary (mode, type,
7737 saved_align))
7738 {
7739 warned = true;
7740 inform (input_location,
7741 "The ABI for passing parameters with %d-byte"
7742 " alignment has changed in GCC 4.6",
7743 align / BITS_PER_UNIT);
7744 }
7745 }
7746
7747 return align;
7748 }
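
/* For example, in 32-bit mode a __m128 argument keeps its 128-bit alignment
   here, while a plain double argument is knocked down to PARM_BOUNDARY
   (32 bits); in 64-bit mode arguments simply get their natural alignment,
   but never less than PARM_BOUNDARY.  */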
7749
7750 /* Return true if N is a possible register number of function value. */
7751
7752 static bool
7753 ix86_function_value_regno_p (const unsigned int regno)
7754 {
7755 switch (regno)
7756 {
7757 case AX_REG:
7758 case DX_REG:
7759 return true;
7760 case DI_REG:
7761 case SI_REG:
7762 return TARGET_64BIT && ix86_abi != MS_ABI;
7763
7764 /* Complex values are returned in the %st(0)/%st(1) pair. */
7765 case ST0_REG:
7766 case ST1_REG:
7767 /* TODO: The function should depend on current function ABI but
7768 builtins.c would need updating then. Therefore we use the
7769 default ABI. */
7770 if (TARGET_64BIT && ix86_abi == MS_ABI)
7771 return false;
7772 return TARGET_FLOAT_RETURNS_IN_80387;
7773
7774 /* Complex values are returned in the %xmm0/%xmm1 pair. */
7775 case XMM0_REG:
7776 case XMM1_REG:
7777 return TARGET_SSE;
7778
7779 case MM0_REG:
7780 if (TARGET_MACHO || TARGET_64BIT)
7781 return false;
7782 return TARGET_MMX;
7783 }
7784
7785 return false;
7786 }
7787
7788 /* Define how to find the value returned by a function.
7789 VALTYPE is the data type of the value (as a tree).
7790 If the precise function being called is known, FUNC is its FUNCTION_DECL;
7791 otherwise, FUNC is 0. */
7792
7793 static rtx
7794 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
7795 const_tree fntype, const_tree fn)
7796 {
7797 unsigned int regno;
7798
7799 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
7800 we normally prevent this case when MMX is not available. However,
7801 some ABIs may require the result to be returned like DImode. */
7802 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
7803 regno = FIRST_MMX_REG;
7804
7805 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
7806 we prevent this case when SSE is not available. However, some ABIs
7807 may require the result to be returned like integer TImode. */
7808 else if (mode == TImode
7809 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
7810 regno = FIRST_SSE_REG;
7811
7812 /* 32-byte vector modes in %ymm0. */
7813 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
7814 regno = FIRST_SSE_REG;
7815
7816 /* 64-byte vector modes in %zmm0. */
7817 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
7818 regno = FIRST_SSE_REG;
7819
7820 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
7821 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
7822 regno = FIRST_FLOAT_REG;
7823 else
7824 /* Most things go in %eax. */
7825 regno = AX_REG;
7826
7827 /* Override FP return register with %xmm0 for local functions when
7828 SSE math is enabled or for functions with sseregparm attribute. */
7829 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
7830 {
7831 int sse_level = ix86_function_sseregparm (fntype, fn, false);
7832 if ((sse_level >= 1 && mode == SFmode)
7833 || (sse_level == 2 && mode == DFmode))
7834 regno = FIRST_SSE_REG;
7835 }
7836
7837 /* OImode shouldn't be used directly. */
7838 gcc_assert (mode != OImode);
7839
7840 return gen_rtx_REG (orig_mode, regno);
7841 }
7842
7843 static rtx
7844 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
7845 const_tree valtype)
7846 {
7847 rtx ret;
7848
7849 /* Handle libcalls, which don't provide a type node. */
7850 if (valtype == NULL)
7851 {
7852 unsigned int regno;
7853
7854 switch (mode)
7855 {
7856 case SFmode:
7857 case SCmode:
7858 case DFmode:
7859 case DCmode:
7860 case TFmode:
7861 case SDmode:
7862 case DDmode:
7863 case TDmode:
7864 regno = FIRST_SSE_REG;
7865 break;
7866 case XFmode:
7867 case XCmode:
7868 regno = FIRST_FLOAT_REG;
7869 break;
7870 case TCmode:
7871 return NULL;
7872 default:
7873 regno = AX_REG;
7874 }
7875
7876 return gen_rtx_REG (mode, regno);
7877 }
7878 else if (POINTER_TYPE_P (valtype))
7879 {
7880 /* Pointers are always returned in word_mode. */
7881 mode = word_mode;
7882 }
7883
7884 ret = construct_container (mode, orig_mode, valtype, 1,
7885 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
7886 x86_64_int_return_registers, 0);
7887
7888 /* For zero sized structures, construct_container returns NULL, but we
7889 need to keep the rest of the compiler happy by returning a meaningful value. */
7890 if (!ret)
7891 ret = gen_rtx_REG (orig_mode, AX_REG);
7892
7893 return ret;
7894 }
7895
7896 static rtx
7897 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode,
7898 const_tree valtype)
7899 {
7900 unsigned int regno = AX_REG;
7901
7902 if (TARGET_SSE)
7903 {
7904 switch (GET_MODE_SIZE (mode))
7905 {
7906 case 16:
7907 if (valtype != NULL_TREE
7908 && !VECTOR_INTEGER_TYPE_P (valtype)
7910 && !INTEGRAL_TYPE_P (valtype)
7911 && !VECTOR_FLOAT_TYPE_P (valtype))
7912 break;
7913 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
7914 && !COMPLEX_MODE_P (mode))
7915 regno = FIRST_SSE_REG;
7916 break;
7917 case 8:
7918 case 4:
7919 if (mode == SFmode || mode == DFmode)
7920 regno = FIRST_SSE_REG;
7921 break;
7922 default:
7923 break;
7924 }
7925 }
7926 return gen_rtx_REG (orig_mode, regno);
7927 }
7928
7929 static rtx
7930 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
7931 enum machine_mode orig_mode, enum machine_mode mode)
7932 {
7933 const_tree fn, fntype;
7934
7935 fn = NULL_TREE;
7936 if (fntype_or_decl && DECL_P (fntype_or_decl))
7937 fn = fntype_or_decl;
7938 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
7939
7940 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
7941 return function_value_ms_64 (orig_mode, mode, valtype);
7942 else if (TARGET_64BIT)
7943 return function_value_64 (orig_mode, mode, valtype);
7944 else
7945 return function_value_32 (orig_mode, mode, fntype, fn);
7946 }
7947
7948 static rtx
7949 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
7950 bool outgoing ATTRIBUTE_UNUSED)
7951 {
7952 enum machine_mode mode, orig_mode;
7953
7954 orig_mode = TYPE_MODE (valtype);
7955 mode = type_natural_mode (valtype, NULL, true);
7956 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
7957 }
7958
7959 /* Pointer function arguments and return values are promoted to
7960 word_mode. */
7961
7962 static enum machine_mode
7963 ix86_promote_function_mode (const_tree type, enum machine_mode mode,
7964 int *punsignedp, const_tree fntype,
7965 int for_return)
7966 {
7967 if (type != NULL_TREE && POINTER_TYPE_P (type))
7968 {
7969 *punsignedp = POINTERS_EXTEND_UNSIGNED;
7970 return word_mode;
7971 }
7972 return default_promote_function_mode (type, mode, punsignedp, fntype,
7973 for_return);
7974 }
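
/* This matters mainly for the x32 ABI, where pointers are 32 bits but
   word_mode is DImode: pointer arguments and return values are then
   zero-extended to 64 bits when passed in registers.  */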
7975
7976 /* Return true if a structure, union or array with MODE containing FIELD
7977 should be accessed using BLKmode. */
7978
7979 static bool
7980 ix86_member_type_forces_blk (const_tree field, enum machine_mode mode)
7981 {
7982 /* Union with XFmode must be in BLKmode. */
7983 return (mode == XFmode
7984 && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
7985 || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
7986 }
7987
7988 rtx
7989 ix86_libcall_value (enum machine_mode mode)
7990 {
7991 return ix86_function_value_1 (NULL, NULL, mode, mode);
7992 }
7993
7994 /* Return true iff type is returned in memory. */
7995
7996 static bool
7997 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7998 {
7999 #ifdef SUBTARGET_RETURN_IN_MEMORY
8000 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8001 #else
8002 const enum machine_mode mode = type_natural_mode (type, NULL, true);
8003 HOST_WIDE_INT size;
8004
8005 if (TARGET_64BIT)
8006 {
8007 if (ix86_function_type_abi (fntype) == MS_ABI)
8008 {
8009 size = int_size_in_bytes (type);
8010
8011 /* __m128 is returned in xmm0. */
8012 if ((!type || VECTOR_INTEGER_TYPE_P (type)
8013 || INTEGRAL_TYPE_P (type)
8014 || VECTOR_FLOAT_TYPE_P (type))
8015 && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8016 && !COMPLEX_MODE_P (mode)
8017 && (GET_MODE_SIZE (mode) == 16 || size == 16))
8018 return false;
8019
8020 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
8021 return size != 1 && size != 2 && size != 4 && size != 8;
8022 }
8023 else
8024 {
8025 int needed_intregs, needed_sseregs;
8026
8027 return examine_argument (mode, type, 1,
8028 &needed_intregs, &needed_sseregs);
8029 }
8030 }
8031 else
8032 {
8033 if (mode == BLKmode)
8034 return true;
8035
8036 size = int_size_in_bytes (type);
8037
8038 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8039 return false;
8040
8041 if (VECTOR_MODE_P (mode) || mode == TImode)
8042 {
8043 /* User-created vectors small enough to fit in EAX. */
8044 if (size < 8)
8045 return false;
8046
8047 /* Unless the ABI prescribes otherwise,
8048 MMX/3dNow values are returned in MM0 if available. */
8049
8050 if (size == 8)
8051 return TARGET_VECT8_RETURNS || !TARGET_MMX;
8052
8053 /* SSE values are returned in XMM0 if available. */
8054 if (size == 16)
8055 return !TARGET_SSE;
8056
8057 /* AVX values are returned in YMM0 if available. */
8058 if (size == 32)
8059 return !TARGET_AVX;
8060
8061 /* AVX512F values are returned in ZMM0 if available. */
8062 if (size == 64)
8063 return !TARGET_AVX512F;
8064 }
8065
8066 if (mode == XFmode)
8067 return false;
8068
8069 if (size > 12)
8070 return true;
8071
8072 /* OImode shouldn't be used directly. */
8073 gcc_assert (mode != OImode);
8074
8075 return false;
8076 }
8077 #endif
8078 }
8079
8080 \f
8081 /* Create the va_list data type. */
8082
8083 /* Returns the calling convention specific va_list data type.
8084 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
8085
8086 static tree
8087 ix86_build_builtin_va_list_abi (enum calling_abi abi)
8088 {
8089 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8090
8091 /* For i386 we use a plain pointer to the argument area. */
8092 if (!TARGET_64BIT || abi == MS_ABI)
8093 return build_pointer_type (char_type_node);
8094
8095 record = lang_hooks.types.make_type (RECORD_TYPE);
8096 type_decl = build_decl (BUILTINS_LOCATION,
8097 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8098
8099 f_gpr = build_decl (BUILTINS_LOCATION,
8100 FIELD_DECL, get_identifier ("gp_offset"),
8101 unsigned_type_node);
8102 f_fpr = build_decl (BUILTINS_LOCATION,
8103 FIELD_DECL, get_identifier ("fp_offset"),
8104 unsigned_type_node);
8105 f_ovf = build_decl (BUILTINS_LOCATION,
8106 FIELD_DECL, get_identifier ("overflow_arg_area"),
8107 ptr_type_node);
8108 f_sav = build_decl (BUILTINS_LOCATION,
8109 FIELD_DECL, get_identifier ("reg_save_area"),
8110 ptr_type_node);
8111
8112 va_list_gpr_counter_field = f_gpr;
8113 va_list_fpr_counter_field = f_fpr;
8114
8115 DECL_FIELD_CONTEXT (f_gpr) = record;
8116 DECL_FIELD_CONTEXT (f_fpr) = record;
8117 DECL_FIELD_CONTEXT (f_ovf) = record;
8118 DECL_FIELD_CONTEXT (f_sav) = record;
8119
8120 TYPE_STUB_DECL (record) = type_decl;
8121 TYPE_NAME (record) = type_decl;
8122 TYPE_FIELDS (record) = f_gpr;
8123 DECL_CHAIN (f_gpr) = f_fpr;
8124 DECL_CHAIN (f_fpr) = f_ovf;
8125 DECL_CHAIN (f_ovf) = f_sav;
8126
8127 layout_type (record);
8128
8129 /* The correct type is an array type of one element. */
8130 return build_array_type (record, build_index_type (size_zero_node));
8131 }
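
/* The 64-bit SYSV_ABI record built above corresponds roughly to the
   C declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];

   which is the va_list layout mandated by the x86-64 psABI.  */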
8132
8133 /* Set up the builtin va_list data type and, for 64-bit, the additional
8134 calling convention specific va_list data types. */
8135
8136 static tree
8137 ix86_build_builtin_va_list (void)
8138 {
8139 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8140
8141 /* Initialize ABI specific va_list builtin types. */
8142 if (TARGET_64BIT)
8143 {
8144 tree t;
8145 if (ix86_abi == MS_ABI)
8146 {
8147 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8148 if (TREE_CODE (t) != RECORD_TYPE)
8149 t = build_variant_type_copy (t);
8150 sysv_va_list_type_node = t;
8151 }
8152 else
8153 {
8154 t = ret;
8155 if (TREE_CODE (t) != RECORD_TYPE)
8156 t = build_variant_type_copy (t);
8157 sysv_va_list_type_node = t;
8158 }
8159 if (ix86_abi != MS_ABI)
8160 {
8161 t = ix86_build_builtin_va_list_abi (MS_ABI);
8162 if (TREE_CODE (t) != RECORD_TYPE)
8163 t = build_variant_type_copy (t);
8164 ms_va_list_type_node = t;
8165 }
8166 else
8167 {
8168 t = ret;
8169 if (TREE_CODE (t) != RECORD_TYPE)
8170 t = build_variant_type_copy (t);
8171 ms_va_list_type_node = t;
8172 }
8173 }
8174
8175 return ret;
8176 }
8177
8178 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
8179
8180 static void
8181 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8182 {
8183 rtx save_area, mem;
8184 alias_set_type set;
8185 int i, max;
8186
8187 /* GPR size of varargs save area. */
8188 if (cfun->va_list_gpr_size)
8189 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8190 else
8191 ix86_varargs_gpr_size = 0;
8192
8193 /* FPR size of varargs save area. We don't need it if we don't pass
8194 anything in SSE registers. */
8195 if (TARGET_SSE && cfun->va_list_fpr_size)
8196 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8197 else
8198 ix86_varargs_fpr_size = 0;
8199
8200 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8201 return;
8202
8203 save_area = frame_pointer_rtx;
8204 set = get_varargs_alias_set ();
8205
8206 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8207 if (max > X86_64_REGPARM_MAX)
8208 max = X86_64_REGPARM_MAX;
8209
8210 for (i = cum->regno; i < max; i++)
8211 {
8212 mem = gen_rtx_MEM (word_mode,
8213 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8214 MEM_NOTRAP_P (mem) = 1;
8215 set_mem_alias_set (mem, set);
8216 emit_move_insn (mem,
8217 gen_rtx_REG (word_mode,
8218 x86_64_int_parameter_registers[i]));
8219 }
8220
8221 if (ix86_varargs_fpr_size)
8222 {
8223 enum machine_mode smode;
8224 rtx label, test;
8225
8226 /* Now emit code to save SSE registers. The AX parameter contains the number
8227 of SSE parameter registers used to call this function, though all we
8228 actually check here is the zero/non-zero status. */
8229
8230 label = gen_label_rtx ();
8231 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8232 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8233 label));
8234
8235 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8236 we used movdqa (i.e. TImode) instead? Perhaps even better would
8237 be if we could determine the real mode of the data, via a hook
8238 into pass_stdarg. Ignore all that for now. */
8239 smode = V4SFmode;
8240 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8241 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8242
8243 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8244 if (max > X86_64_SSE_REGPARM_MAX)
8245 max = X86_64_SSE_REGPARM_MAX;
8246
8247 for (i = cum->sse_regno; i < max; ++i)
8248 {
8249 mem = plus_constant (Pmode, save_area,
8250 i * 16 + ix86_varargs_gpr_size);
8251 mem = gen_rtx_MEM (smode, mem);
8252 MEM_NOTRAP_P (mem) = 1;
8253 set_mem_alias_set (mem, set);
8254 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8255
8256 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8257 }
8258
8259 emit_label (label);
8260 }
8261 }
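/* An illustrative sketch of the register save area filled in above.  Offsets
   assume the default X86_64_REGPARM_MAX == 6 and X86_64_SSE_REGPARM_MAX == 8;
   the actual bounds come from those macros and from cfun->va_list_*_size:

       save_area +   0 ... +  47   rdi, rsi, rdx, rcx, r8, r9  (8 bytes each)
       save_area +  48 ... + 175   xmm0 ... xmm7               (16 bytes each)

   The gp_offset and fp_offset fields of the SysV va_list index into this
   block; the SSE half is only written when %al is non-zero on entry.  */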
8262
8263 static void
8264 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8265 {
8266 alias_set_type set = get_varargs_alias_set ();
8267 int i;
8268
8269 /* Reset to zero, as there might be a SysV va_arg used
8270 before. */
8271 ix86_varargs_gpr_size = 0;
8272 ix86_varargs_fpr_size = 0;
8273
8274 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8275 {
8276 rtx reg, mem;
8277
8278 mem = gen_rtx_MEM (Pmode,
8279 plus_constant (Pmode, virtual_incoming_args_rtx,
8280 i * UNITS_PER_WORD));
8281 MEM_NOTRAP_P (mem) = 1;
8282 set_mem_alias_set (mem, set);
8283
8284 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8285 emit_move_insn (mem, reg);
8286 }
8287 }
8288
8289 static void
8290 ix86_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
8291 tree type, int *pretend_size ATTRIBUTE_UNUSED,
8292 int no_rtl)
8293 {
8294 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8295 CUMULATIVE_ARGS next_cum;
8296 tree fntype;
8297
8298 /* This argument doesn't appear to be used anymore. Which is good,
8299 because the old code here didn't suppress rtl generation. */
8300 gcc_assert (!no_rtl);
8301
8302 if (!TARGET_64BIT)
8303 return;
8304
8305 fntype = TREE_TYPE (current_function_decl);
8306
8307 /* For varargs, we do not want to skip the dummy va_dcl argument.
8308 For stdargs, we do want to skip the last named argument. */
8309 next_cum = *cum;
8310 if (stdarg_p (fntype))
8311 ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8312 true);
8313
8314 if (cum->call_abi == MS_ABI)
8315 setup_incoming_varargs_ms_64 (&next_cum);
8316 else
8317 setup_incoming_varargs_64 (&next_cum);
8318 }
8319
8320 /* Return true if TYPE is a va_list that is represented as a plain char pointer. */
8321
8322 static bool
8323 is_va_list_char_pointer (tree type)
8324 {
8325 tree canonic;
8326
8327 /* For 32-bit it is always true. */
8328 if (!TARGET_64BIT)
8329 return true;
8330 canonic = ix86_canonical_va_list_type (type);
8331 return (canonic == ms_va_list_type_node
8332 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8333 }
8334
8335 /* Implement va_start. */
8336
8337 static void
8338 ix86_va_start (tree valist, rtx nextarg)
8339 {
8340 HOST_WIDE_INT words, n_gpr, n_fpr;
8341 tree f_gpr, f_fpr, f_ovf, f_sav;
8342 tree gpr, fpr, ovf, sav, t;
8343 tree type;
8344 rtx ovf_rtx;
8345
8346 if (flag_split_stack
8347 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8348 {
8349 unsigned int scratch_regno;
8350
8351 /* When we are splitting the stack, we can't refer to the stack
8352 arguments using internal_arg_pointer, because they may be on
8353 the old stack. The split stack prologue will arrange to
8354 leave a pointer to the old stack arguments in a scratch
8355 register, which we here copy to a pseudo-register. The split
8356 stack prologue can't set the pseudo-register directly because
8357 it (the prologue) runs before any registers have been saved. */
8358
8359 scratch_regno = split_stack_prologue_scratch_regno ();
8360 if (scratch_regno != INVALID_REGNUM)
8361 {
8362 rtx reg, seq;
8363
8364 reg = gen_reg_rtx (Pmode);
8365 cfun->machine->split_stack_varargs_pointer = reg;
8366
8367 start_sequence ();
8368 emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8369 seq = get_insns ();
8370 end_sequence ();
8371
8372 push_topmost_sequence ();
8373 emit_insn_after (seq, entry_of_function ());
8374 pop_topmost_sequence ();
8375 }
8376 }
8377
8378 /* Only 64bit target needs something special. */
8379 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8380 {
8381 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8382 std_expand_builtin_va_start (valist, nextarg);
8383 else
8384 {
8385 rtx va_r, next;
8386
8387 va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8388 next = expand_binop (ptr_mode, add_optab,
8389 cfun->machine->split_stack_varargs_pointer,
8390 crtl->args.arg_offset_rtx,
8391 NULL_RTX, 0, OPTAB_LIB_WIDEN);
8392 convert_move (va_r, next, 0);
8393 }
8394 return;
8395 }
8396
8397 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8398 f_fpr = DECL_CHAIN (f_gpr);
8399 f_ovf = DECL_CHAIN (f_fpr);
8400 f_sav = DECL_CHAIN (f_ovf);
8401
8402 valist = build_simple_mem_ref (valist);
8403 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
8404 /* The following should be folded into the MEM_REF offset. */
8405 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
8406 f_gpr, NULL_TREE);
8407 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
8408 f_fpr, NULL_TREE);
8409 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
8410 f_ovf, NULL_TREE);
8411 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
8412 f_sav, NULL_TREE);
8413
8414 /* Count number of gp and fp argument registers used. */
8415 words = crtl->args.info.words;
8416 n_gpr = crtl->args.info.regno;
8417 n_fpr = crtl->args.info.sse_regno;
8418
8419 if (cfun->va_list_gpr_size)
8420 {
8421 type = TREE_TYPE (gpr);
8422 t = build2 (MODIFY_EXPR, type,
8423 gpr, build_int_cst (type, n_gpr * 8));
8424 TREE_SIDE_EFFECTS (t) = 1;
8425 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8426 }
8427
8428 if (TARGET_SSE && cfun->va_list_fpr_size)
8429 {
8430 type = TREE_TYPE (fpr);
8431 t = build2 (MODIFY_EXPR, type, fpr,
8432 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
8433 TREE_SIDE_EFFECTS (t) = 1;
8434 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8435 }
8436
8437 /* Find the overflow area. */
8438 type = TREE_TYPE (ovf);
8439 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8440 ovf_rtx = crtl->args.internal_arg_pointer;
8441 else
8442 ovf_rtx = cfun->machine->split_stack_varargs_pointer;
8443 t = make_tree (type, ovf_rtx);
8444 if (words != 0)
8445 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
8446 t = build2 (MODIFY_EXPR, type, ovf, t);
8447 TREE_SIDE_EFFECTS (t) = 1;
8448 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8449
8450 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
8451 {
8452 /* Find the register save area.
8453 The function prologue saves it right above the stack frame. */
8454 type = TREE_TYPE (sav);
8455 t = make_tree (type, frame_pointer_rtx);
8456 if (!ix86_varargs_gpr_size)
8457 t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
8458 t = build2 (MODIFY_EXPR, type, sav, t);
8459 TREE_SIDE_EFFECTS (t) = 1;
8460 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8461 }
8462 }
8463
8464 /* Implement va_arg. */
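/* A rough sketch, for orientation only, of the code this expands to for a
   value that can live in general-purpose registers (SysV x86-64 convention;
   `48' stands for X86_64_REGPARM_MAX * 8, and the field names follow the
   va_list sketch earlier in this file):

       if (ap->gp_offset + 8 * needed_intregs <= 48)
         {
           addr = ap->reg_save_area + ap->gp_offset;
           ap->gp_offset += 8 * needed_intregs;
         }
       else
         {
           addr = ap->overflow_arg_area;            // aligned first if needed
           ap->overflow_arg_area += rsize * UNITS_PER_WORD;
         }
       result = *(TYPE *) addr;

   SSE-class values use fp_offset and 16-byte slots instead, and mixed
   aggregates may be copied into a temporary to reassemble the pieces.  */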
8465
8466 static tree
8467 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
8468 gimple_seq *post_p)
8469 {
8470 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
8471 tree f_gpr, f_fpr, f_ovf, f_sav;
8472 tree gpr, fpr, ovf, sav, t;
8473 int size, rsize;
8474 tree lab_false, lab_over = NULL_TREE;
8475 tree addr, t2;
8476 rtx container;
8477 int indirect_p = 0;
8478 tree ptrtype;
8479 enum machine_mode nat_mode;
8480 unsigned int arg_boundary;
8481
8482 /* Only 64bit target needs something special. */
8483 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8484 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
8485
8486 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
8487 f_fpr = DECL_CHAIN (f_gpr);
8488 f_ovf = DECL_CHAIN (f_fpr);
8489 f_sav = DECL_CHAIN (f_ovf);
8490
8491 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
8492 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
8493 valist = build_va_arg_indirect_ref (valist);
8494 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
8495 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
8496 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
8497
8498 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
8499 if (indirect_p)
8500 type = build_pointer_type (type);
8501 size = int_size_in_bytes (type);
8502 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8503
8504 nat_mode = type_natural_mode (type, NULL, false);
8505 switch (nat_mode)
8506 {
8507 case V8SFmode:
8508 case V8SImode:
8509 case V32QImode:
8510 case V16HImode:
8511 case V4DFmode:
8512 case V4DImode:
8513 case V16SFmode:
8514 case V16SImode:
8515 case V64QImode:
8516 case V32HImode:
8517 case V8DFmode:
8518 case V8DImode:
8519 /* Unnamed 256- and 512-bit vector mode parameters are passed on the stack. */
8520 if (!TARGET_64BIT_MS_ABI)
8521 {
8522 container = NULL;
8523 break;
8524 }
8525
8526 default:
8527 container = construct_container (nat_mode, TYPE_MODE (type),
8528 type, 0, X86_64_REGPARM_MAX,
8529 X86_64_SSE_REGPARM_MAX, intreg,
8530 0);
8531 break;
8532 }
8533
8534 /* Pull the value out of the saved registers. */
8535
8536 addr = create_tmp_var (ptr_type_node, "addr");
8537
8538 if (container)
8539 {
8540 int needed_intregs, needed_sseregs;
8541 bool need_temp;
8542 tree int_addr, sse_addr;
8543
8544 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8545 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8546
8547 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
8548
8549 need_temp = (!REG_P (container)
8550 && ((needed_intregs && TYPE_ALIGN (type) > 64)
8551 || TYPE_ALIGN (type) > 128));
8552
8553 /* If we are passing a structure, verify that it occupies a consecutive block
8554 of the register save area. If not, we need to do moves. */
8555 if (!need_temp && !REG_P (container))
8556 {
8557 /* Verify that all registers are strictly consecutive */
8558 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
8559 {
8560 int i;
8561
8562 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8563 {
8564 rtx slot = XVECEXP (container, 0, i);
8565 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
8566 || INTVAL (XEXP (slot, 1)) != i * 16)
8567 need_temp = 1;
8568 }
8569 }
8570 else
8571 {
8572 int i;
8573
8574 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
8575 {
8576 rtx slot = XVECEXP (container, 0, i);
8577 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
8578 || INTVAL (XEXP (slot, 1)) != i * 8)
8579 need_temp = 1;
8580 }
8581 }
8582 }
8583 if (!need_temp)
8584 {
8585 int_addr = addr;
8586 sse_addr = addr;
8587 }
8588 else
8589 {
8590 int_addr = create_tmp_var (ptr_type_node, "int_addr");
8591 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
8592 }
8593
8594 /* First ensure that we fit completely in registers. */
8595 if (needed_intregs)
8596 {
8597 t = build_int_cst (TREE_TYPE (gpr),
8598 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
8599 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
8600 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8601 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8602 gimplify_and_add (t, pre_p);
8603 }
8604 if (needed_sseregs)
8605 {
8606 t = build_int_cst (TREE_TYPE (fpr),
8607 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
8608 + X86_64_REGPARM_MAX * 8);
8609 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
8610 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
8611 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
8612 gimplify_and_add (t, pre_p);
8613 }
8614
8615 /* Compute index to start of area used for integer regs. */
8616 if (needed_intregs)
8617 {
8618 /* int_addr = gpr + sav; */
8619 t = fold_build_pointer_plus (sav, gpr);
8620 gimplify_assign (int_addr, t, pre_p);
8621 }
8622 if (needed_sseregs)
8623 {
8624 /* sse_addr = fpr + sav; */
8625 t = fold_build_pointer_plus (sav, fpr);
8626 gimplify_assign (sse_addr, t, pre_p);
8627 }
8628 if (need_temp)
8629 {
8630 int i, prev_size = 0;
8631 tree temp = create_tmp_var (type, "va_arg_tmp");
8632
8633 /* addr = &temp; */
8634 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
8635 gimplify_assign (addr, t, pre_p);
8636
8637 for (i = 0; i < XVECLEN (container, 0); i++)
8638 {
8639 rtx slot = XVECEXP (container, 0, i);
8640 rtx reg = XEXP (slot, 0);
8641 enum machine_mode mode = GET_MODE (reg);
8642 tree piece_type;
8643 tree addr_type;
8644 tree daddr_type;
8645 tree src_addr, src;
8646 int src_offset;
8647 tree dest_addr, dest;
8648 int cur_size = GET_MODE_SIZE (mode);
8649
8650 gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
8651 prev_size = INTVAL (XEXP (slot, 1));
8652 if (prev_size + cur_size > size)
8653 {
8654 cur_size = size - prev_size;
8655 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
8656 if (mode == BLKmode)
8657 mode = QImode;
8658 }
8659 piece_type = lang_hooks.types.type_for_mode (mode, 1);
8660 if (mode == GET_MODE (reg))
8661 addr_type = build_pointer_type (piece_type);
8662 else
8663 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8664 true);
8665 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
8666 true);
8667
8668 if (SSE_REGNO_P (REGNO (reg)))
8669 {
8670 src_addr = sse_addr;
8671 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
8672 }
8673 else
8674 {
8675 src_addr = int_addr;
8676 src_offset = REGNO (reg) * 8;
8677 }
8678 src_addr = fold_convert (addr_type, src_addr);
8679 src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
8680
8681 dest_addr = fold_convert (daddr_type, addr);
8682 dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
8683 if (cur_size == GET_MODE_SIZE (mode))
8684 {
8685 src = build_va_arg_indirect_ref (src_addr);
8686 dest = build_va_arg_indirect_ref (dest_addr);
8687
8688 gimplify_assign (dest, src, pre_p);
8689 }
8690 else
8691 {
8692 tree copy
8693 = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8694 3, dest_addr, src_addr,
8695 size_int (cur_size));
8696 gimplify_and_add (copy, pre_p);
8697 }
8698 prev_size += cur_size;
8699 }
8700 }
8701
8702 if (needed_intregs)
8703 {
8704 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
8705 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
8706 gimplify_assign (gpr, t, pre_p);
8707 }
8708
8709 if (needed_sseregs)
8710 {
8711 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
8712 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
8713 gimplify_assign (fpr, t, pre_p);
8714 }
8715
8716 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
8717
8718 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
8719 }
8720
8721 /* ... otherwise out of the overflow area. */
8722
8723 /* When we align a parameter on the stack for the caller, a parameter whose
8724 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT will be
8725 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
8726 with the caller. */
8727 arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
8728 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
8729 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
8730
8731 /* Care for on-stack alignment if needed. */
8732 if (arg_boundary <= 64 || size == 0)
8733 t = ovf;
8734 else
8735 {
8736 HOST_WIDE_INT align = arg_boundary / 8;
8737 t = fold_build_pointer_plus_hwi (ovf, align - 1);
8738 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
8739 build_int_cst (TREE_TYPE (t), -align));
8740 }
8741
8742 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
8743 gimplify_assign (addr, t, pre_p);
8744
8745 t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
8746 gimplify_assign (unshare_expr (ovf), t, pre_p);
8747
8748 if (container)
8749 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
8750
8751 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
8752 addr = fold_convert (ptrtype, addr);
8753
8754 if (indirect_p)
8755 addr = build_va_arg_indirect_ref (addr);
8756 return build_va_arg_indirect_ref (addr);
8757 }
8758 \f
8759 /* Return true if OPNUM's MEM should be matched
8760 in movabs* patterns. */
8761
8762 bool
8763 ix86_check_movabs (rtx insn, int opnum)
8764 {
8765 rtx set, mem;
8766
8767 set = PATTERN (insn);
8768 if (GET_CODE (set) == PARALLEL)
8769 set = XVECEXP (set, 0, 0);
8770 gcc_assert (GET_CODE (set) == SET);
8771 mem = XEXP (set, opnum);
8772 while (GET_CODE (mem) == SUBREG)
8773 mem = SUBREG_REG (mem);
8774 gcc_assert (MEM_P (mem));
8775 return volatile_ok || !MEM_VOLATILE_P (mem);
8776 }
8777 \f
8778 /* Initialize the table of extra 80387 mathematical constants. */
8779
8780 static void
8781 init_ext_80387_constants (void)
8782 {
8783 static const char * cst[5] =
8784 {
8785 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
8786 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
8787 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
8788 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
8789 "3.1415926535897932385128089594061862044", /* 4: fldpi */
8790 };
8791 int i;
8792
8793 for (i = 0; i < 5; i++)
8794 {
8795 real_from_string (&ext_80387_constants_table[i], cst[i]);
8796 /* Ensure each constant is rounded to XFmode precision. */
8797 real_convert (&ext_80387_constants_table[i],
8798 XFmode, &ext_80387_constants_table[i]);
8799 }
8800
8801 ext_80387_constants_init = 1;
8802 }
8803
8804 /* Return a non-zero code for the special 80387 instruction used to load
8805 constant X, 0 if there is none, or -1 if X is not an X87-mode CONST_DOUBLE. */
8806
8807 int
8808 standard_80387_constant_p (rtx x)
8809 {
8810 enum machine_mode mode = GET_MODE (x);
8811
8812 REAL_VALUE_TYPE r;
8813
8814 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
8815 return -1;
8816
8817 if (x == CONST0_RTX (mode))
8818 return 1;
8819 if (x == CONST1_RTX (mode))
8820 return 2;
8821
8822 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8823
8824 /* For XFmode constants, try to find a special 80387 instruction when
8825 optimizing for size or on those CPUs that benefit from them. */
8826 if (mode == XFmode
8827 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
8828 {
8829 int i;
8830
8831 if (! ext_80387_constants_init)
8832 init_ext_80387_constants ();
8833
8834 for (i = 0; i < 5; i++)
8835 if (real_identical (&r, &ext_80387_constants_table[i]))
8836 return i + 3;
8837 }
8838
8839 /* Load of the constant -0.0 or -1.0 will be split as
8840 fldz;fchs or fld1;fchs sequence. */
8841 if (real_isnegzero (&r))
8842 return 8;
8843 if (real_identical (&r, &dconstm1))
8844 return 9;
8845
8846 return 0;
8847 }
8848
8849 /* Return the opcode of the special instruction to be used to load
8850 the constant X. */
8851
8852 const char *
8853 standard_80387_constant_opcode (rtx x)
8854 {
8855 switch (standard_80387_constant_p (x))
8856 {
8857 case 1:
8858 return "fldz";
8859 case 2:
8860 return "fld1";
8861 case 3:
8862 return "fldlg2";
8863 case 4:
8864 return "fldln2";
8865 case 5:
8866 return "fldl2e";
8867 case 6:
8868 return "fldl2t";
8869 case 7:
8870 return "fldpi";
8871 case 8:
8872 case 9:
8873 return "#";
8874 default:
8875 gcc_unreachable ();
8876 }
8877 }
8878
8879 /* Return the CONST_DOUBLE representing the 80387 constant that is
8880 loaded by the specified special instruction. The argument IDX
8881 matches the return value from standard_80387_constant_p. */
8882
8883 rtx
8884 standard_80387_constant_rtx (int idx)
8885 {
8886 int i;
8887
8888 if (! ext_80387_constants_init)
8889 init_ext_80387_constants ();
8890
8891 switch (idx)
8892 {
8893 case 3:
8894 case 4:
8895 case 5:
8896 case 6:
8897 case 7:
8898 i = idx - 3;
8899 break;
8900
8901 default:
8902 gcc_unreachable ();
8903 }
8904
8905 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
8906 XFmode);
8907 }
8908
8909 /* Return 1 if X is all 0s and 2 if X is all 1s
8910 in a supported SSE/AVX vector mode, else 0. */
8911
8912 int
8913 standard_sse_constant_p (rtx x)
8914 {
8915 enum machine_mode mode = GET_MODE (x);
8916
8917 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
8918 return 1;
8919 if (vector_all_ones_operand (x, mode))
8920 switch (mode)
8921 {
8922 case V16QImode:
8923 case V8HImode:
8924 case V4SImode:
8925 case V2DImode:
8926 if (TARGET_SSE2)
8927 return 2;
8928 case V32QImode:
8929 case V16HImode:
8930 case V8SImode:
8931 case V4DImode:
8932 if (TARGET_AVX2)
8933 return 2;
8934 case V64QImode:
8935 case V32HImode:
8936 case V16SImode:
8937 case V8DImode:
8938 if (TARGET_AVX512F)
8939 return 2;
8940 default:
8941 break;
8942 }
8943
8944 return 0;
8945 }
8946
8947 /* Return the opcode of the special instruction to be used to load
8948 the constant X. */
8949
8950 const char *
8951 standard_sse_constant_opcode (rtx insn, rtx x)
8952 {
8953 switch (standard_sse_constant_p (x))
8954 {
8955 case 1:
8956 switch (get_attr_mode (insn))
8957 {
8958 case MODE_XI:
8959 case MODE_V16SF:
8960 return "vpxord\t%g0, %g0, %g0";
8961 case MODE_V8DF:
8962 return "vpxorq\t%g0, %g0, %g0";
8963 case MODE_TI:
8964 return "%vpxor\t%0, %d0";
8965 case MODE_V2DF:
8966 return "%vxorpd\t%0, %d0";
8967 case MODE_V4SF:
8968 return "%vxorps\t%0, %d0";
8969
8970 case MODE_OI:
8971 return "vpxor\t%x0, %x0, %x0";
8972 case MODE_V4DF:
8973 return "vxorpd\t%x0, %x0, %x0";
8974 case MODE_V8SF:
8975 return "vxorps\t%x0, %x0, %x0";
8976
8977 default:
8978 break;
8979 }
8980
8981 case 2:
8982 if (get_attr_mode (insn) == MODE_XI
8983 || get_attr_mode (insn) == MODE_V8DF
8984 || get_attr_mode (insn) == MODE_V16SF)
8985 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
8986 if (TARGET_AVX)
8987 return "vpcmpeqd\t%0, %0, %0";
8988 else
8989 return "pcmpeqd\t%0, %0";
8990
8991 default:
8992 break;
8993 }
8994 gcc_unreachable ();
8995 }
8996
8997 /* Returns true if OP contains a symbol reference */
8998
8999 bool
9000 symbolic_reference_mentioned_p (rtx op)
9001 {
9002 const char *fmt;
9003 int i;
9004
9005 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9006 return true;
9007
9008 fmt = GET_RTX_FORMAT (GET_CODE (op));
9009 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9010 {
9011 if (fmt[i] == 'E')
9012 {
9013 int j;
9014
9015 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9016 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9017 return true;
9018 }
9019
9020 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9021 return true;
9022 }
9023
9024 return false;
9025 }
9026
9027 /* Return true if it is appropriate to emit `ret' instructions in the
9028 body of a function. Do this only if the epilogue is simple, needing a
9029 couple of insns. Prior to reloading, we can't tell how many registers
9030 must be saved, so return false then. Return false if there is no frame
9031 marker to de-allocate. */
9032
9033 bool
9034 ix86_can_use_return_insn_p (void)
9035 {
9036 struct ix86_frame frame;
9037
9038 if (! reload_completed || frame_pointer_needed)
9039 return 0;
9040
9041 /* Don't allow more than 32k pop, since that's all we can do
9042 with one instruction. */
9043 if (crtl->args.pops_args && crtl->args.size >= 32768)
9044 return 0;
9045
9046 ix86_compute_frame_layout (&frame);
9047 return (frame.stack_pointer_offset == UNITS_PER_WORD
9048 && (frame.nregs + frame.nsseregs) == 0);
9049 }
9050 \f
9051 /* Value should be nonzero if functions must have frame pointers.
9052 Zero means the frame pointer need not be set up (and parms may
9053 be accessed via the stack pointer) in functions that seem suitable. */
9054
9055 static bool
9056 ix86_frame_pointer_required (void)
9057 {
9058 /* If we accessed previous frames, then the generated code expects
9059 to be able to access the saved ebp value in our frame. */
9060 if (cfun->machine->accesses_prev_frame)
9061 return true;
9062
9063 /* Several x86 OSes need a frame pointer for other reasons,
9064 usually pertaining to setjmp. */
9065 if (SUBTARGET_FRAME_POINTER_REQUIRED)
9066 return true;
9067
9068 /* For older 32-bit runtimes, setjmp requires a valid frame pointer. */
9069 if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9070 return true;
9071
9072 /* Win64 SEH: very large frames need a frame pointer, as the maximum stack
9073 allocation is 4GB. */
9074 if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9075 return true;
9076
9077 /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9078 turns off the frame pointer by default. Turn it back on now if
9079 we've not got a leaf function. */
9080 if (TARGET_OMIT_LEAF_FRAME_POINTER
9081 && (!crtl->is_leaf
9082 || ix86_current_function_calls_tls_descriptor))
9083 return true;
9084
9085 if (crtl->profile && !flag_fentry)
9086 return true;
9087
9088 return false;
9089 }
9090
9091 /* Record that the current function accesses previous call frames. */
9092
9093 void
9094 ix86_setup_frame_addresses (void)
9095 {
9096 cfun->machine->accesses_prev_frame = 1;
9097 }
9098 \f
9099 #ifndef USE_HIDDEN_LINKONCE
9100 # if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9101 # define USE_HIDDEN_LINKONCE 1
9102 # else
9103 # define USE_HIDDEN_LINKONCE 0
9104 # endif
9105 #endif
9106
9107 static int pic_labels_used;
9108
9109 /* Fills in the label name that should be used for a pc thunk for
9110 the given register. */
9111
9112 static void
9113 get_pc_thunk_name (char name[32], unsigned int regno)
9114 {
9115 gcc_assert (!TARGET_64BIT);
9116
9117 if (USE_HIDDEN_LINKONCE)
9118 sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9119 else
9120 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9121 }
9122
9123
9124 /* This generates the thunks used for -fpic: each one loads its register
9125 with the return address of the caller and then returns. */
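/* For illustration, the thunk for %ebx looks roughly like this in the
   assembly output (AT&T syntax; the label comes from get_pc_thunk_name):

       __x86.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret
*/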
9126
9127 static void
9128 ix86_code_end (void)
9129 {
9130 rtx xops[2];
9131 int regno;
9132
9133 for (regno = AX_REG; regno <= SP_REG; regno++)
9134 {
9135 char name[32];
9136 tree decl;
9137
9138 if (!(pic_labels_used & (1 << regno)))
9139 continue;
9140
9141 get_pc_thunk_name (name, regno);
9142
9143 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9144 get_identifier (name),
9145 build_function_type_list (void_type_node, NULL_TREE));
9146 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9147 NULL_TREE, void_type_node);
9148 TREE_PUBLIC (decl) = 1;
9149 TREE_STATIC (decl) = 1;
9150 DECL_IGNORED_P (decl) = 1;
9151
9152 #if TARGET_MACHO
9153 if (TARGET_MACHO)
9154 {
9155 switch_to_section (darwin_sections[text_coal_section]);
9156 fputs ("\t.weak_definition\t", asm_out_file);
9157 assemble_name (asm_out_file, name);
9158 fputs ("\n\t.private_extern\t", asm_out_file);
9159 assemble_name (asm_out_file, name);
9160 putc ('\n', asm_out_file);
9161 ASM_OUTPUT_LABEL (asm_out_file, name);
9162 DECL_WEAK (decl) = 1;
9163 }
9164 else
9165 #endif
9166 if (USE_HIDDEN_LINKONCE)
9167 {
9168 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
9169
9170 targetm.asm_out.unique_section (decl, 0);
9171 switch_to_section (get_named_section (decl, NULL, 0));
9172
9173 targetm.asm_out.globalize_label (asm_out_file, name);
9174 fputs ("\t.hidden\t", asm_out_file);
9175 assemble_name (asm_out_file, name);
9176 putc ('\n', asm_out_file);
9177 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9178 }
9179 else
9180 {
9181 switch_to_section (text_section);
9182 ASM_OUTPUT_LABEL (asm_out_file, name);
9183 }
9184
9185 DECL_INITIAL (decl) = make_node (BLOCK);
9186 current_function_decl = decl;
9187 init_function_start (decl);
9188 first_function_block_is_cold = false;
9189 /* Make sure unwind info is emitted for the thunk if needed. */
9190 final_start_function (emit_barrier (), asm_out_file, 1);
9191
9192 /* Pad stack IP move with 4 instructions (two NOPs count
9193 as one instruction). */
9194 if (TARGET_PAD_SHORT_FUNCTION)
9195 {
9196 int i = 8;
9197
9198 while (i--)
9199 fputs ("\tnop\n", asm_out_file);
9200 }
9201
9202 xops[0] = gen_rtx_REG (Pmode, regno);
9203 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9204 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9205 fputs ("\tret\n", asm_out_file);
9206 final_end_function ();
9207 init_insn_lengths ();
9208 free_after_compilation (cfun);
9209 set_cfun (NULL);
9210 current_function_decl = NULL;
9211 }
9212
9213 if (flag_split_stack)
9214 file_end_indicate_split_stack ();
9215 }
9216
9217 /* Emit code for the SET_GOT patterns. */
9218
9219 const char *
9220 output_set_got (rtx dest, rtx label)
9221 {
9222 rtx xops[3];
9223
9224 xops[0] = dest;
9225
9226 if (TARGET_VXWORKS_RTP && flag_pic)
9227 {
9228 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
9229 xops[2] = gen_rtx_MEM (Pmode,
9230 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9231 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9232
9233 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9234 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9235 an unadorned address. */
9236 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9237 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9238 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9239 return "";
9240 }
9241
9242 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9243
9244 if (!flag_pic)
9245 {
9246 if (TARGET_MACHO)
9247 /* We don't need a pic base, we're not producing pic. */
9248 gcc_unreachable ();
9249
9250 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9251 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9252 targetm.asm_out.internal_label (asm_out_file, "L",
9253 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9254 }
9255 else
9256 {
9257 char name[32];
9258 get_pc_thunk_name (name, REGNO (dest));
9259 pic_labels_used |= 1 << REGNO (dest);
9260
9261 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9262 xops[2] = gen_rtx_MEM (QImode, xops[2]);
9263 output_asm_insn ("call\t%X2", xops);
9264
9265 #if TARGET_MACHO
9266 /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9267 This is what will be referenced by the Mach-O PIC subsystem. */
9268 if (machopic_should_output_picbase_label () || !label)
9269 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9270
9271 /* When we are restoring the pic base at the site of a nonlocal label,
9272 and we decided to emit the pic base above, we will still output a
9273 local label used for calculating the correction offset (even though
9274 the offset will be 0 in that case). */
9275 if (label)
9276 targetm.asm_out.internal_label (asm_out_file, "L",
9277 CODE_LABEL_NUMBER (label));
9278 #endif
9279 }
9280
9281 if (!TARGET_MACHO)
9282 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9283
9284 return "";
9285 }
9286
9287 /* Generate a "push" pattern for input ARG. */
9288
9289 static rtx
9290 gen_push (rtx arg)
9291 {
9292 struct machine_function *m = cfun->machine;
9293
9294 if (m->fs.cfa_reg == stack_pointer_rtx)
9295 m->fs.cfa_offset += UNITS_PER_WORD;
9296 m->fs.sp_offset += UNITS_PER_WORD;
9297
9298 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9299 arg = gen_rtx_REG (word_mode, REGNO (arg));
9300
9301 return gen_rtx_SET (VOIDmode,
9302 gen_rtx_MEM (word_mode,
9303 gen_rtx_PRE_DEC (Pmode,
9304 stack_pointer_rtx)),
9305 arg);
9306 }
9307
9308 /* Generate a "pop" pattern for input ARG. */
9309
9310 static rtx
9311 gen_pop (rtx arg)
9312 {
9313 if (REG_P (arg) && GET_MODE (arg) != word_mode)
9314 arg = gen_rtx_REG (word_mode, REGNO (arg));
9315
9316 return gen_rtx_SET (VOIDmode,
9317 arg,
9318 gen_rtx_MEM (word_mode,
9319 gen_rtx_POST_INC (Pmode,
9320 stack_pointer_rtx)));
9321 }
9322
9323 /* Return the regno of an unused call-clobbered register if one is available
9324 for the entire function, or INVALID_REGNUM otherwise. */
9325
9326 static unsigned int
9327 ix86_select_alt_pic_regnum (void)
9328 {
9329 if (crtl->is_leaf
9330 && !crtl->profile
9331 && !ix86_current_function_calls_tls_descriptor)
9332 {
9333 int i, drap;
9334 /* Can't use the same register for both PIC and DRAP. */
9335 if (crtl->drap_reg)
9336 drap = REGNO (crtl->drap_reg);
9337 else
9338 drap = -1;
9339 for (i = 2; i >= 0; --i)
9340 if (i != drap && !df_regs_ever_live_p (i))
9341 return i;
9342 }
9343
9344 return INVALID_REGNUM;
9345 }
9346
9347 /* Return TRUE if we need to save REGNO. */
9348
9349 static bool
9350 ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9351 {
9352 if (pic_offset_table_rtx
9353 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
9354 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9355 || crtl->profile
9356 || crtl->calls_eh_return
9357 || crtl->uses_const_pool
9358 || cfun->has_nonlocal_label))
9359 return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9360
9361 if (crtl->calls_eh_return && maybe_eh_return)
9362 {
9363 unsigned i;
9364 for (i = 0; ; i++)
9365 {
9366 unsigned test = EH_RETURN_DATA_REGNO (i);
9367 if (test == INVALID_REGNUM)
9368 break;
9369 if (test == regno)
9370 return true;
9371 }
9372 }
9373
9374 if (crtl->drap_reg
9375 && regno == REGNO (crtl->drap_reg)
9376 && !cfun->machine->no_drap_save_restore)
9377 return true;
9378
9379 return (df_regs_ever_live_p (regno)
9380 && !call_used_regs[regno]
9381 && !fixed_regs[regno]
9382 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
9383 }
9384
9385 /* Return the number of saved general purpose registers. */
9386
9387 static int
9388 ix86_nsaved_regs (void)
9389 {
9390 int nregs = 0;
9391 int regno;
9392
9393 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9394 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9395 nregs ++;
9396 return nregs;
9397 }
9398
9399 /* Return the number of saved SSE registers. */
9400
9401 static int
9402 ix86_nsaved_sseregs (void)
9403 {
9404 int nregs = 0;
9405 int regno;
9406
9407 if (!TARGET_64BIT_MS_ABI)
9408 return 0;
9409 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9410 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9411 nregs ++;
9412 return nregs;
9413 }
9414
9415 /* Given FROM and TO register numbers, say whether this elimination is
9416 allowed. If stack alignment is needed, we can only replace argument
9417 pointer with hard frame pointer, or replace frame pointer with stack
9418 pointer. Otherwise, frame pointer elimination is automatically
9419 handled and all other eliminations are valid. */
9420
9421 static bool
9422 ix86_can_eliminate (const int from, const int to)
9423 {
9424 if (stack_realign_fp)
9425 return ((from == ARG_POINTER_REGNUM
9426 && to == HARD_FRAME_POINTER_REGNUM)
9427 || (from == FRAME_POINTER_REGNUM
9428 && to == STACK_POINTER_REGNUM));
9429 else
9430 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
9431 }
9432
9433 /* Return the offset between two registers, one to be eliminated, and the other
9434 its replacement, at the start of a routine. */
9435
9436 HOST_WIDE_INT
9437 ix86_initial_elimination_offset (int from, int to)
9438 {
9439 struct ix86_frame frame;
9440 ix86_compute_frame_layout (&frame);
9441
9442 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9443 return frame.hard_frame_pointer_offset;
9444 else if (from == FRAME_POINTER_REGNUM
9445 && to == HARD_FRAME_POINTER_REGNUM)
9446 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
9447 else
9448 {
9449 gcc_assert (to == STACK_POINTER_REGNUM);
9450
9451 if (from == ARG_POINTER_REGNUM)
9452 return frame.stack_pointer_offset;
9453
9454 gcc_assert (from == FRAME_POINTER_REGNUM);
9455 return frame.stack_pointer_offset - frame.frame_pointer_offset;
9456 }
9457 }
9458
9459 /* In a dynamically-aligned function, we can't know the offset from
9460 stack pointer to frame pointer, so we must ensure that setjmp
9461 eliminates fp against the hard fp (%ebp) rather than trying to
9462 index from %esp up to the top of the frame across a gap that is
9463 of unknown (at compile-time) size. */
9464 static rtx
9465 ix86_builtin_setjmp_frame_value (void)
9466 {
9467 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
9468 }
9469
9470 /* When using -fsplit-stack, the allocation routines set a field in
9471 the TCB to the bottom of the stack plus this much space, measured
9472 in bytes. */
9473
9474 #define SPLIT_STACK_AVAILABLE 256
9475
9476 /* Fill FRAME, the ix86_frame structure describing the frame of the currently compiled function. */
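/* An illustrative sketch of the layout computed below, from higher toward
   lower addresses; every region is optional and may be empty:

       return address
       pushed static chain                (ix86_static_chain_on_stack)
       saved frame pointer                (frame_pointer_needed)
       saved GP registers                 (frame->nregs words)
       padding + saved SSE registers      (frame->nsseregs, 16-byte slots)
       va_arg register save area
       local variables                    (get_frame_size ())
       outgoing argument area
       <-- stack pointer; the red zone, when usable, lies below it  */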
9477
9478 static void
9479 ix86_compute_frame_layout (struct ix86_frame *frame)
9480 {
9481 unsigned HOST_WIDE_INT stack_alignment_needed;
9482 HOST_WIDE_INT offset;
9483 unsigned HOST_WIDE_INT preferred_alignment;
9484 HOST_WIDE_INT size = get_frame_size ();
9485 HOST_WIDE_INT to_allocate;
9486
9487 frame->nregs = ix86_nsaved_regs ();
9488 frame->nsseregs = ix86_nsaved_sseregs ();
9489
9490 /* The 64-bit MS ABI seems to require stack alignment to always be 16, except
9491 in function prologues and in leaf functions. */
9492 if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
9493 && (!crtl->is_leaf || cfun->calls_alloca != 0
9494 || ix86_current_function_calls_tls_descriptor))
9495 {
9496 crtl->preferred_stack_boundary = 128;
9497 crtl->stack_alignment_needed = 128;
9498 }
9499 /* preferred_stack_boundary is never updated for calls
9500 expanded from a TLS descriptor. Update it here. We don't update it at
9501 expand time because, according to the comments before
9502 ix86_current_function_calls_tls_descriptor, TLS calls may be optimized
9503 away. */
9504 else if (ix86_current_function_calls_tls_descriptor
9505 && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
9506 {
9507 crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
9508 if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY)
9509 crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY;
9510 }
9511
9512 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
9513 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
9514
9515 gcc_assert (!size || stack_alignment_needed);
9516 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
9517 gcc_assert (preferred_alignment <= stack_alignment_needed);
9518
9519 /* For SEH we have to limit the amount of code movement into the prologue.
9520 At present we do this via a BLOCKAGE, at which point there's very little
9521 scheduling that can be done, which means that there's very little point
9522 in doing anything except PUSHs. */
9523 if (TARGET_SEH)
9524 cfun->machine->use_fast_prologue_epilogue = false;
9525
9526 /* During reload iterations the number of registers saved can change.
9527 Recompute the value as needed. Do not recompute when the number of registers
9528 hasn't changed, as reload calls this function multiple times and does not
9529 expect the decision to change within a single iteration. */
9530 else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
9531 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
9532 {
9533 int count = frame->nregs;
9534 struct cgraph_node *node = cgraph_get_node (current_function_decl);
9535
9536 cfun->machine->use_fast_prologue_epilogue_nregs = count;
9537
9538 /* The fast prologue uses move instead of push to save registers. This
9539 is significantly longer, but also executes faster as modern hardware
9540 can execute the moves in parallel, but can't do that for push/pop.
9541
9542 Be careful about choosing which prologue to emit: when the function takes
9543 many instructions to execute we may as well use the slow version, and
9544 likewise when the function is known to be outside a hot spot (which is
9545 known only with profile feedback). Weigh the size of the function by the
9546 number of registers to save, as it is cheap to use one or two push
9547 instructions but very slow to use many of them. */
9548 if (count)
9549 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
9550 if (node->frequency < NODE_FREQUENCY_NORMAL
9551 || (flag_branch_probabilities
9552 && node->frequency < NODE_FREQUENCY_HOT))
9553 cfun->machine->use_fast_prologue_epilogue = false;
9554 else
9555 cfun->machine->use_fast_prologue_epilogue
9556 = !expensive_function_p (count);
9557 }
9558
9559 frame->save_regs_using_mov
9560 = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
9561 /* If static stack checking is enabled and done with probes,
9562 the registers need to be saved before allocating the frame. */
9563 && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
9564
9565 /* Skip return address. */
9566 offset = UNITS_PER_WORD;
9567
9568 /* Skip pushed static chain. */
9569 if (ix86_static_chain_on_stack)
9570 offset += UNITS_PER_WORD;
9571
9572 /* Skip saved base pointer. */
9573 if (frame_pointer_needed)
9574 offset += UNITS_PER_WORD;
9575 frame->hfp_save_offset = offset;
9576
9577 /* The traditional frame pointer location is at the top of the frame. */
9578 frame->hard_frame_pointer_offset = offset;
9579
9580 /* Register save area */
9581 offset += frame->nregs * UNITS_PER_WORD;
9582 frame->reg_save_offset = offset;
9583
9584 /* On SEH target, registers are pushed just before the frame pointer
9585 location. */
9586 if (TARGET_SEH)
9587 frame->hard_frame_pointer_offset = offset;
9588
9589 /* Align and set SSE register save area. */
9590 if (frame->nsseregs)
9591 {
9592 /* The only ABI that has saved SSE registers (Win64) also has a
9593 16-byte aligned default stack, and thus we don't need to be
9594 within the re-aligned local stack frame to save them. */
9595 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
9596 offset = (offset + 16 - 1) & -16;
9597 offset += frame->nsseregs * 16;
9598 }
9599 frame->sse_reg_save_offset = offset;
9600
9601 /* The re-aligned stack starts here. Values before this point are not
9602 directly comparable with values below this point. In order to make
9603 sure that no value happens to be the same before and after, force
9604 the alignment computation below to add a non-zero value. */
9605 if (stack_realign_fp)
9606 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
9607
9608 /* Va-arg area */
9609 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
9610 offset += frame->va_arg_size;
9611
9612 /* Align start of frame for local function. */
9613 if (stack_realign_fp
9614 || offset != frame->sse_reg_save_offset
9615 || size != 0
9616 || !crtl->is_leaf
9617 || cfun->calls_alloca
9618 || ix86_current_function_calls_tls_descriptor)
9619 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
9620
9621 /* Frame pointer points here. */
9622 frame->frame_pointer_offset = offset;
9623
9624 offset += size;
9625
9626 /* Add the outgoing arguments area. It can be skipped if we eliminated
9627 all the function calls as dead code.
9628 Skipping is however impossible when the function calls alloca, since the
9629 alloca expander assumes that the last crtl->outgoing_args_size bytes
9630 of the stack frame are unused. */
9631 if (ACCUMULATE_OUTGOING_ARGS
9632 && (!crtl->is_leaf || cfun->calls_alloca
9633 || ix86_current_function_calls_tls_descriptor))
9634 {
9635 offset += crtl->outgoing_args_size;
9636 frame->outgoing_arguments_size = crtl->outgoing_args_size;
9637 }
9638 else
9639 frame->outgoing_arguments_size = 0;
9640
9641 /* Align stack boundary. Only needed if we're calling another function
9642 or using alloca. */
9643 if (!crtl->is_leaf || cfun->calls_alloca
9644 || ix86_current_function_calls_tls_descriptor)
9645 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
9646
9647 /* We've reached end of stack frame. */
9648 frame->stack_pointer_offset = offset;
9649
9650 /* Size the prologue needs to allocate. */
9651 to_allocate = offset - frame->sse_reg_save_offset;
9652
9653 if ((!to_allocate && frame->nregs <= 1)
9654 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
9655 frame->save_regs_using_mov = false;
9656
9657 if (ix86_using_red_zone ()
9658 && crtl->sp_is_unchanging
9659 && crtl->is_leaf
9660 && !ix86_current_function_calls_tls_descriptor)
9661 {
9662 frame->red_zone_size = to_allocate;
9663 if (frame->save_regs_using_mov)
9664 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
9665 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
9666 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
9667 }
9668 else
9669 frame->red_zone_size = 0;
9670 frame->stack_pointer_offset -= frame->red_zone_size;
9671
9672 /* The SEH frame pointer location is near the bottom of the frame.
9673 This is enforced by the fact that the difference between the
9674 stack pointer and the frame pointer is limited to 240 bytes in
9675 the unwind data structure. */
9676 if (TARGET_SEH)
9677 {
9678 HOST_WIDE_INT diff;
9679
9680 /* If we can leave the frame pointer where it is, do so. Also, returns
9681 the establisher frame for __builtin_frame_address (0). */
9682 diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
9683 if (diff <= SEH_MAX_FRAME_SIZE
9684 && (diff > 240 || (diff & 15) != 0)
9685 && !crtl->accesses_prior_frames)
9686 {
9687 /* Ideally we'd determine what portion of the local stack frame
9688 (within the constraint of the lowest 240) is most heavily used.
9689 But without that complication, simply bias the frame pointer
9690 by 128 bytes so as to maximize the amount of the local stack
9691 frame that is addressable with 8-bit offsets. */
9692 frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
9693 }
9694 }
9695 }
9696
9697 /* This is semi-inlined memory_address_length, but simplified
9698 since we know that we're always dealing with reg+offset, and
9699 to avoid having to create and discard all that rtl. */
9700
9701 static inline int
9702 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
9703 {
9704 int len = 4;
9705
9706 if (offset == 0)
9707 {
9708 /* EBP and R13 cannot be encoded without an offset. */
9709 len = (regno == BP_REG || regno == R13_REG);
9710 }
9711 else if (IN_RANGE (offset, -128, 127))
9712 len = 1;
9713
9714 /* ESP and R12 must be encoded with a SIB byte. */
9715 if (regno == SP_REG || regno == R12_REG)
9716 len++;
9717
9718 return len;
9719 }
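/* A few worked examples of the encoding lengths returned above:
     0(%ebp)   -> 1  (ebp as a base always needs a displacement, so disp8 0)
     4(%eax)   -> 1  (disp8)
     8(%esp)   -> 2  (disp8 plus the mandatory SIB byte)
     512(%ecx) -> 4  (disp32)  */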
9720
9721 /* Return an RTX that points to CFA_OFFSET within the stack frame.
9722 The valid base registers are taken from CFUN->MACHINE->FS. */
9723
9724 static rtx
9725 choose_baseaddr (HOST_WIDE_INT cfa_offset)
9726 {
9727 const struct machine_function *m = cfun->machine;
9728 rtx base_reg = NULL;
9729 HOST_WIDE_INT base_offset = 0;
9730
9731 if (m->use_fast_prologue_epilogue)
9732 {
9733 /* Choose the base register most likely to allow the most scheduling
9734 opportunities. Generally FP is valid throughout the function,
9735 while DRAP must be reloaded within the epilogue. But choose either
9736 over the SP due to increased encoding size. */
9737
9738 if (m->fs.fp_valid)
9739 {
9740 base_reg = hard_frame_pointer_rtx;
9741 base_offset = m->fs.fp_offset - cfa_offset;
9742 }
9743 else if (m->fs.drap_valid)
9744 {
9745 base_reg = crtl->drap_reg;
9746 base_offset = 0 - cfa_offset;
9747 }
9748 else if (m->fs.sp_valid)
9749 {
9750 base_reg = stack_pointer_rtx;
9751 base_offset = m->fs.sp_offset - cfa_offset;
9752 }
9753 }
9754 else
9755 {
9756 HOST_WIDE_INT toffset;
9757 int len = 16, tlen;
9758
9759 /* Choose the base register with the smallest address encoding.
9760 With a tie, choose FP > DRAP > SP. */
9761 if (m->fs.sp_valid)
9762 {
9763 base_reg = stack_pointer_rtx;
9764 base_offset = m->fs.sp_offset - cfa_offset;
9765 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
9766 }
9767 if (m->fs.drap_valid)
9768 {
9769 toffset = 0 - cfa_offset;
9770 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
9771 if (tlen <= len)
9772 {
9773 base_reg = crtl->drap_reg;
9774 base_offset = toffset;
9775 len = tlen;
9776 }
9777 }
9778 if (m->fs.fp_valid)
9779 {
9780 toffset = m->fs.fp_offset - cfa_offset;
9781 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
9782 if (tlen <= len)
9783 {
9784 base_reg = hard_frame_pointer_rtx;
9785 base_offset = toffset;
9786 len = tlen;
9787 }
9788 }
9789 }
9790 gcc_assert (base_reg != NULL);
9791
9792 return plus_constant (Pmode, base_reg, base_offset);
9793 }
9794
9795 /* Emit code to save registers in the prologue. */
9796
9797 static void
9798 ix86_emit_save_regs (void)
9799 {
9800 unsigned int regno;
9801 rtx insn;
9802
9803 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
9804 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9805 {
9806 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
9807 RTX_FRAME_RELATED_P (insn) = 1;
9808 }
9809 }
9810
9811 /* Emit a single register save at CFA - CFA_OFFSET. */
9812
9813 static void
9814 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
9815 HOST_WIDE_INT cfa_offset)
9816 {
9817 struct machine_function *m = cfun->machine;
9818 rtx reg = gen_rtx_REG (mode, regno);
9819 rtx mem, addr, base, insn;
9820
9821 addr = choose_baseaddr (cfa_offset);
9822 mem = gen_frame_mem (mode, addr);
9823
9824 /* For SSE saves, we need to indicate the 128-bit alignment. */
9825 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
9826
9827 insn = emit_move_insn (mem, reg);
9828 RTX_FRAME_RELATED_P (insn) = 1;
9829
9830 base = addr;
9831 if (GET_CODE (base) == PLUS)
9832 base = XEXP (base, 0);
9833 gcc_checking_assert (REG_P (base));
9834
9835 /* When saving registers into a re-aligned local stack frame, avoid
9836 any tricky guessing by dwarf2out. */
9837 if (m->fs.realigned)
9838 {
9839 gcc_checking_assert (stack_realign_drap);
9840
9841 if (regno == REGNO (crtl->drap_reg))
9842 {
9843 /* A bit of a hack. We force the DRAP register to be saved in
9844 the re-aligned stack frame, which provides us with a copy
9845 of the CFA that will last past the prologue. Install it. */
9846 gcc_checking_assert (cfun->machine->fs.fp_valid);
9847 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9848 cfun->machine->fs.fp_offset - cfa_offset);
9849 mem = gen_rtx_MEM (mode, addr);
9850 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
9851 }
9852 else
9853 {
9854 /* The frame pointer is a stable reference within the
9855 aligned frame. Use it. */
9856 gcc_checking_assert (cfun->machine->fs.fp_valid);
9857 addr = plus_constant (Pmode, hard_frame_pointer_rtx,
9858 cfun->machine->fs.fp_offset - cfa_offset);
9859 mem = gen_rtx_MEM (mode, addr);
9860 add_reg_note (insn, REG_CFA_EXPRESSION,
9861 gen_rtx_SET (VOIDmode, mem, reg));
9862 }
9863 }
9864
9865 /* The memory may not be relative to the current CFA register,
9866 which means that we may need to generate a new pattern for
9867 use by the unwind info. */
9868 else if (base != m->fs.cfa_reg)
9869 {
9870 addr = plus_constant (Pmode, m->fs.cfa_reg,
9871 m->fs.cfa_offset - cfa_offset);
9872 mem = gen_rtx_MEM (mode, addr);
9873 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
9874 }
9875 }
9876
9877 /* Emit code to save registers using MOV insns.
9878 First register is stored at CFA - CFA_OFFSET. */
9879 static void
9880 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
9881 {
9882 unsigned int regno;
9883
9884 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9885 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9886 {
9887 ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
9888 cfa_offset -= UNITS_PER_WORD;
9889 }
9890 }
9891
9892 /* Emit code to save SSE registers using MOV insns.
9893 First register is stored at CFA - CFA_OFFSET. */
9894 static void
9895 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
9896 {
9897 unsigned int regno;
9898
9899 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9900 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
9901 {
9902 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
9903 cfa_offset -= 16;
9904 }
9905 }
9906
9907 static GTY(()) rtx queued_cfa_restores;
9908
9909 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
9910 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
9911 Don't add the note if the previously saved value will be left untouched
9912 within the stack red zone until return, as unwinders can find the same
9913 value in the register and on the stack. */
9914
9915 static void
9916 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
9917 {
9918 if (!crtl->shrink_wrapped
9919 && cfa_offset <= cfun->machine->fs.red_zone_offset)
9920 return;
9921
9922 if (insn)
9923 {
9924 add_reg_note (insn, REG_CFA_RESTORE, reg);
9925 RTX_FRAME_RELATED_P (insn) = 1;
9926 }
9927 else
9928 queued_cfa_restores
9929 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
9930 }
9931
9932 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
9933
9934 static void
9935 ix86_add_queued_cfa_restore_notes (rtx insn)
9936 {
9937 rtx last;
9938 if (!queued_cfa_restores)
9939 return;
9940 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
9941 ;
9942 XEXP (last, 1) = REG_NOTES (insn);
9943 REG_NOTES (insn) = queued_cfa_restores;
9944 queued_cfa_restores = NULL_RTX;
9945 RTX_FRAME_RELATED_P (insn) = 1;
9946 }
9947
9948 /* Expand prologue or epilogue stack adjustment.
9949 The pattern exists to put a dependency on all ebp-based memory accesses.
9950 STYLE should be negative if instructions should be marked as frame related,
9951 zero if the %r11 register is live and cannot be freely used, and positive
9952 otherwise. */
9953
9954 static void
9955 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
9956 int style, bool set_cfa)
9957 {
9958 struct machine_function *m = cfun->machine;
9959 rtx insn;
9960 bool add_frame_related_expr = false;
9961
9962 if (Pmode == SImode)
9963 insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
9964 else if (x86_64_immediate_operand (offset, DImode))
9965 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
9966 else
9967 {
9968 rtx tmp;
9969 /* r11 is used by indirect sibcall return as well, set before the
9970 epilogue and used after the epilogue. */
9971 if (style)
9972 tmp = gen_rtx_REG (DImode, R11_REG);
9973 else
9974 {
9975 gcc_assert (src != hard_frame_pointer_rtx
9976 && dest != hard_frame_pointer_rtx);
9977 tmp = hard_frame_pointer_rtx;
9978 }
9979 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
9980 if (style < 0)
9981 add_frame_related_expr = true;
9982
9983 insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
9984 }
9985
9986 insn = emit_insn (insn);
9987 if (style >= 0)
9988 ix86_add_queued_cfa_restore_notes (insn);
9989
9990 if (set_cfa)
9991 {
9992 rtx r;
9993
9994 gcc_assert (m->fs.cfa_reg == src);
9995 m->fs.cfa_offset += INTVAL (offset);
9996 m->fs.cfa_reg = dest;
9997
9998 r = gen_rtx_PLUS (Pmode, src, offset);
9999 r = gen_rtx_SET (VOIDmode, dest, r);
10000 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10001 RTX_FRAME_RELATED_P (insn) = 1;
10002 }
10003 else if (style < 0)
10004 {
10005 RTX_FRAME_RELATED_P (insn) = 1;
10006 if (add_frame_related_expr)
10007 {
10008 rtx r = gen_rtx_PLUS (Pmode, src, offset);
10009 r = gen_rtx_SET (VOIDmode, dest, r);
10010 add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10011 }
10012 }
10013
10014 if (dest == stack_pointer_rtx)
10015 {
10016 HOST_WIDE_INT ooffset = m->fs.sp_offset;
10017 bool valid = m->fs.sp_valid;
10018
10019 if (src == hard_frame_pointer_rtx)
10020 {
10021 valid = m->fs.fp_valid;
10022 ooffset = m->fs.fp_offset;
10023 }
10024 else if (src == crtl->drap_reg)
10025 {
10026 valid = m->fs.drap_valid;
10027 ooffset = 0;
10028 }
10029 else
10030 {
10031 /* Else there are two possibilities: SP itself, which we set
10032 up as the default above. Or EH_RETURN_STACKADJ_RTX, which is
10033 taken care of by hand along the eh_return path. */
10034 gcc_checking_assert (src == stack_pointer_rtx
10035 || offset == const0_rtx);
10036 }
10037
10038 m->fs.sp_offset = ooffset - INTVAL (offset);
10039 m->fs.sp_valid = valid;
10040 }
10041 }
10042
10043 /* Find an available register to be used as the dynamic realign argument
10044 pointer register. Such a register will be written in the prologue and
10045 used at the beginning of the body, so it must not be
10046 1. a parameter passing register.
10047 2. the GOT pointer.
10048 We reuse the static chain register if it is available. Otherwise, we
10049 use DI for i386 and R13 for x86-64. We chose R13 since it has
10050 shorter encoding.
10051
10052 Return: the regno of the chosen register. */
10053
10054 static unsigned int
10055 find_drap_reg (void)
10056 {
10057 tree decl = cfun->decl;
10058
10059 if (TARGET_64BIT)
10060 {
10061 /* Use R13 for a nested function or a function that needs a static chain.
10062 Since a function with a tail call may use any caller-saved
10063 register in the epilogue, DRAP must not use a caller-saved
10064 register in that case. */
10065 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10066 return R13_REG;
10067
10068 return R10_REG;
10069 }
10070 else
10071 {
10072 /* Use DI for a nested function or a function that needs a static chain.
10073 Since a function with a tail call may use any caller-saved
10074 register in the epilogue, DRAP must not use a caller-saved
10075 register in that case. */
10076 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10077 return DI_REG;
10078
10079 /* Reuse static chain register if it isn't used for parameter
10080 passing. */
10081 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10082 {
10083 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10084 if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10085 return CX_REG;
10086 }
10087 return DI_REG;
10088 }
10089 }
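
/* Summary of the choices above, for illustration only:
     64-bit, no static chain and no tail call emitted:   R10
     64-bit, static chain or tail call emitted:          R13
     32-bit, static chain or tail call emitted:          DI
     32-bit, regparm <= 2 and not fastcall/thiscall:     CX
     32-bit, otherwise:                                  DI  */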
10090
10091 /* Return minimum incoming stack alignment. */
10092
10093 static unsigned int
10094 ix86_minimum_incoming_stack_boundary (bool sibcall)
10095 {
10096 unsigned int incoming_stack_boundary;
10097
10098 /* Prefer the one specified at command line. */
10099 if (ix86_user_incoming_stack_boundary)
10100 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
10101 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
10102 if -mstackrealign is used, this is not the sibcall check, and the
10103 estimated stack alignment is 128 bits. */
10104 else if (!sibcall
10105 && !TARGET_64BIT
10106 && ix86_force_align_arg_pointer
10107 && crtl->stack_alignment_estimated == 128)
10108 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10109 else
10110 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10111
10112 /* Incoming stack alignment can be changed on individual functions
10113 via force_align_arg_pointer attribute. We use the smallest
10114 incoming stack boundary. */
10115 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10116 && lookup_attribute (ix86_force_align_arg_pointer_string,
10117 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10118 incoming_stack_boundary = MIN_STACK_BOUNDARY;
10119
10120 /* The incoming stack frame has to be aligned at least at
10121 parm_stack_boundary. */
10122 if (incoming_stack_boundary < crtl->parm_stack_boundary)
10123 incoming_stack_boundary = crtl->parm_stack_boundary;
10124
10125 /* The stack at the entry of main is aligned by the runtime. We use the
10126 smallest incoming stack boundary. */
10127 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10128 && DECL_NAME (current_function_decl)
10129 && MAIN_NAME_P (DECL_NAME (current_function_decl))
10130 && DECL_FILE_SCOPE_P (current_function_decl))
10131 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10132
10133 return incoming_stack_boundary;
10134 }
10135
10136 /* Update incoming stack boundary and estimated stack alignment. */
10137
10138 static void
10139 ix86_update_stack_boundary (void)
10140 {
10141 ix86_incoming_stack_boundary
10142 = ix86_minimum_incoming_stack_boundary (false);
10143
10144 /* x86_64 vararg needs 16byte stack alignment for register save
10145 area. */
10146 if (TARGET_64BIT
10147 && cfun->stdarg
10148 && crtl->stack_alignment_estimated < 128)
10149 crtl->stack_alignment_estimated = 128;
10150 }
10151
10152 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
10153 needed or an rtx for DRAP otherwise. */
10154
10155 static rtx
10156 ix86_get_drap_rtx (void)
10157 {
10158 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10159 crtl->need_drap = true;
10160
10161 if (stack_realign_drap)
10162 {
10163 /* Assign DRAP to vDRAP and return vDRAP. */
10164 unsigned int regno = find_drap_reg ();
10165 rtx drap_vreg;
10166 rtx arg_ptr;
10167 rtx seq, insn;
10168
10169 arg_ptr = gen_rtx_REG (Pmode, regno);
10170 crtl->drap_reg = arg_ptr;
10171
10172 start_sequence ();
10173 drap_vreg = copy_to_reg (arg_ptr);
10174 seq = get_insns ();
10175 end_sequence ();
10176
10177 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10178 if (!optimize)
10179 {
10180 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10181 RTX_FRAME_RELATED_P (insn) = 1;
10182 }
10183 return drap_vreg;
10184 }
10185 else
10186 return NULL;
10187 }
10188
10189 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
10190
10191 static rtx
10192 ix86_internal_arg_pointer (void)
10193 {
10194 return virtual_incoming_args_rtx;
10195 }
10196
10197 struct scratch_reg {
10198 rtx reg;
10199 bool saved;
10200 };
10201
10202 /* Return a short-lived scratch register for use on function entry.
10203 In 32-bit mode, it is valid only after the registers are saved
10204 in the prologue. This register must be released by means of
10205 release_scratch_register_on_entry once it is dead. */
10206
10207 static void
10208 get_scratch_register_on_entry (struct scratch_reg *sr)
10209 {
10210 int regno;
10211
10212 sr->saved = false;
10213
10214 if (TARGET_64BIT)
10215 {
10216 /* We always use R11 in 64-bit mode. */
10217 regno = R11_REG;
10218 }
10219 else
10220 {
10221 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10222 bool fastcall_p
10223 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10224 bool thiscall_p
10225 = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10226 bool static_chain_p = DECL_STATIC_CHAIN (decl);
10227 int regparm = ix86_function_regparm (fntype, decl);
10228 int drap_regno
10229 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10230
10231 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10232 for the static chain register. */
10233 if ((regparm < 1 || (fastcall_p && !static_chain_p))
10234 && drap_regno != AX_REG)
10235 regno = AX_REG;
10236 /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10237 for the static chain register. */
10238 else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10239 regno = AX_REG;
10240 else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10241 regno = DX_REG;
10242 /* ecx is the static chain register. */
10243 else if (regparm < 3 && !fastcall_p && !thiscall_p
10244 && !static_chain_p
10245 && drap_regno != CX_REG)
10246 regno = CX_REG;
10247 else if (ix86_save_reg (BX_REG, true))
10248 regno = BX_REG;
10249 /* esi is the static chain register. */
10250 else if (!(regparm == 3 && static_chain_p)
10251 && ix86_save_reg (SI_REG, true))
10252 regno = SI_REG;
10253 else if (ix86_save_reg (DI_REG, true))
10254 regno = DI_REG;
10255 else
10256 {
10257 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10258 sr->saved = true;
10259 }
10260 }
10261
10262 sr->reg = gen_rtx_REG (Pmode, regno);
10263 if (sr->saved)
10264 {
10265 rtx insn = emit_insn (gen_push (sr->reg));
10266 RTX_FRAME_RELATED_P (insn) = 1;
10267 }
10268 }
10269
10270 /* Release a scratch register obtained from the preceding function. */
10271
10272 static void
10273 release_scratch_register_on_entry (struct scratch_reg *sr)
10274 {
10275 if (sr->saved)
10276 {
10277 struct machine_function *m = cfun->machine;
10278 rtx x, insn = emit_insn (gen_pop (sr->reg));
10279
10280 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
10281 RTX_FRAME_RELATED_P (insn) = 1;
10282 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10283 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10284 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10285 m->fs.sp_offset -= UNITS_PER_WORD;
10286 }
10287 }
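
/* Sketch of the intended usage pattern (this mirrors the stack probing
   routines below; it is not additional machinery):

     struct scratch_reg sr;
     get_scratch_register_on_entry (&sr);
     ... emit insns that may clobber sr.reg ...
     release_scratch_register_on_entry (&sr);

   When no suitable register is free, the pair wraps the sequence in a
   push/pop of the chosen register and keeps m->fs.sp_offset in sync.  */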
10288
10289 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
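/* For example, assuming STACK_CHECK_PROBE_INTERVAL_EXP is 12 (a common
   default, not something this file guarantees), PROBE_INTERVAL is 4096
   bytes, i.e. one probe per page.  */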
10290
10291 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
10292
10293 static void
10294 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10295 {
10296 /* We skip the probe for the first interval + a small dope of 4 words and
10297 probe that many bytes past the specified size to maintain a protection
10298 area at the bottom of the stack. */
10299 const int dope = 4 * UNITS_PER_WORD;
10300 rtx size_rtx = GEN_INT (size), last;
10301
10302 /* See if we have a constant small number of probes to generate. If so,
10303 that's the easy case. The run-time loop is made up of 11 insns in the
10304 generic case while the compile-time loop is made up of 3+2*(n-1) insns
10305 for n # of intervals. */
10306 if (size <= 5 * PROBE_INTERVAL)
10307 {
10308 HOST_WIDE_INT i, adjust;
10309 bool first_probe = true;
10310
10311 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10312 values of N from 1 until it exceeds SIZE. If only one probe is
10313 needed, this will not generate any code. Then adjust and probe
10314 to PROBE_INTERVAL + SIZE. */
10315 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10316 {
10317 if (first_probe)
10318 {
10319 adjust = 2 * PROBE_INTERVAL + dope;
10320 first_probe = false;
10321 }
10322 else
10323 adjust = PROBE_INTERVAL;
10324
10325 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10326 plus_constant (Pmode, stack_pointer_rtx,
10327 -adjust)));
10328 emit_stack_probe (stack_pointer_rtx);
10329 }
10330
10331 if (first_probe)
10332 adjust = size + PROBE_INTERVAL + dope;
10333 else
10334 adjust = size + PROBE_INTERVAL - i;
10335
10336 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10337 plus_constant (Pmode, stack_pointer_rtx,
10338 -adjust)));
10339 emit_stack_probe (stack_pointer_rtx);
10340
10341 /* Adjust back to account for the additional first interval. */
10342 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10343 plus_constant (Pmode, stack_pointer_rtx,
10344 PROBE_INTERVAL + dope)));
10345 }
10346
10347 /* Otherwise, do the same as above, but in a loop. Note that we must be
10348 extra careful with variables wrapping around because we might be at
10349 the very top (or the very bottom) of the address space and we have
10350 to be able to handle this case properly; in particular, we use an
10351 equality test for the loop condition. */
10352 else
10353 {
10354 HOST_WIDE_INT rounded_size;
10355 struct scratch_reg sr;
10356
10357 get_scratch_register_on_entry (&sr);
10358
10359
10360 /* Step 1: round SIZE to the previous multiple of the interval. */
10361
10362 rounded_size = size & -PROBE_INTERVAL;
10363
10364
10365 /* Step 2: compute initial and final value of the loop counter. */
10366
10367 /* SP = SP_0 + PROBE_INTERVAL. */
10368 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10369 plus_constant (Pmode, stack_pointer_rtx,
10370 - (PROBE_INTERVAL + dope))));
10371
10372 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
10373 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
10374 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
10375 gen_rtx_PLUS (Pmode, sr.reg,
10376 stack_pointer_rtx)));
10377
10378
10379 /* Step 3: the loop
10380
10381 while (SP != LAST_ADDR)
10382 {
10383 SP = SP + PROBE_INTERVAL
10384 probe at SP
10385 }
10386
10387 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
10388 values of N from 1 until it is equal to ROUNDED_SIZE. */
10389
10390 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
10391
10392
10393 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
10394 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
10395
10396 if (size != rounded_size)
10397 {
10398 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10399 plus_constant (Pmode, stack_pointer_rtx,
10400 rounded_size - size)));
10401 emit_stack_probe (stack_pointer_rtx);
10402 }
10403
10404 /* Adjust back to account for the additional first interval. */
10405 last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10406 plus_constant (Pmode, stack_pointer_rtx,
10407 PROBE_INTERVAL + dope)));
10408
10409 release_scratch_register_on_entry (&sr);
10410 }
10411
10412 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
10413
10414 /* Even if the stack pointer isn't the CFA register, we need to correctly
10415 describe the adjustments made to it, in particular differentiate the
10416 frame-related ones from the frame-unrelated ones. */
10417 if (size > 0)
10418 {
10419 rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
10420 XVECEXP (expr, 0, 0)
10421 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10422 plus_constant (Pmode, stack_pointer_rtx, -size));
10423 XVECEXP (expr, 0, 1)
10424 = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10425 plus_constant (Pmode, stack_pointer_rtx,
10426 PROBE_INTERVAL + dope + size));
10427 add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
10428 RTX_FRAME_RELATED_P (last) = 1;
10429
10430 cfun->machine->fs.sp_offset += size;
10431 }
10432
10433 /* Make sure nothing is scheduled before we are done. */
10434 emit_insn (gen_blockage ());
10435 }
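
/* Worked example, for illustration only (assuming 64-bit, PROBE_INTERVAL
   == 4096 and hence dope == 32): for SIZE == 10000 the unrolled path
   above emits

     sub $8224, %rsp   / probe      (2 * PROBE_INTERVAL + dope)
     sub $4096, %rsp   / probe      (PROBE_INTERVAL)
     sub $1808, %rsp   / probe      (10000 + 4096 - 12288)
     add $4128, %rsp                (PROBE_INTERVAL + dope)

   for a net adjustment of -10000 bytes, with probes never more than
   PROBE_INTERVAL apart beyond the skipped first interval.  */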
10436
10437 /* Adjust the stack pointer up to REG while probing it. */
10438
10439 const char *
10440 output_adjust_stack_and_probe (rtx reg)
10441 {
10442 static int labelno = 0;
10443 char loop_lab[32], end_lab[32];
10444 rtx xops[2];
10445
10446 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10447 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10448
10449 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10450
10451 /* Jump to END_LAB if SP == LAST_ADDR. */
10452 xops[0] = stack_pointer_rtx;
10453 xops[1] = reg;
10454 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10455 fputs ("\tje\t", asm_out_file);
10456 assemble_name_raw (asm_out_file, end_lab);
10457 fputc ('\n', asm_out_file);
10458
10459 /* SP = SP + PROBE_INTERVAL. */
10460 xops[1] = GEN_INT (PROBE_INTERVAL);
10461 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10462
10463 /* Probe at SP. */
10464 xops[1] = const0_rtx;
10465 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
10466
10467 fprintf (asm_out_file, "\tjmp\t");
10468 assemble_name_raw (asm_out_file, loop_lab);
10469 fputc ('\n', asm_out_file);
10470
10471 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10472
10473 return "";
10474 }
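
/* For illustration, on x86-64 with PROBE_INTERVAL == 4096 (an assumed
   value) the loop printed above comes out roughly as, in AT&T syntax:

       .LPSRL0:
         cmpq    %r11, %rsp
         je      .LPSRE0
         subq    $4096, %rsp
         orq     $0, (%rsp)
         jmp     .LPSRL0
       .LPSRE0:                                                       */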
10475
10476 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
10477 inclusive. These are offsets from the current stack pointer. */
10478
10479 static void
10480 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
10481 {
10482 /* See if we have a constant small number of probes to generate. If so,
10483 that's the easy case. The run-time loop is made up of 7 insns in the
10484 generic case while the compile-time loop is made up of n insns for n #
10485 of intervals. */
10486 if (size <= 7 * PROBE_INTERVAL)
10487 {
10488 HOST_WIDE_INT i;
10489
10490 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
10491 it exceeds SIZE. If only one probe is needed, this will not
10492 generate any code. Then probe at FIRST + SIZE. */
10493 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10494 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10495 -(first + i)));
10496
10497 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
10498 -(first + size)));
10499 }
10500
10501 /* Otherwise, do the same as above, but in a loop. Note that we must be
10502 extra careful with variables wrapping around because we might be at
10503 the very top (or the very bottom) of the address space and we have
10504 to be able to handle this case properly; in particular, we use an
10505 equality test for the loop condition. */
10506 else
10507 {
10508 HOST_WIDE_INT rounded_size, last;
10509 struct scratch_reg sr;
10510
10511 get_scratch_register_on_entry (&sr);
10512
10513
10514 /* Step 1: round SIZE to the previous multiple of the interval. */
10515
10516 rounded_size = size & -PROBE_INTERVAL;
10517
10518
10519 /* Step 2: compute initial and final value of the loop counter. */
10520
10521 /* TEST_OFFSET = FIRST. */
10522 emit_move_insn (sr.reg, GEN_INT (-first));
10523
10524 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
10525 last = first + rounded_size;
10526
10527
10528 /* Step 3: the loop
10529
10530 while (TEST_ADDR != LAST_ADDR)
10531 {
10532 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
10533 probe at TEST_ADDR
10534 }
10535
10536 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
10537 until it is equal to ROUNDED_SIZE. */
10538
10539 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
10540
10541
10542 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
10543 that SIZE is equal to ROUNDED_SIZE. */
10544
10545 if (size != rounded_size)
10546 emit_stack_probe (plus_constant (Pmode,
10547 gen_rtx_PLUS (Pmode,
10548 stack_pointer_rtx,
10549 sr.reg),
10550 rounded_size - size));
10551
10552 release_scratch_register_on_entry (&sr);
10553 }
10554
10555 /* Make sure nothing is scheduled before we are done. */
10556 emit_insn (gen_blockage ());
10557 }
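
/* Worked example, for illustration only (assuming PROBE_INTERVAL == 4096):
   with FIRST == 0 and SIZE == 3 * 4096 the unrolled path above emits
   probes at sp - 4096, sp - 8192 and sp - 12288 without moving the stack
   pointer; larger sizes fall through to the ix86_gen_probe_stack_range
   loop instead.  */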
10558
10559 /* Probe a range of stack addresses from REG to END, inclusive. These are
10560 offsets from the current stack pointer. */
10561
10562 const char *
10563 output_probe_stack_range (rtx reg, rtx end)
10564 {
10565 static int labelno = 0;
10566 char loop_lab[32], end_lab[32];
10567 rtx xops[3];
10568
10569 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
10570 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
10571
10572 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
10573
10574 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
10575 xops[0] = reg;
10576 xops[1] = end;
10577 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
10578 fputs ("\tje\t", asm_out_file);
10579 assemble_name_raw (asm_out_file, end_lab);
10580 fputc ('\n', asm_out_file);
10581
10582 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
10583 xops[1] = GEN_INT (PROBE_INTERVAL);
10584 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
10585
10586 /* Probe at TEST_ADDR. */
10587 xops[0] = stack_pointer_rtx;
10588 xops[1] = reg;
10589 xops[2] = const0_rtx;
10590 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
10591
10592 fprintf (asm_out_file, "\tjmp\t");
10593 assemble_name_raw (asm_out_file, loop_lab);
10594 fputc ('\n', asm_out_file);
10595
10596 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
10597
10598 return "";
10599 }
10600
10601 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
10602 to be generated in correct form. */
10603 static void
10604 ix86_finalize_stack_realign_flags (void)
10605 {
10606 /* Check if stack realignment is really needed after reload, and
10607 store the result in cfun. */
10608 unsigned int incoming_stack_boundary
10609 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
10610 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
10611 unsigned int stack_realign = (incoming_stack_boundary
10612 < (crtl->is_leaf
10613 ? crtl->max_used_stack_slot_alignment
10614 : crtl->stack_alignment_needed));
10615
10616 if (crtl->stack_realign_finalized)
10617 {
10618 /* After stack_realign_needed is finalized, we can no longer
10619 change it. */
10620 gcc_assert (crtl->stack_realign_needed == stack_realign);
10621 return;
10622 }
10623
10624 /* If the only reason for frame_pointer_needed is that we conservatively
10625 assumed stack realignment might be needed, but in the end nothing that
10626 needed the stack alignment had been spilled, clear frame_pointer_needed
10627 and say we don't need stack realignment. */
10628 if (stack_realign
10629 && frame_pointer_needed
10630 && crtl->is_leaf
10631 && flag_omit_frame_pointer
10632 && crtl->sp_is_unchanging
10633 && !ix86_current_function_calls_tls_descriptor
10634 && !crtl->accesses_prior_frames
10635 && !cfun->calls_alloca
10636 && !crtl->calls_eh_return
10637 && !(flag_stack_check && STACK_CHECK_MOVING_SP)
10638 && !ix86_frame_pointer_required ()
10639 && get_frame_size () == 0
10640 && ix86_nsaved_sseregs () == 0
10641 && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
10642 {
10643 HARD_REG_SET set_up_by_prologue, prologue_used;
10644 basic_block bb;
10645
10646 CLEAR_HARD_REG_SET (prologue_used);
10647 CLEAR_HARD_REG_SET (set_up_by_prologue);
10648 add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
10649 add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
10650 add_to_hard_reg_set (&set_up_by_prologue, Pmode,
10651 HARD_FRAME_POINTER_REGNUM);
10652 FOR_EACH_BB_FN (bb, cfun)
10653 {
10654 rtx insn;
10655 FOR_BB_INSNS (bb, insn)
10656 if (NONDEBUG_INSN_P (insn)
10657 && requires_stack_frame_p (insn, prologue_used,
10658 set_up_by_prologue))
10659 {
10660 crtl->stack_realign_needed = stack_realign;
10661 crtl->stack_realign_finalized = true;
10662 return;
10663 }
10664 }
10665
10666 /* If drap has been set, but it actually isn't live at the start
10667 of the function, there is no reason to set it up. */
10668 if (crtl->drap_reg)
10669 {
10670 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
10671 if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
10672 {
10673 crtl->drap_reg = NULL_RTX;
10674 crtl->need_drap = false;
10675 }
10676 }
10677 else
10678 cfun->machine->no_drap_save_restore = true;
10679
10680 frame_pointer_needed = false;
10681 stack_realign = false;
10682 crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
10683 crtl->stack_alignment_needed = incoming_stack_boundary;
10684 crtl->stack_alignment_estimated = incoming_stack_boundary;
10685 if (crtl->preferred_stack_boundary > incoming_stack_boundary)
10686 crtl->preferred_stack_boundary = incoming_stack_boundary;
10687 df_finish_pass (true);
10688 df_scan_alloc (NULL);
10689 df_scan_blocks ();
10690 df_compute_regs_ever_live (true);
10691 df_analyze ();
10692 }
10693
10694 crtl->stack_realign_needed = stack_realign;
10695 crtl->stack_realign_finalized = true;
10696 }
10697
10698 /* Expand the prologue into a bunch of separate insns. */
10699
10700 void
10701 ix86_expand_prologue (void)
10702 {
10703 struct machine_function *m = cfun->machine;
10704 rtx insn, t;
10705 bool pic_reg_used;
10706 struct ix86_frame frame;
10707 HOST_WIDE_INT allocate;
10708 bool int_registers_saved;
10709 bool sse_registers_saved;
10710
10711 ix86_finalize_stack_realign_flags ();
10712
10713 /* DRAP should not coexist with stack_realign_fp */
10714 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
10715
10716 memset (&m->fs, 0, sizeof (m->fs));
10717
10718 /* Initialize CFA state for before the prologue. */
10719 m->fs.cfa_reg = stack_pointer_rtx;
10720 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
10721
10722 /* Track SP offset to the CFA. We continue tracking this after we've
10723 swapped the CFA register away from SP. In the case of re-alignment
10724 this is fudged; we're interested in offsets within the local frame. */
10725 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10726 m->fs.sp_valid = true;
10727
10728 ix86_compute_frame_layout (&frame);
10729
10730 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
10731 {
10732 /* We should have already generated an error for any use of
10733 ms_hook on a nested function. */
10734 gcc_checking_assert (!ix86_static_chain_on_stack);
10735
10736 /* Check if profiling is active and we shall use the profiling-before-prologue
10737 variant. If so, sorry. */
10738 if (crtl->profile && flag_fentry != 0)
10739 sorry ("ms_hook_prologue attribute isn%'t compatible "
10740 "with -mfentry for 32-bit");
10741
10742 /* In ix86_asm_output_function_label we emitted:
10743 8b ff movl.s %edi,%edi
10744 55 push %ebp
10745 8b ec movl.s %esp,%ebp
10746
10747 This matches the hookable function prologue in Win32 API
10748 functions in Microsoft Windows XP Service Pack 2 and newer.
10749 Wine uses this to enable Windows apps to hook the Win32 API
10750 functions provided by Wine.
10751
10752 What that means is that we've already set up the frame pointer. */
10753
10754 if (frame_pointer_needed
10755 && !(crtl->drap_reg && crtl->stack_realign_needed))
10756 {
10757 rtx push, mov;
10758
10759 /* We've decided to use the frame pointer already set up.
10760 Describe this to the unwinder by pretending that both
10761 push and mov insns happen right here.
10762
10763 Putting the unwind info here at the end of the ms_hook
10764 is done so that we can make absolutely certain we get
10765 the required byte sequence at the start of the function,
10766 rather than relying on an assembler that can produce
10767 the exact encoding required.
10768
10769 However it does mean (in the unpatched case) that we have
10770 a 1 insn window where the asynchronous unwind info is
10771 incorrect. However, if we placed the unwind info at
10772 its correct location we would have incorrect unwind info
10773 in the patched case. Which is probably all moot since
10774 I don't expect Wine generates dwarf2 unwind info for the
10775 system libraries that use this feature. */
10776
10777 insn = emit_insn (gen_blockage ());
10778
10779 push = gen_push (hard_frame_pointer_rtx);
10780 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
10781 stack_pointer_rtx);
10782 RTX_FRAME_RELATED_P (push) = 1;
10783 RTX_FRAME_RELATED_P (mov) = 1;
10784
10785 RTX_FRAME_RELATED_P (insn) = 1;
10786 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10787 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
10788
10789 /* Note that gen_push incremented m->fs.cfa_offset, even
10790 though we didn't emit the push insn here. */
10791 m->fs.cfa_reg = hard_frame_pointer_rtx;
10792 m->fs.fp_offset = m->fs.cfa_offset;
10793 m->fs.fp_valid = true;
10794 }
10795 else
10796 {
10797 /* The frame pointer is not needed so pop %ebp again.
10798 This leaves us with a pristine state. */
10799 emit_insn (gen_pop (hard_frame_pointer_rtx));
10800 }
10801 }
10802
10803 /* The first insn of a function that accepts its static chain on the
10804 stack is to push the register that would be filled in by a direct
10805 call. This insn will be skipped by the trampoline. */
10806 else if (ix86_static_chain_on_stack)
10807 {
10808 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
10809 emit_insn (gen_blockage ());
10810
10811 /* We don't want to interpret this push insn as a register save,
10812 only as a stack adjustment. The real copy of the register as
10813 a save will be done later, if needed. */
10814 t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
10815 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
10816 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
10817 RTX_FRAME_RELATED_P (insn) = 1;
10818 }
10819
10820 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
10821 DRAP is needed and stack realignment is really needed after reload. */
10822 if (stack_realign_drap)
10823 {
10824 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10825
10826 /* Only need to push parameter pointer reg if it is caller saved. */
10827 if (!call_used_regs[REGNO (crtl->drap_reg)])
10828 {
10829 /* Push arg pointer reg */
10830 insn = emit_insn (gen_push (crtl->drap_reg));
10831 RTX_FRAME_RELATED_P (insn) = 1;
10832 }
10833
10834 /* Grab the argument pointer. */
10835 t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
10836 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
10837 RTX_FRAME_RELATED_P (insn) = 1;
10838 m->fs.cfa_reg = crtl->drap_reg;
10839 m->fs.cfa_offset = 0;
10840
10841 /* Align the stack. */
10842 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10843 stack_pointer_rtx,
10844 GEN_INT (-align_bytes)));
10845 RTX_FRAME_RELATED_P (insn) = 1;
10846
10847 /* Replicate the return address on the stack so that return
10848 address can be reached via (argp - 1) slot. This is needed
10849 to implement macro RETURN_ADDR_RTX and intrinsic function
10850 expand_builtin_return_addr etc. */
10851 t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
10852 t = gen_frame_mem (word_mode, t);
10853 insn = emit_insn (gen_push (t));
10854 RTX_FRAME_RELATED_P (insn) = 1;
10855
10856 /* For the purposes of frame and register save area addressing,
10857 we've started over with a new frame. */
10858 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
10859 m->fs.realigned = true;
10860 }
10861
10862 int_registers_saved = (frame.nregs == 0);
10863 sse_registers_saved = (frame.nsseregs == 0);
10864
10865 if (frame_pointer_needed && !m->fs.fp_valid)
10866 {
10867 /* Note: AT&T enter does NOT have reversed args. Enter is probably
10868 slower on all targets. Also sdb doesn't like it. */
10869 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
10870 RTX_FRAME_RELATED_P (insn) = 1;
10871
10872 /* Push registers now, before setting the frame pointer
10873 on SEH target. */
10874 if (!int_registers_saved
10875 && TARGET_SEH
10876 && !frame.save_regs_using_mov)
10877 {
10878 ix86_emit_save_regs ();
10879 int_registers_saved = true;
10880 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10881 }
10882
10883 if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
10884 {
10885 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10886 RTX_FRAME_RELATED_P (insn) = 1;
10887
10888 if (m->fs.cfa_reg == stack_pointer_rtx)
10889 m->fs.cfa_reg = hard_frame_pointer_rtx;
10890 m->fs.fp_offset = m->fs.sp_offset;
10891 m->fs.fp_valid = true;
10892 }
10893 }
10894
10895 if (!int_registers_saved)
10896 {
10897 /* If saving registers via PUSH, do so now. */
10898 if (!frame.save_regs_using_mov)
10899 {
10900 ix86_emit_save_regs ();
10901 int_registers_saved = true;
10902 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
10903 }
10904
10905 /* When using the red zone we may start register saving before allocating
10906 the stack frame, saving one cycle of the prologue. However, avoid
10907 doing this if we have to probe the stack; at least on x86_64 the
10908 stack probe can turn into a call that clobbers a red zone location. */
10909 else if (ix86_using_red_zone ()
10910 && (! TARGET_STACK_PROBE
10911 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
10912 {
10913 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
10914 int_registers_saved = true;
10915 }
10916 }
10917
10918 if (stack_realign_fp)
10919 {
10920 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
10921 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
10922
10923 /* The computation of the size of the re-aligned stack frame means
10924 that we must allocate the size of the register save area before
10925 performing the actual alignment. Otherwise we cannot guarantee
10926 that there's enough storage above the realignment point. */
10927 if (m->fs.sp_offset != frame.sse_reg_save_offset)
10928 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10929 GEN_INT (m->fs.sp_offset
10930 - frame.sse_reg_save_offset),
10931 -1, false);
10932
10933 /* Align the stack. */
10934 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
10935 stack_pointer_rtx,
10936 GEN_INT (-align_bytes)));
10937
10938 /* For the purposes of register save area addressing, the stack
10939 pointer is no longer valid. As for the value of sp_offset,
10940 see ix86_compute_frame_layout, which we need to match in order
10941 to pass verification of stack_pointer_offset at the end. */
10942 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
10943 m->fs.sp_valid = false;
10944 }
10945
10946 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
10947
10948 if (flag_stack_usage_info)
10949 {
10950 /* We start to count from ARG_POINTER. */
10951 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
10952
10953 /* If it was realigned, take into account the fake frame. */
10954 if (stack_realign_drap)
10955 {
10956 if (ix86_static_chain_on_stack)
10957 stack_size += UNITS_PER_WORD;
10958
10959 if (!call_used_regs[REGNO (crtl->drap_reg)])
10960 stack_size += UNITS_PER_WORD;
10961
10962 /* This over-estimates by 1 minimal-stack-alignment-unit but
10963 mitigates that by counting in the new return address slot. */
10964 current_function_dynamic_stack_size
10965 += crtl->stack_alignment_needed / BITS_PER_UNIT;
10966 }
10967
10968 current_function_static_stack_size = stack_size;
10969 }
10970
10971 /* On SEH target with very large frame size, allocate an area to save
10972 SSE registers (as the very large allocation won't be described). */
10973 if (TARGET_SEH
10974 && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
10975 && !sse_registers_saved)
10976 {
10977 HOST_WIDE_INT sse_size =
10978 frame.sse_reg_save_offset - frame.reg_save_offset;
10979
10980 gcc_assert (int_registers_saved);
10981
10982 /* No need to do stack checking as the area will be immediately
10983 written. */
10984 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10985 GEN_INT (-sse_size), -1,
10986 m->fs.cfa_reg == stack_pointer_rtx);
10987 allocate -= sse_size;
10988 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
10989 sse_registers_saved = true;
10990 }
10991
10992 /* The stack has already been decremented by the instruction calling us
10993 so probe if the size is non-negative to preserve the protection area. */
10994 if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
10995 {
10996 /* We expect the registers to be saved when probes are used. */
10997 gcc_assert (int_registers_saved);
10998
10999 if (STACK_CHECK_MOVING_SP)
11000 {
11001 if (!(crtl->is_leaf && !cfun->calls_alloca
11002 && allocate <= PROBE_INTERVAL))
11003 {
11004 ix86_adjust_stack_and_probe (allocate);
11005 allocate = 0;
11006 }
11007 }
11008 else
11009 {
11010 HOST_WIDE_INT size = allocate;
11011
11012 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11013 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11014
11015 if (TARGET_STACK_PROBE)
11016 {
11017 if (crtl->is_leaf && !cfun->calls_alloca)
11018 {
11019 if (size > PROBE_INTERVAL)
11020 ix86_emit_probe_stack_range (0, size);
11021 }
11022 else
11023 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11024 }
11025 else
11026 {
11027 if (crtl->is_leaf && !cfun->calls_alloca)
11028 {
11029 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11030 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11031 size - STACK_CHECK_PROTECT);
11032 }
11033 else
11034 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11035 }
11036 }
11037 }
11038
11039 if (allocate == 0)
11040 ;
11041 else if (!ix86_target_stack_probe ()
11042 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11043 {
11044 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11045 GEN_INT (-allocate), -1,
11046 m->fs.cfa_reg == stack_pointer_rtx);
11047 }
11048 else
11049 {
11050 rtx eax = gen_rtx_REG (Pmode, AX_REG);
11051 rtx r10 = NULL;
11052 rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11053 const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11054 bool eax_live = ix86_eax_live_at_start_p ();
11055 bool r10_live = false;
11056
11057 if (TARGET_64BIT)
11058 r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11059
11060 if (eax_live)
11061 {
11062 insn = emit_insn (gen_push (eax));
11063 allocate -= UNITS_PER_WORD;
11064 /* Note that SEH directives need to continue tracking the stack
11065 pointer even after the frame pointer has been set up. */
11066 if (sp_is_cfa_reg || TARGET_SEH)
11067 {
11068 if (sp_is_cfa_reg)
11069 m->fs.cfa_offset += UNITS_PER_WORD;
11070 RTX_FRAME_RELATED_P (insn) = 1;
11071 }
11072 }
11073
11074 if (r10_live)
11075 {
11076 r10 = gen_rtx_REG (Pmode, R10_REG);
11077 insn = emit_insn (gen_push (r10));
11078 allocate -= UNITS_PER_WORD;
11079 if (sp_is_cfa_reg || TARGET_SEH)
11080 {
11081 if (sp_is_cfa_reg)
11082 m->fs.cfa_offset += UNITS_PER_WORD;
11083 RTX_FRAME_RELATED_P (insn) = 1;
11084 }
11085 }
11086
11087 emit_move_insn (eax, GEN_INT (allocate));
11088 emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11089
11090 /* Use the fact that AX still contains ALLOCATE. */
11091 adjust_stack_insn = (Pmode == DImode
11092 ? gen_pro_epilogue_adjust_stack_di_sub
11093 : gen_pro_epilogue_adjust_stack_si_sub);
11094
11095 insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11096 stack_pointer_rtx, eax));
11097
11098 if (sp_is_cfa_reg || TARGET_SEH)
11099 {
11100 if (sp_is_cfa_reg)
11101 m->fs.cfa_offset += allocate;
11102 RTX_FRAME_RELATED_P (insn) = 1;
11103 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11104 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11105 plus_constant (Pmode, stack_pointer_rtx,
11106 -allocate)));
11107 }
11108 m->fs.sp_offset += allocate;
11109
11110 /* Use stack_pointer_rtx for relative addressing so that code
11111 works for realigned stack, too. */
11112 if (r10_live && eax_live)
11113 {
11114 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11115 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11116 gen_frame_mem (word_mode, t));
11117 t = plus_constant (Pmode, t, UNITS_PER_WORD);
11118 emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11119 gen_frame_mem (word_mode, t));
11120 }
11121 else if (eax_live || r10_live)
11122 {
11123 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11124 emit_move_insn (gen_rtx_REG (word_mode,
11125 (eax_live ? AX_REG : R10_REG)),
11126 gen_frame_mem (word_mode, t));
11127 }
11128 }
11129 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11130
11131 /* If we haven't already set up the frame pointer, do so now. */
11132 if (frame_pointer_needed && !m->fs.fp_valid)
11133 {
11134 insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11135 GEN_INT (frame.stack_pointer_offset
11136 - frame.hard_frame_pointer_offset));
11137 insn = emit_insn (insn);
11138 RTX_FRAME_RELATED_P (insn) = 1;
11139 add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11140
11141 if (m->fs.cfa_reg == stack_pointer_rtx)
11142 m->fs.cfa_reg = hard_frame_pointer_rtx;
11143 m->fs.fp_offset = frame.hard_frame_pointer_offset;
11144 m->fs.fp_valid = true;
11145 }
11146
11147 if (!int_registers_saved)
11148 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11149 if (!sse_registers_saved)
11150 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11151
11152 pic_reg_used = false;
11153 /* We don't use pic-register for pe-coff target. */
11154 if (pic_offset_table_rtx
11155 && !TARGET_PECOFF
11156 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
11157 || crtl->profile))
11158 {
11159 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
11160
11161 if (alt_pic_reg_used != INVALID_REGNUM)
11162 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
11163
11164 pic_reg_used = true;
11165 }
11166
11167 if (pic_reg_used)
11168 {
11169 if (TARGET_64BIT)
11170 {
11171 if (ix86_cmodel == CM_LARGE_PIC)
11172 {
11173 rtx label, tmp_reg;
11174
11175 gcc_assert (Pmode == DImode);
11176 label = gen_label_rtx ();
11177 emit_label (label);
11178 LABEL_PRESERVE_P (label) = 1;
11179 tmp_reg = gen_rtx_REG (Pmode, R11_REG);
11180 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
11181 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
11182 label));
11183 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
11184 insn = emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
11185 pic_offset_table_rtx, tmp_reg));
11186 }
11187 else
11188 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
11189 }
11190 else
11191 {
11192 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
11193 RTX_FRAME_RELATED_P (insn) = 1;
11194 add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11195 }
11196 }
11197
11198 /* In the pic_reg_used case, make sure that the got load isn't deleted
11199 when mcount needs it. Blockage to avoid call movement across mcount
11200 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
11201 note. */
11202 if (crtl->profile && !flag_fentry && pic_reg_used)
11203 emit_insn (gen_prologue_use (pic_offset_table_rtx));
11204
11205 if (crtl->drap_reg && !crtl->stack_realign_needed)
11206 {
11207 /* vDRAP is set up, but after reload it turns out stack realignment
11208 isn't necessary; here we emit prologue code to set up DRAP
11209 without the stack realignment adjustment. */
11210 t = choose_baseaddr (0);
11211 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11212 }
11213
11214 /* Prevent instructions from being scheduled into register save push
11215 sequence when access to the redzone area is done through frame pointer.
11216 The offset between the frame pointer and the stack pointer is calculated
11217 relative to the value of the stack pointer at the end of the function
11218 prologue, and moving instructions that access redzone area via frame
11219 pointer inside push sequence violates this assumption. */
11220 if (frame_pointer_needed && frame.red_zone_size)
11221 emit_insn (gen_memory_blockage ());
11222
11223 /* Emit cld instruction if stringops are used in the function. */
11224 if (TARGET_CLD && ix86_current_function_needs_cld)
11225 emit_insn (gen_cld ());
11226
11227 /* SEH requires that the prologue end within 256 bytes of the start of
11228 the function. Prevent instruction schedules that would extend that.
11229 Further, prevent alloca modifications to the stack pointer from being
11230 combined with prologue modifications. */
11231 if (TARGET_SEH)
11232 emit_insn (gen_prologue_use (stack_pointer_rtx));
11233 }
11234
11235 /* Emit code to restore REG using a POP insn. */
11236
11237 static void
11238 ix86_emit_restore_reg_using_pop (rtx reg)
11239 {
11240 struct machine_function *m = cfun->machine;
11241 rtx insn = emit_insn (gen_pop (reg));
11242
11243 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11244 m->fs.sp_offset -= UNITS_PER_WORD;
11245
11246 if (m->fs.cfa_reg == crtl->drap_reg
11247 && REGNO (reg) == REGNO (crtl->drap_reg))
11248 {
11249 /* Previously we'd represented the CFA as an expression
11250 like *(%ebp - 8). We've just popped that value from
11251 the stack, which means we need to reset the CFA to
11252 the drap register. This will remain until we restore
11253 the stack pointer. */
11254 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11255 RTX_FRAME_RELATED_P (insn) = 1;
11256
11257 /* This means that the DRAP register is valid for addressing too. */
11258 m->fs.drap_valid = true;
11259 return;
11260 }
11261
11262 if (m->fs.cfa_reg == stack_pointer_rtx)
11263 {
11264 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11265 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11266 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11267 RTX_FRAME_RELATED_P (insn) = 1;
11268
11269 m->fs.cfa_offset -= UNITS_PER_WORD;
11270 }
11271
11272 /* When the frame pointer is the CFA, and we pop it, we are
11273 swapping back to the stack pointer as the CFA. This happens
11274 for stack frames that don't allocate other data, so we assume
11275 the stack pointer is now pointing at the return address, i.e.
11276 the function entry state, which makes the offset be 1 word. */
11277 if (reg == hard_frame_pointer_rtx)
11278 {
11279 m->fs.fp_valid = false;
11280 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11281 {
11282 m->fs.cfa_reg = stack_pointer_rtx;
11283 m->fs.cfa_offset -= UNITS_PER_WORD;
11284
11285 add_reg_note (insn, REG_CFA_DEF_CFA,
11286 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11287 GEN_INT (m->fs.cfa_offset)));
11288 RTX_FRAME_RELATED_P (insn) = 1;
11289 }
11290 }
11291 }
11292
11293 /* Emit code to restore saved registers using POP insns. */
11294
11295 static void
11296 ix86_emit_restore_regs_using_pop (void)
11297 {
11298 unsigned int regno;
11299
11300 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11301 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11302 ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11303 }
11304
11305 /* Emit code and notes for the LEAVE instruction. */
11306
11307 static void
11308 ix86_emit_leave (void)
11309 {
11310 struct machine_function *m = cfun->machine;
11311 rtx insn = emit_insn (ix86_gen_leave ());
11312
11313 ix86_add_queued_cfa_restore_notes (insn);
11314
11315 gcc_assert (m->fs.fp_valid);
11316 m->fs.sp_valid = true;
11317 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11318 m->fs.fp_valid = false;
11319
11320 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11321 {
11322 m->fs.cfa_reg = stack_pointer_rtx;
11323 m->fs.cfa_offset = m->fs.sp_offset;
11324
11325 add_reg_note (insn, REG_CFA_DEF_CFA,
11326 plus_constant (Pmode, stack_pointer_rtx,
11327 m->fs.sp_offset));
11328 RTX_FRAME_RELATED_P (insn) = 1;
11329 }
11330 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11331 m->fs.fp_offset);
11332 }
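
/* Informational note: LEAVE is equivalent to "mov %ebp, %esp ; pop %ebp"
   (or the 64-bit counterparts), which is why the code above marks the
   stack pointer valid again at fp_offset - UNITS_PER_WORD and invalidates
   the frame pointer.  */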
11333
11334 /* Emit code to restore saved registers using MOV insns.
11335 First register is restored from CFA - CFA_OFFSET. */
11336 static void
11337 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11338 bool maybe_eh_return)
11339 {
11340 struct machine_function *m = cfun->machine;
11341 unsigned int regno;
11342
11343 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11344 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11345 {
11346 rtx reg = gen_rtx_REG (word_mode, regno);
11347 rtx insn, mem;
11348
11349 mem = choose_baseaddr (cfa_offset);
11350 mem = gen_frame_mem (word_mode, mem);
11351 insn = emit_move_insn (reg, mem);
11352
11353 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
11354 {
11355 /* Previously we'd represented the CFA as an expression
11356 like *(%ebp - 8). We've just popped that value from
11357 the stack, which means we need to reset the CFA to
11358 the drap register. This will remain until we restore
11359 the stack pointer. */
11360 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11361 RTX_FRAME_RELATED_P (insn) = 1;
11362
11363 /* This means that the DRAP register is valid for addressing. */
11364 m->fs.drap_valid = true;
11365 }
11366 else
11367 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11368
11369 cfa_offset -= UNITS_PER_WORD;
11370 }
11371 }
11372
11373 /* Emit code to restore saved SSE registers using MOV insns.
11374 First register is restored from CFA - CFA_OFFSET. */
11375 static void
11376 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
11377 bool maybe_eh_return)
11378 {
11379 unsigned int regno;
11380
11381 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11382 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
11383 {
11384 rtx reg = gen_rtx_REG (V4SFmode, regno);
11385 rtx mem;
11386
11387 mem = choose_baseaddr (cfa_offset);
11388 mem = gen_rtx_MEM (V4SFmode, mem);
11389 set_mem_align (mem, 128);
11390 emit_move_insn (reg, mem);
11391
11392 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
11393
11394 cfa_offset -= 16;
11395 }
11396 }
11397
11398 /* Restore function stack, frame, and registers. */
11399
11400 void
11401 ix86_expand_epilogue (int style)
11402 {
11403 struct machine_function *m = cfun->machine;
11404 struct machine_frame_state frame_state_save = m->fs;
11405 struct ix86_frame frame;
11406 bool restore_regs_via_mov;
11407 bool using_drap;
11408
11409 ix86_finalize_stack_realign_flags ();
11410 ix86_compute_frame_layout (&frame);
11411
11412 m->fs.sp_valid = (!frame_pointer_needed
11413 || (crtl->sp_is_unchanging
11414 && !stack_realign_fp));
11415 gcc_assert (!m->fs.sp_valid
11416 || m->fs.sp_offset == frame.stack_pointer_offset);
11417
11418 /* The FP must be valid if the frame pointer is present. */
11419 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
11420 gcc_assert (!m->fs.fp_valid
11421 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
11422
11423 /* We must have *some* valid pointer to the stack frame. */
11424 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
11425
11426 /* The DRAP is never valid at this point. */
11427 gcc_assert (!m->fs.drap_valid);
11428
11429 /* See the comment about red zone and frame
11430 pointer usage in ix86_expand_prologue. */
11431 if (frame_pointer_needed && frame.red_zone_size)
11432 emit_insn (gen_memory_blockage ());
11433
11434 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
11435 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
11436
11437 /* Determine the CFA offset of the end of the red-zone. */
11438 m->fs.red_zone_offset = 0;
11439 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
11440 {
11441 /* The red-zone begins below the return address. */
11442 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
11443
11444 /* When the register save area is in the aligned portion of
11445 the stack, determine the maximum runtime displacement that
11446 matches up with the aligned frame. */
11447 if (stack_realign_drap)
11448 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
11449 + UNITS_PER_WORD);
11450 }
11451
11452 /* Special care must be taken for the normal return case of a function
11453 using eh_return: the eax and edx registers are marked as saved, but
11454 not restored along this path. Adjust the save location to match. */
11455 if (crtl->calls_eh_return && style != 2)
11456 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
11457
11458 /* EH_RETURN requires the use of moves to function properly. */
11459 if (crtl->calls_eh_return)
11460 restore_regs_via_mov = true;
11461 /* SEH requires the use of pops to identify the epilogue. */
11462 else if (TARGET_SEH)
11463 restore_regs_via_mov = false;
11464 /* If we're only restoring one register and sp is not valid then
11465 use a move instruction to restore the register, since it's
11466 less work than reloading sp and popping the register. */
11467 else if (!m->fs.sp_valid && frame.nregs <= 1)
11468 restore_regs_via_mov = true;
11469 else if (TARGET_EPILOGUE_USING_MOVE
11470 && cfun->machine->use_fast_prologue_epilogue
11471 && (frame.nregs > 1
11472 || m->fs.sp_offset != frame.reg_save_offset))
11473 restore_regs_via_mov = true;
11474 else if (frame_pointer_needed
11475 && !frame.nregs
11476 && m->fs.sp_offset != frame.reg_save_offset)
11477 restore_regs_via_mov = true;
11478 else if (frame_pointer_needed
11479 && TARGET_USE_LEAVE
11480 && cfun->machine->use_fast_prologue_epilogue
11481 && frame.nregs == 1)
11482 restore_regs_via_mov = true;
11483 else
11484 restore_regs_via_mov = false;
11485
11486 if (restore_regs_via_mov || frame.nsseregs)
11487 {
11488 /* Ensure that the entire register save area is addressable via
11489 the stack pointer, if we will restore via sp. */
11490 if (TARGET_64BIT
11491 && m->fs.sp_offset > 0x7fffffff
11492 && !(m->fs.fp_valid || m->fs.drap_valid)
11493 && (frame.nsseregs + frame.nregs) != 0)
11494 {
11495 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11496 GEN_INT (m->fs.sp_offset
11497 - frame.sse_reg_save_offset),
11498 style,
11499 m->fs.cfa_reg == stack_pointer_rtx);
11500 }
11501 }
11502
11503 /* If there are any SSE registers to restore, then we have to do it
11504 via moves, since there's obviously no pop for SSE regs. */
11505 if (frame.nsseregs)
11506 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
11507 style == 2);
11508
11509 if (restore_regs_via_mov)
11510 {
11511 rtx t;
11512
11513 if (frame.nregs)
11514 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
11515
11516 /* eh_return epilogues need %ecx added to the stack pointer. */
11517 if (style == 2)
11518 {
11519 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
11520
11521 /* Stack align doesn't work with eh_return. */
11522 gcc_assert (!stack_realign_drap);
11523 /* Neither do regparm nested functions. */
11524 gcc_assert (!ix86_static_chain_on_stack);
11525
11526 if (frame_pointer_needed)
11527 {
11528 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
11529 t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
11530 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
11531
11532 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
11533 insn = emit_move_insn (hard_frame_pointer_rtx, t);
11534
11535 /* Note that we use SA as a temporary CFA, as the return
11536 address is at the proper place relative to it. We
11537 pretend this happens at the FP restore insn because
11538 prior to this insn the FP would be stored at the wrong
11539 offset relative to SA, and after this insn we have no
11540 other reasonable register to use for the CFA. We don't
11541 bother resetting the CFA to the SP for the duration of
11542 the return insn. */
11543 add_reg_note (insn, REG_CFA_DEF_CFA,
11544 plus_constant (Pmode, sa, UNITS_PER_WORD));
11545 ix86_add_queued_cfa_restore_notes (insn);
11546 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
11547 RTX_FRAME_RELATED_P (insn) = 1;
11548
11549 m->fs.cfa_reg = sa;
11550 m->fs.cfa_offset = UNITS_PER_WORD;
11551 m->fs.fp_valid = false;
11552
11553 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
11554 const0_rtx, style, false);
11555 }
11556 else
11557 {
11558 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
11559 t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
11560 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
11561 ix86_add_queued_cfa_restore_notes (insn);
11562
11563 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
11564 if (m->fs.cfa_offset != UNITS_PER_WORD)
11565 {
11566 m->fs.cfa_offset = UNITS_PER_WORD;
11567 add_reg_note (insn, REG_CFA_DEF_CFA,
11568 plus_constant (Pmode, stack_pointer_rtx,
11569 UNITS_PER_WORD));
11570 RTX_FRAME_RELATED_P (insn) = 1;
11571 }
11572 }
11573 m->fs.sp_offset = UNITS_PER_WORD;
11574 m->fs.sp_valid = true;
11575 }
11576 }
11577 else
11578 {
11579 /* SEH requires that the function end with (1) a stack adjustment
11580 if necessary, (2) a sequence of pops, and (3) a return or
11581 jump instruction. Prevent insns from the function body from
11582 being scheduled into this sequence. */
11583 if (TARGET_SEH)
11584 {
11585 /* Prevent a catch region from being adjacent to the standard
11586 epilogue sequence. Unfortunately, neither crtl->uses_eh_lsda nor
11587 several other flags that would be interesting to test are
11588 set up yet. */
11589 if (flag_non_call_exceptions)
11590 emit_insn (gen_nops (const1_rtx));
11591 else
11592 emit_insn (gen_blockage ());
11593 }
11594
11595 /* First step is to deallocate the stack frame so that we can
11596 pop the registers. Also do it on SEH target for very large
11597 frame as the emitted instructions aren't allowed by the ABI in
11598 epilogues. */
11599 if (!m->fs.sp_valid
11600 || (TARGET_SEH
11601 && (m->fs.sp_offset - frame.reg_save_offset
11602 >= SEH_MAX_FRAME_SIZE)))
11603 {
11604 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
11605 GEN_INT (m->fs.fp_offset
11606 - frame.reg_save_offset),
11607 style, false);
11608 }
11609 else if (m->fs.sp_offset != frame.reg_save_offset)
11610 {
11611 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11612 GEN_INT (m->fs.sp_offset
11613 - frame.reg_save_offset),
11614 style,
11615 m->fs.cfa_reg == stack_pointer_rtx);
11616 }
11617
11618 ix86_emit_restore_regs_using_pop ();
11619 }
11620
11621 /* If we used a frame pointer and haven't already got rid of it,
11622 then do so now. */
11623 if (m->fs.fp_valid)
11624 {
11625 /* If the stack pointer is valid and pointing at the frame
11626 pointer store address, then we only need a pop. */
11627 if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
11628 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11629 /* Leave results in shorter dependency chains on CPUs that are
11630 able to grok it fast. */
11631 else if (TARGET_USE_LEAVE
11632 || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
11633 || !cfun->machine->use_fast_prologue_epilogue)
11634 ix86_emit_leave ();
11635 else
11636 {
11637 pro_epilogue_adjust_stack (stack_pointer_rtx,
11638 hard_frame_pointer_rtx,
11639 const0_rtx, style, !using_drap);
11640 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
11641 }
11642 }
11643
11644 if (using_drap)
11645 {
11646 int param_ptr_offset = UNITS_PER_WORD;
11647 rtx insn;
11648
11649 gcc_assert (stack_realign_drap);
11650
11651 if (ix86_static_chain_on_stack)
11652 param_ptr_offset += UNITS_PER_WORD;
11653 if (!call_used_regs[REGNO (crtl->drap_reg)])
11654 param_ptr_offset += UNITS_PER_WORD;
11655
11656 insn = emit_insn (gen_rtx_SET
11657 (VOIDmode, stack_pointer_rtx,
11658 gen_rtx_PLUS (Pmode,
11659 crtl->drap_reg,
11660 GEN_INT (-param_ptr_offset))));
11661 m->fs.cfa_reg = stack_pointer_rtx;
11662 m->fs.cfa_offset = param_ptr_offset;
11663 m->fs.sp_offset = param_ptr_offset;
11664 m->fs.realigned = false;
11665
11666 add_reg_note (insn, REG_CFA_DEF_CFA,
11667 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11668 GEN_INT (param_ptr_offset)));
11669 RTX_FRAME_RELATED_P (insn) = 1;
11670
11671 if (!call_used_regs[REGNO (crtl->drap_reg)])
11672 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
11673 }
11674
11675 /* At this point the stack pointer must be valid, and we must have
11676 restored all of the registers. We may not have deallocated the
11677 entire stack frame. We've delayed this until now because it may
11678 be possible to merge the local stack deallocation with the
11679 deallocation forced by ix86_static_chain_on_stack. */
11680 gcc_assert (m->fs.sp_valid);
11681 gcc_assert (!m->fs.fp_valid);
11682 gcc_assert (!m->fs.realigned);
11683 if (m->fs.sp_offset != UNITS_PER_WORD)
11684 {
11685 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11686 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
11687 style, true);
11688 }
11689 else
11690 ix86_add_queued_cfa_restore_notes (get_last_insn ());
11691
11692 /* Sibcall epilogues don't want a return instruction. */
11693 if (style == 0)
11694 {
11695 m->fs = frame_state_save;
11696 return;
11697 }
11698
11699 if (crtl->args.pops_args && crtl->args.size)
11700 {
11701 rtx popc = GEN_INT (crtl->args.pops_args);
11702
11703       /* i386 can only pop 64K bytes.  If asked to pop more, pop the return
11704 	 address, do an explicit add, and jump indirectly to the caller.  */
11705
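      /* For illustration, the sequence emitted for the large case is
	 roughly (AT&T syntax, 32-bit registers assumed):

	   popl  %ecx          # pop the return address into %ecx
	   addl  $N, %esp      # N = crtl->args.pops_args
	   jmp   *%ecx         # return to the caller

	 while the common small case below is a single "ret $N".  */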
11706 if (crtl->args.pops_args >= 65536)
11707 {
11708 rtx ecx = gen_rtx_REG (SImode, CX_REG);
11709 rtx insn;
11710
11711 /* There is no "pascal" calling convention in any 64bit ABI. */
11712 gcc_assert (!TARGET_64BIT);
11713
11714 insn = emit_insn (gen_pop (ecx));
11715 m->fs.cfa_offset -= UNITS_PER_WORD;
11716 m->fs.sp_offset -= UNITS_PER_WORD;
11717
11718 rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11719 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11720 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11721 add_reg_note (insn, REG_CFA_REGISTER,
11722 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
11723 RTX_FRAME_RELATED_P (insn) = 1;
11724
11725 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11726 popc, -1, true);
11727 emit_jump_insn (gen_simple_return_indirect_internal (ecx));
11728 }
11729 else
11730 emit_jump_insn (gen_simple_return_pop_internal (popc));
11731 }
11732 else
11733 emit_jump_insn (gen_simple_return_internal ());
11734
11735 /* Restore the state back to the state from the prologue,
11736 so that it's correct for the next epilogue. */
11737 m->fs = frame_state_save;
11738 }
11739
11740 /* Reset state from the function's potential modifications.  */
11741
11742 static void
11743 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
11744 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
11745 {
11746 if (pic_offset_table_rtx)
11747 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
11748 #if TARGET_MACHO
11749 /* Mach-O doesn't support labels at the end of objects, so if
11750 it looks like we might want one, insert a NOP. */
11751 {
11752 rtx insn = get_last_insn ();
11753 rtx deleted_debug_label = NULL_RTX;
11754 while (insn
11755 && NOTE_P (insn)
11756 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
11757 {
11758 	/* Don't insert a nop if only NOTE_INSN_DELETED_DEBUG_LABEL
11759 	   notes are present; instead set their CODE_LABEL_NUMBER to -1,
11760 	   otherwise there would be code generation differences
11761 	   between -g and -g0.  */
11762 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11763 deleted_debug_label = insn;
11764 insn = PREV_INSN (insn);
11765 }
11766 if (insn
11767 && (LABEL_P (insn)
11768 || (NOTE_P (insn)
11769 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
11770 fputs ("\tnop\n", file);
11771 else if (deleted_debug_label)
11772 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
11773 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
11774 CODE_LABEL_NUMBER (insn) = -1;
11775 }
11776 #endif
11777
11778 }
11779
11780 /* Return a scratch register to use in the split stack prologue.  The
11781    split stack prologue is used for -fsplit-stack.  It consists of the
11782    first instructions in the function, emitted even before the regular
11783    prologue.  The scratch register can be any caller-saved register
11784    which is not used for parameters or for the static chain.  */
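/* Concretely, the code below picks roughly: %r11 for 64-bit code; for
   32-bit code, %eax for fastcall functions, %edx for thiscall functions
   (%eax when there is a static chain), and otherwise %ecx, falling back
   to %edx when the static chain occupies %ecx.  The remaining
   combinations are rejected with INVALID_REGNUM.  (Descriptive summary
   of the function body.)  */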
11785
11786 static unsigned int
11787 split_stack_prologue_scratch_regno (void)
11788 {
11789 if (TARGET_64BIT)
11790 return R11_REG;
11791 else
11792 {
11793 bool is_fastcall, is_thiscall;
11794 int regparm;
11795
11796 is_fastcall = (lookup_attribute ("fastcall",
11797 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11798 != NULL);
11799 is_thiscall = (lookup_attribute ("thiscall",
11800 TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
11801 != NULL);
11802 regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
11803
11804 if (is_fastcall)
11805 {
11806 if (DECL_STATIC_CHAIN (cfun->decl))
11807 {
11808 sorry ("-fsplit-stack does not support fastcall with "
11809 "nested function");
11810 return INVALID_REGNUM;
11811 }
11812 return AX_REG;
11813 }
11814 else if (is_thiscall)
11815 {
11816 if (!DECL_STATIC_CHAIN (cfun->decl))
11817 return DX_REG;
11818 return AX_REG;
11819 }
11820 else if (regparm < 3)
11821 {
11822 if (!DECL_STATIC_CHAIN (cfun->decl))
11823 return CX_REG;
11824 else
11825 {
11826 if (regparm >= 2)
11827 {
11828 	      sorry ("-fsplit-stack does not support 2 register "
11829 		     "parameters for a nested function");
11830 return INVALID_REGNUM;
11831 }
11832 return DX_REG;
11833 }
11834 }
11835 else
11836 {
11837 /* FIXME: We could make this work by pushing a register
11838 around the addition and comparison. */
11839 sorry ("-fsplit-stack does not support 3 register parameters");
11840 return INVALID_REGNUM;
11841 }
11842 }
11843 }
11844
11845 /* A SYMBOL_REF for the function which allocates new stack space for
11846    -fsplit-stack.  */
11847
11848 static GTY(()) rtx split_stack_fn;
11849
11850 /* A SYMBOL_REF for the function which gets more stack space when
11851    using the large model.  */
11852
11853 static GTY(()) rtx split_stack_fn_large;
11854
11855 /* Handle -fsplit-stack. These are the first instructions in the
11856 function, even before the regular prologue. */
11857
11858 void
11859 ix86_expand_split_stack_prologue (void)
11860 {
11861 struct ix86_frame frame;
11862 HOST_WIDE_INT allocate;
11863 unsigned HOST_WIDE_INT args_size;
11864 rtx label, limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
11865 rtx scratch_reg = NULL_RTX;
11866 rtx varargs_label = NULL_RTX;
11867 rtx fn;
11868
11869 gcc_assert (flag_split_stack && reload_completed);
11870
11871 ix86_finalize_stack_realign_flags ();
11872 ix86_compute_frame_layout (&frame);
11873 allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
11874
11875 /* This is the label we will branch to if we have enough stack
11876 space. We expect the basic block reordering pass to reverse this
11877 branch if optimizing, so that we branch in the unlikely case. */
11878 label = gen_label_rtx ();
11879
11880 /* We need to compare the stack pointer minus the frame size with
11881 the stack boundary in the TCB. The stack boundary always gives
11882 us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
11883 can compare directly. Otherwise we need to do an addition. */
11884
11885 limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
11886 UNSPEC_STACK_CHECK);
11887 limit = gen_rtx_CONST (Pmode, limit);
11888 limit = gen_rtx_MEM (Pmode, limit);
11889 if (allocate < SPLIT_STACK_AVAILABLE)
11890 current = stack_pointer_rtx;
11891 else
11892 {
11893 unsigned int scratch_regno;
11894 rtx offset;
11895
11896 /* We need a scratch register to hold the stack pointer minus
11897 the required frame size. Since this is the very start of the
11898 function, the scratch register can be any caller-saved
11899 register which is not used for parameters. */
11900 offset = GEN_INT (- allocate);
11901 scratch_regno = split_stack_prologue_scratch_regno ();
11902 if (scratch_regno == INVALID_REGNUM)
11903 return;
11904 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
11905 if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
11906 {
11907 /* We don't use ix86_gen_add3 in this case because it will
11908 want to split to lea, but when not optimizing the insn
11909 will not be split after this point. */
11910 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
11911 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11912 offset)));
11913 }
11914 else
11915 {
11916 emit_move_insn (scratch_reg, offset);
11917 emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
11918 stack_pointer_rtx));
11919 }
11920 current = scratch_reg;
11921 }
11922
11923 ix86_expand_branch (GEU, current, limit, label);
11924 jump_insn = get_last_insn ();
11925 JUMP_LABEL (jump_insn) = label;
11926
11927 /* Mark the jump as very likely to be taken. */
11928 add_int_reg_note (jump_insn, REG_BR_PROB,
11929 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
11930
11931 if (split_stack_fn == NULL_RTX)
11932 split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11933 fn = split_stack_fn;
11934
11935 /* Get more stack space. We pass in the desired stack space and the
11936 size of the arguments to copy to the new stack. In 32-bit mode
11937 we push the parameters; __morestack will return on a new stack
11938 anyhow. In 64-bit mode we pass the parameters in r10 and
11939 r11. */
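  /* Illustrative shape of the two calling sequences set up below
     (not emitted verbatim):

       64-bit:   movq  $allocate, %r10
                 movq  $args_size, %r11
                 call  __morestack

       32-bit:   pushl $args_size
                 pushl $allocate
                 call  __morestack  */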
11940 allocate_rtx = GEN_INT (allocate);
11941 args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
11942 call_fusage = NULL_RTX;
11943 if (TARGET_64BIT)
11944 {
11945 rtx reg10, reg11;
11946
11947 reg10 = gen_rtx_REG (Pmode, R10_REG);
11948 reg11 = gen_rtx_REG (Pmode, R11_REG);
11949
11950 /* If this function uses a static chain, it will be in %r10.
11951 Preserve it across the call to __morestack. */
11952 if (DECL_STATIC_CHAIN (cfun->decl))
11953 {
11954 rtx rax;
11955
11956 rax = gen_rtx_REG (word_mode, AX_REG);
11957 emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
11958 use_reg (&call_fusage, rax);
11959 }
11960
11961 if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
11962 && !TARGET_PECOFF)
11963 {
11964 HOST_WIDE_INT argval;
11965
11966 gcc_assert (Pmode == DImode);
11967 /* When using the large model we need to load the address
11968 into a register, and we've run out of registers. So we
11969 switch to a different calling convention, and we call a
11970 different function: __morestack_large. We pass the
11971 argument size in the upper 32 bits of r10 and pass the
11972 frame size in the lower 32 bits. */
11973 gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
11974 gcc_assert ((args_size & 0xffffffff) == args_size);
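	  /* The two values are then packed into %r10 as roughly

	       r10 = (args_size << 32) | allocate

	     with the shift below written as two 16-bit shifts, presumably
	     to keep each shift count small for hosts or compilers that
	     warn about full-width shifts.  (Illustrative note only.)  */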
11975
11976 if (split_stack_fn_large == NULL_RTX)
11977 split_stack_fn_large =
11978 gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
11979
11980 if (ix86_cmodel == CM_LARGE_PIC)
11981 {
11982 rtx label, x;
11983
11984 label = gen_label_rtx ();
11985 emit_label (label);
11986 LABEL_PRESERVE_P (label) = 1;
11987 emit_insn (gen_set_rip_rex64 (reg10, label));
11988 emit_insn (gen_set_got_offset_rex64 (reg11, label));
11989 emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
11990 x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
11991 UNSPEC_GOT);
11992 x = gen_rtx_CONST (Pmode, x);
11993 emit_move_insn (reg11, x);
11994 x = gen_rtx_PLUS (Pmode, reg10, reg11);
11995 x = gen_const_mem (Pmode, x);
11996 emit_move_insn (reg11, x);
11997 }
11998 else
11999 emit_move_insn (reg11, split_stack_fn_large);
12000
12001 fn = reg11;
12002
12003 argval = ((args_size << 16) << 16) + allocate;
12004 emit_move_insn (reg10, GEN_INT (argval));
12005 }
12006 else
12007 {
12008 emit_move_insn (reg10, allocate_rtx);
12009 emit_move_insn (reg11, GEN_INT (args_size));
12010 use_reg (&call_fusage, reg11);
12011 }
12012
12013 use_reg (&call_fusage, reg10);
12014 }
12015 else
12016 {
12017 emit_insn (gen_push (GEN_INT (args_size)));
12018 emit_insn (gen_push (allocate_rtx));
12019 }
12020 call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12021 GEN_INT (UNITS_PER_WORD), constm1_rtx,
12022 NULL_RTX, false);
12023 add_function_usage_to (call_insn, call_fusage);
12024
12025 /* In order to make call/return prediction work right, we now need
12026 to execute a return instruction. See
12027 libgcc/config/i386/morestack.S for the details on how this works.
12028
12029 For flow purposes gcc must not see this as a return
12030 instruction--we need control flow to continue at the subsequent
12031 label. Therefore, we use an unspec. */
12032 gcc_assert (crtl->args.pops_args < 65536);
12033 emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12034
12035   /* If we are in 64-bit mode and this function uses a static chain,
12036      we saved %r10 in %rax before calling __morestack.  */
12037 if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12038 emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12039 gen_rtx_REG (word_mode, AX_REG));
12040
12041   /* If this function calls va_start, we need to store a pointer to
12042      the arguments on the old stack, because they may not all have
12043      been copied to the new stack.  At this point the old stack can be
12044      found at the frame pointer value used by __morestack, because
12045      __morestack has set that up before calling back to us.  Here we
12046      store that pointer in a scratch register, and in
12047      ix86_expand_prologue we store the scratch register in a stack
12048      slot.  */
12049 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12050 {
12051 unsigned int scratch_regno;
12052 rtx frame_reg;
12053 int words;
12054
12055 scratch_regno = split_stack_prologue_scratch_regno ();
12056 scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12057 frame_reg = gen_rtx_REG (Pmode, BP_REG);
12058
12059 /* 64-bit:
12060 fp -> old fp value
12061 return address within this function
12062 return address of caller of this function
12063 stack arguments
12064 So we add three words to get to the stack arguments.
12065
12066 32-bit:
12067 fp -> old fp value
12068 return address within this function
12069 first argument to __morestack
12070 second argument to __morestack
12071 return address of caller of this function
12072 stack arguments
12073 So we add five words to get to the stack arguments.
12074 */
12075 words = TARGET_64BIT ? 3 : 5;
12076 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12077 gen_rtx_PLUS (Pmode, frame_reg,
12078 GEN_INT (words * UNITS_PER_WORD))));
12079
12080 varargs_label = gen_label_rtx ();
12081 emit_jump_insn (gen_jump (varargs_label));
12082 JUMP_LABEL (get_last_insn ()) = varargs_label;
12083
12084 emit_barrier ();
12085 }
12086
12087 emit_label (label);
12088 LABEL_NUSES (label) = 1;
12089
12090 /* If this function calls va_start, we now have to set the scratch
12091 register for the case where we do not call __morestack. In this
12092 case we need to set it based on the stack pointer. */
12093 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12094 {
12095 emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12096 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12097 GEN_INT (UNITS_PER_WORD))));
12098
12099 emit_label (varargs_label);
12100 LABEL_NUSES (varargs_label) = 1;
12101 }
12102 }
12103
12104 /* We may have to tell the dataflow pass that the split stack prologue
12105 is initializing a scratch register. */
12106
12107 static void
12108 ix86_live_on_entry (bitmap regs)
12109 {
12110 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12111 {
12112 gcc_assert (flag_split_stack);
12113 bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12114 }
12115 }
12116 \f
12117 /* Extract the parts of an RTL expression that is a valid memory address
12118    for an instruction.  Return 0 if the structure of the address is
12119    grossly off.  Return -1 if the address contains ASHIFT, so it is not
12120    strictly valid but is still used to compute the length of an lea insn.  */
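/* For example, an address such as

     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
                       (reg:SI si))
              (const_int 12))

   i.e. 12(%esi,%ebx,4) in AT&T syntax, decomposes into base = %esi,
   index = %ebx, scale = 4 and disp = (const_int 12).  (Illustrative
   example only, not taken from actual compiler output.)  */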
12121
12122 int
12123 ix86_decompose_address (rtx addr, struct ix86_address *out)
12124 {
12125 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12126 rtx base_reg, index_reg;
12127 HOST_WIDE_INT scale = 1;
12128 rtx scale_rtx = NULL_RTX;
12129 rtx tmp;
12130 int retval = 1;
12131 enum ix86_address_seg seg = SEG_DEFAULT;
12132
12133   /* Allow zero-extended SImode addresses;
12134      they will be emitted with the addr32 prefix.  */
12135 if (TARGET_64BIT && GET_MODE (addr) == DImode)
12136 {
12137 if (GET_CODE (addr) == ZERO_EXTEND
12138 && GET_MODE (XEXP (addr, 0)) == SImode)
12139 {
12140 addr = XEXP (addr, 0);
12141 if (CONST_INT_P (addr))
12142 return 0;
12143 }
12144 else if (GET_CODE (addr) == AND
12145 && const_32bit_mask (XEXP (addr, 1), DImode))
12146 {
12147 addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12148 if (addr == NULL_RTX)
12149 return 0;
12150
12151 if (CONST_INT_P (addr))
12152 return 0;
12153 }
12154 }
12155
12156   /* Allow SImode subregs of DImode addresses;
12157      they will be emitted with the addr32 prefix.  */
12158 if (TARGET_64BIT && GET_MODE (addr) == SImode)
12159 {
12160 if (GET_CODE (addr) == SUBREG
12161 && GET_MODE (SUBREG_REG (addr)) == DImode)
12162 {
12163 addr = SUBREG_REG (addr);
12164 if (CONST_INT_P (addr))
12165 return 0;
12166 }
12167 }
12168
12169 if (REG_P (addr))
12170 base = addr;
12171 else if (GET_CODE (addr) == SUBREG)
12172 {
12173 if (REG_P (SUBREG_REG (addr)))
12174 base = addr;
12175 else
12176 return 0;
12177 }
12178 else if (GET_CODE (addr) == PLUS)
12179 {
12180 rtx addends[4], op;
12181 int n = 0, i;
12182
12183 op = addr;
12184 do
12185 {
12186 if (n >= 4)
12187 return 0;
12188 addends[n++] = XEXP (op, 1);
12189 op = XEXP (op, 0);
12190 }
12191 while (GET_CODE (op) == PLUS);
12192 if (n >= 4)
12193 return 0;
12194 addends[n] = op;
12195
12196 for (i = n; i >= 0; --i)
12197 {
12198 op = addends[i];
12199 switch (GET_CODE (op))
12200 {
12201 case MULT:
12202 if (index)
12203 return 0;
12204 index = XEXP (op, 0);
12205 scale_rtx = XEXP (op, 1);
12206 break;
12207
12208 case ASHIFT:
12209 if (index)
12210 return 0;
12211 index = XEXP (op, 0);
12212 tmp = XEXP (op, 1);
12213 if (!CONST_INT_P (tmp))
12214 return 0;
12215 scale = INTVAL (tmp);
12216 if ((unsigned HOST_WIDE_INT) scale > 3)
12217 return 0;
12218 scale = 1 << scale;
12219 break;
12220
12221 case ZERO_EXTEND:
12222 op = XEXP (op, 0);
12223 if (GET_CODE (op) != UNSPEC)
12224 return 0;
12225 /* FALLTHRU */
12226
12227 case UNSPEC:
12228 if (XINT (op, 1) == UNSPEC_TP
12229 && TARGET_TLS_DIRECT_SEG_REFS
12230 && seg == SEG_DEFAULT)
12231 seg = DEFAULT_TLS_SEG_REG;
12232 else
12233 return 0;
12234 break;
12235
12236 case SUBREG:
12237 if (!REG_P (SUBREG_REG (op)))
12238 return 0;
12239 /* FALLTHRU */
12240
12241 case REG:
12242 if (!base)
12243 base = op;
12244 else if (!index)
12245 index = op;
12246 else
12247 return 0;
12248 break;
12249
12250 case CONST:
12251 case CONST_INT:
12252 case SYMBOL_REF:
12253 case LABEL_REF:
12254 if (disp)
12255 return 0;
12256 disp = op;
12257 break;
12258
12259 default:
12260 return 0;
12261 }
12262 }
12263 }
12264 else if (GET_CODE (addr) == MULT)
12265 {
12266 index = XEXP (addr, 0); /* index*scale */
12267 scale_rtx = XEXP (addr, 1);
12268 }
12269 else if (GET_CODE (addr) == ASHIFT)
12270 {
12271 /* We're called for lea too, which implements ashift on occasion. */
12272 index = XEXP (addr, 0);
12273 tmp = XEXP (addr, 1);
12274 if (!CONST_INT_P (tmp))
12275 return 0;
12276 scale = INTVAL (tmp);
12277 if ((unsigned HOST_WIDE_INT) scale > 3)
12278 return 0;
12279 scale = 1 << scale;
12280 retval = -1;
12281 }
12282 else
12283 disp = addr; /* displacement */
12284
12285 if (index)
12286 {
12287 if (REG_P (index))
12288 ;
12289 else if (GET_CODE (index) == SUBREG
12290 && REG_P (SUBREG_REG (index)))
12291 ;
12292 else
12293 return 0;
12294 }
12295
12296 /* Extract the integral value of scale. */
12297 if (scale_rtx)
12298 {
12299 if (!CONST_INT_P (scale_rtx))
12300 return 0;
12301 scale = INTVAL (scale_rtx);
12302 }
12303
12304 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12305 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12306
12307 /* Avoid useless 0 displacement. */
12308 if (disp == const0_rtx && (base || index))
12309 disp = NULL_RTX;
12310
12311   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
12312 if (base_reg && index_reg && scale == 1
12313 && (index_reg == arg_pointer_rtx
12314 || index_reg == frame_pointer_rtx
12315 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12316 {
12317 rtx tmp;
12318 tmp = base, base = index, index = tmp;
12319 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
12320 }
12321
12322 /* Special case: %ebp cannot be encoded as a base without a displacement.
12323 Similarly %r13. */
12324 if (!disp
12325 && base_reg
12326 && (base_reg == hard_frame_pointer_rtx
12327 || base_reg == frame_pointer_rtx
12328 || base_reg == arg_pointer_rtx
12329 || (REG_P (base_reg)
12330 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
12331 || REGNO (base_reg) == R13_REG))))
12332 disp = const0_rtx;
12333
12334   /* Special case: on K6, [%esi] causes the instruction to be vector
12335      decoded.  Avoid this by transforming it to [%esi+0].
12336      Reload calls address legitimization without cfun defined, so we need
12337      to test cfun for being non-NULL.  */
12338 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
12339 && base_reg && !index_reg && !disp
12340 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
12341 disp = const0_rtx;
12342
12343 /* Special case: encode reg+reg instead of reg*2. */
12344 if (!base && index && scale == 2)
12345 base = index, base_reg = index_reg, scale = 1;
12346
12347 /* Special case: scaling cannot be encoded without base or displacement. */
12348 if (!base && !disp && index && scale != 1)
12349 disp = const0_rtx;
12350
12351 out->base = base;
12352 out->index = index;
12353 out->disp = disp;
12354 out->scale = scale;
12355 out->seg = seg;
12356
12357 return retval;
12358 }
12359 \f
12360 /* Return the cost of the memory address X.
12361    For i386, it is better to use a complex address than let gcc copy
12362    the address into a reg and make a new pseudo.  But not if the address
12363    requires two regs - that would mean more pseudos with longer
12364    lifetimes.  */
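/* As a rough illustration of the scheme below: an address built only
   from hard registers, e.g. (%eax) or 4(%eax,%ebx), costs 1; an address
   using one pseudo register costs 2; and an address using two distinct
   pseudo registers costs 3 (before the K6-specific penalty).  */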
12365 static int
12366 ix86_address_cost (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED,
12367 addr_space_t as ATTRIBUTE_UNUSED,
12368 bool speed ATTRIBUTE_UNUSED)
12369 {
12370 struct ix86_address parts;
12371 int cost = 1;
12372 int ok = ix86_decompose_address (x, &parts);
12373
12374 gcc_assert (ok);
12375
12376 if (parts.base && GET_CODE (parts.base) == SUBREG)
12377 parts.base = SUBREG_REG (parts.base);
12378 if (parts.index && GET_CODE (parts.index) == SUBREG)
12379 parts.index = SUBREG_REG (parts.index);
12380
12381 /* Attempt to minimize number of registers in the address. */
12382 if ((parts.base
12383 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
12384 || (parts.index
12385 && (!REG_P (parts.index)
12386 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
12387 cost++;
12388
12389 if (parts.base
12390 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
12391 && parts.index
12392 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
12393 && parts.base != parts.index)
12394 cost++;
12395
12396   /* The AMD K6 does not like addresses with the ModR/M byte set to
12397      00_xxx_100b, since its predecode logic can't detect the length of
12398      such instructions and they degenerate to vector decoding.  Increase
12399      the cost of such addresses here.  The penalty is at least 2 cycles.
12400      It may be worthwhile to split such addresses or even refuse them
12401      altogether.
12402 
12403      The following addressing modes are affected:
12404       [base+scale*index]
12405       [scale*index+disp]
12406       [base+index]
12407 
12408      The first and last cases may be avoidable by explicitly coding the zero
12409      displacement in the memory address, but I don't have an AMD K6 machine
     handy to check this theory.  */
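  /* For instance, emitting 0x0(%esi,%ecx,2) instead of (%esi,%ecx,2) adds a
     one-byte displacement but avoids the 00_xxx_100b ModR/M pattern
     (untested on real K6 hardware, as noted above).  */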
12410
12411 if (TARGET_K6
12412 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
12413 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
12414 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
12415 cost += 10;
12416
12417 return cost;
12418 }
12419 \f
12420 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
12421    this is used to form addresses to local data when -fPIC is in
12422    use.  */
12423
12424 static bool
12425 darwin_local_data_pic (rtx disp)
12426 {
12427 return (GET_CODE (disp) == UNSPEC
12428 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
12429 }
12430
12431 /* Determine if a given RTX is a valid constant. We already know this
12432 satisfies CONSTANT_P. */
12433
12434 static bool
12435 ix86_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
12436 {
12437 switch (GET_CODE (x))
12438 {
12439 case CONST:
12440 x = XEXP (x, 0);
12441
12442 if (GET_CODE (x) == PLUS)
12443 {
12444 if (!CONST_INT_P (XEXP (x, 1)))
12445 return false;
12446 x = XEXP (x, 0);
12447 }
12448
12449 if (TARGET_MACHO && darwin_local_data_pic (x))
12450 return true;
12451
12452 /* Only some unspecs are valid as "constants". */
12453 if (GET_CODE (x) == UNSPEC)
12454 switch (XINT (x, 1))
12455 {
12456 case UNSPEC_GOT:
12457 case UNSPEC_GOTOFF:
12458 case UNSPEC_PLTOFF:
12459 return TARGET_64BIT;
12460 case UNSPEC_TPOFF:
12461 case UNSPEC_NTPOFF:
12462 x = XVECEXP (x, 0, 0);
12463 return (GET_CODE (x) == SYMBOL_REF
12464 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12465 case UNSPEC_DTPOFF:
12466 x = XVECEXP (x, 0, 0);
12467 return (GET_CODE (x) == SYMBOL_REF
12468 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
12469 default:
12470 return false;
12471 }
12472
12473 /* We must have drilled down to a symbol. */
12474 if (GET_CODE (x) == LABEL_REF)
12475 return true;
12476 if (GET_CODE (x) != SYMBOL_REF)
12477 return false;
12478 /* FALLTHRU */
12479
12480 case SYMBOL_REF:
12481 /* TLS symbols are never valid. */
12482 if (SYMBOL_REF_TLS_MODEL (x))
12483 return false;
12484
12485 /* DLLIMPORT symbols are never valid. */
12486 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12487 && SYMBOL_REF_DLLIMPORT_P (x))
12488 return false;
12489
12490 #if TARGET_MACHO
12491 /* mdynamic-no-pic */
12492 if (MACHO_DYNAMIC_NO_PIC_P)
12493 return machopic_symbol_defined_p (x);
12494 #endif
12495 break;
12496
12497 case CONST_DOUBLE:
12498 if (GET_MODE (x) == TImode
12499 && x != CONST0_RTX (TImode)
12500 && !TARGET_64BIT)
12501 return false;
12502 break;
12503
12504 case CONST_VECTOR:
12505 if (!standard_sse_constant_p (x))
12506 return false;
12507
12508 default:
12509 break;
12510 }
12511
12512 /* Otherwise we handle everything else in the move patterns. */
12513 return true;
12514 }
12515
12516 /* Determine if it's legal to put X into the constant pool. This
12517 is not possible for the address of thread-local symbols, which
12518 is checked above. */
12519
12520 static bool
12521 ix86_cannot_force_const_mem (enum machine_mode mode, rtx x)
12522 {
12523 /* We can always put integral constants and vectors in memory. */
12524 switch (GET_CODE (x))
12525 {
12526 case CONST_INT:
12527 case CONST_DOUBLE:
12528 case CONST_VECTOR:
12529 return false;
12530
12531 default:
12532 break;
12533 }
12534 return !ix86_legitimate_constant_p (mode, x);
12535 }
12536
12537 /* Return true if the symbol is marked as dllimport, or as a stub
12538    variable; otherwise return false.  */
12539
12540 static bool
12541 is_imported_p (rtx x)
12542 {
12543 if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
12544 || GET_CODE (x) != SYMBOL_REF)
12545 return false;
12546
12547 return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
12548 }
12549
12550
12551 /* Nonzero if the constant value X is a legitimate general operand
12552 when generating PIC code. It is given that flag_pic is on and
12553 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
12554
12555 bool
12556 legitimate_pic_operand_p (rtx x)
12557 {
12558 rtx inner;
12559
12560 switch (GET_CODE (x))
12561 {
12562 case CONST:
12563 inner = XEXP (x, 0);
12564 if (GET_CODE (inner) == PLUS
12565 && CONST_INT_P (XEXP (inner, 1)))
12566 inner = XEXP (inner, 0);
12567
12568 /* Only some unspecs are valid as "constants". */
12569 if (GET_CODE (inner) == UNSPEC)
12570 switch (XINT (inner, 1))
12571 {
12572 case UNSPEC_GOT:
12573 case UNSPEC_GOTOFF:
12574 case UNSPEC_PLTOFF:
12575 return TARGET_64BIT;
12576 case UNSPEC_TPOFF:
12577 x = XVECEXP (inner, 0, 0);
12578 return (GET_CODE (x) == SYMBOL_REF
12579 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
12580 case UNSPEC_MACHOPIC_OFFSET:
12581 return legitimate_pic_address_disp_p (x);
12582 default:
12583 return false;
12584 }
12585 /* FALLTHRU */
12586
12587 case SYMBOL_REF:
12588 case LABEL_REF:
12589 return legitimate_pic_address_disp_p (x);
12590
12591 default:
12592 return true;
12593 }
12594 }
12595
12596 /* Determine if a given CONST RTX is a valid memory displacement
12597 in PIC mode. */
12598
12599 bool
12600 legitimate_pic_address_disp_p (rtx disp)
12601 {
12602 bool saw_plus;
12603
12604 /* In 64bit mode we can allow direct addresses of symbols and labels
12605 when they are not dynamic symbols. */
12606 if (TARGET_64BIT)
12607 {
12608 rtx op0 = disp, op1;
12609
12610 switch (GET_CODE (disp))
12611 {
12612 case LABEL_REF:
12613 return true;
12614
12615 case CONST:
12616 if (GET_CODE (XEXP (disp, 0)) != PLUS)
12617 break;
12618 op0 = XEXP (XEXP (disp, 0), 0);
12619 op1 = XEXP (XEXP (disp, 0), 1);
12620 if (!CONST_INT_P (op1)
12621 || INTVAL (op1) >= 16*1024*1024
12622 || INTVAL (op1) < -16*1024*1024)
12623 break;
12624 if (GET_CODE (op0) == LABEL_REF)
12625 return true;
12626 if (GET_CODE (op0) == CONST
12627 && GET_CODE (XEXP (op0, 0)) == UNSPEC
12628 && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
12629 return true;
12630 if (GET_CODE (op0) == UNSPEC
12631 && XINT (op0, 1) == UNSPEC_PCREL)
12632 return true;
12633 if (GET_CODE (op0) != SYMBOL_REF)
12634 break;
12635 /* FALLTHRU */
12636
12637 case SYMBOL_REF:
12638 	  /* TLS references should always be enclosed in UNSPEC.
12639 	     The dllimported symbol always needs to be resolved.  */
12640 if (SYMBOL_REF_TLS_MODEL (op0)
12641 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
12642 return false;
12643
12644 if (TARGET_PECOFF)
12645 {
12646 if (is_imported_p (op0))
12647 return true;
12648
12649 if (SYMBOL_REF_FAR_ADDR_P (op0)
12650 || !SYMBOL_REF_LOCAL_P (op0))
12651 break;
12652
12653 	      /* Function symbols need to be resolved only for
12654 		 the large model.
12655 		 For the small model we don't need to resolve anything
12656 		 here.  */
12657 if ((ix86_cmodel != CM_LARGE_PIC
12658 && SYMBOL_REF_FUNCTION_P (op0))
12659 || ix86_cmodel == CM_SMALL_PIC)
12660 return true;
12661 	      /* Non-external symbols don't need to be resolved for
12662 		 the large and medium models.  */
12663 if ((ix86_cmodel == CM_LARGE_PIC
12664 || ix86_cmodel == CM_MEDIUM_PIC)
12665 && !SYMBOL_REF_EXTERNAL_P (op0))
12666 return true;
12667 }
12668 else if (!SYMBOL_REF_FAR_ADDR_P (op0)
12669 && SYMBOL_REF_LOCAL_P (op0)
12670 && ix86_cmodel != CM_LARGE_PIC)
12671 return true;
12672 break;
12673
12674 default:
12675 break;
12676 }
12677 }
12678 if (GET_CODE (disp) != CONST)
12679 return false;
12680 disp = XEXP (disp, 0);
12681
12682 if (TARGET_64BIT)
12683 {
12684       /* It is not safe to allow PLUS expressions here; this limits the
12685 	 allowed distance of GOT tables.  We should not need these anyway.  */
12686 if (GET_CODE (disp) != UNSPEC
12687 || (XINT (disp, 1) != UNSPEC_GOTPCREL
12688 && XINT (disp, 1) != UNSPEC_GOTOFF
12689 && XINT (disp, 1) != UNSPEC_PCREL
12690 && XINT (disp, 1) != UNSPEC_PLTOFF))
12691 return false;
12692
12693 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
12694 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
12695 return false;
12696 return true;
12697 }
12698
12699 saw_plus = false;
12700 if (GET_CODE (disp) == PLUS)
12701 {
12702 if (!CONST_INT_P (XEXP (disp, 1)))
12703 return false;
12704 disp = XEXP (disp, 0);
12705 saw_plus = true;
12706 }
12707
12708 if (TARGET_MACHO && darwin_local_data_pic (disp))
12709 return true;
12710
12711 if (GET_CODE (disp) != UNSPEC)
12712 return false;
12713
12714 switch (XINT (disp, 1))
12715 {
12716 case UNSPEC_GOT:
12717 if (saw_plus)
12718 return false;
12719 /* We need to check for both symbols and labels because VxWorks loads
12720 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
12721 details. */
12722 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12723 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
12724 case UNSPEC_GOTOFF:
12725       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
12726 	 While the ABI also specifies a 32bit relocation, we don't produce
12727 	 it in the small PIC model at all.  */
12728 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
12729 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
12730 && !TARGET_64BIT)
12731 return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
12732 return false;
12733 case UNSPEC_GOTTPOFF:
12734 case UNSPEC_GOTNTPOFF:
12735 case UNSPEC_INDNTPOFF:
12736 if (saw_plus)
12737 return false;
12738 disp = XVECEXP (disp, 0, 0);
12739 return (GET_CODE (disp) == SYMBOL_REF
12740 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
12741 case UNSPEC_NTPOFF:
12742 disp = XVECEXP (disp, 0, 0);
12743 return (GET_CODE (disp) == SYMBOL_REF
12744 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
12745 case UNSPEC_DTPOFF:
12746 disp = XVECEXP (disp, 0, 0);
12747 return (GET_CODE (disp) == SYMBOL_REF
12748 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
12749 }
12750
12751 return false;
12752 }
12753
12754 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Return true if
12755    (part of) the address X was reloaded, in which case the calling
12756    macro should goto WIN; return false if no replacement is called
12757    for.  */
12758
12759 bool
12760 ix86_legitimize_reload_address (rtx x,
12761 enum machine_mode mode ATTRIBUTE_UNUSED,
12762 int opnum, int type,
12763 int ind_levels ATTRIBUTE_UNUSED)
12764 {
12765 /* Reload can generate:
12766
12767 (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
12768 (reg:DI 97))
12769 (reg:DI 2 cx))
12770
12771      This RTX is rejected by ix86_legitimate_address_p because base
12772      register 97 does not satisfy the strict base register check.
12773      Following this rejection, reload pushes all three components into
12774      separate registers, creating an invalid memory address RTX.
12775 
12776      The following code reloads only the invalid part of the
12777      memory address RTX.  */
12778
12779 if (GET_CODE (x) == PLUS
12780 && REG_P (XEXP (x, 1))
12781 && GET_CODE (XEXP (x, 0)) == PLUS
12782 && REG_P (XEXP (XEXP (x, 0), 1)))
12783 {
12784 rtx base, index;
12785 bool something_reloaded = false;
12786
12787 base = XEXP (XEXP (x, 0), 1);
12788 if (!REG_OK_FOR_BASE_STRICT_P (base))
12789 {
12790 push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
12791 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12792 opnum, (enum reload_type) type);
12793 something_reloaded = true;
12794 }
12795
12796 index = XEXP (x, 1);
12797 if (!REG_OK_FOR_INDEX_STRICT_P (index))
12798 {
12799 push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
12800 INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
12801 opnum, (enum reload_type) type);
12802 something_reloaded = true;
12803 }
12804
12805 gcc_assert (something_reloaded);
12806 return true;
12807 }
12808
12809 return false;
12810 }
12811
12812 /* Determine if OP is a suitable RTX for an address register.
12813    Return the naked register if a register or a register subreg is
12814    found, otherwise return NULL_RTX.  */
12815
12816 static rtx
12817 ix86_validate_address_register (rtx op)
12818 {
12819 enum machine_mode mode = GET_MODE (op);
12820
12821 /* Only SImode or DImode registers can form the address. */
12822 if (mode != SImode && mode != DImode)
12823 return NULL_RTX;
12824
12825 if (REG_P (op))
12826 return op;
12827 else if (GET_CODE (op) == SUBREG)
12828 {
12829 rtx reg = SUBREG_REG (op);
12830
12831 if (!REG_P (reg))
12832 return NULL_RTX;
12833
12834 mode = GET_MODE (reg);
12835
12836 /* Don't allow SUBREGs that span more than a word. It can
12837 lead to spill failures when the register is one word out
12838 of a two word structure. */
12839 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
12840 return NULL_RTX;
12841
12842 /* Allow only SUBREGs of non-eliminable hard registers. */
12843 if (register_no_elim_operand (reg, mode))
12844 return reg;
12845 }
12846
12847 /* Op is not a register. */
12848 return NULL_RTX;
12849 }
12850
12851 /* Recognizes RTL expressions that are valid memory addresses for an
12852 instruction. The MODE argument is the machine mode for the MEM
12853 expression that wants to use this address.
12854
12855    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
12856    should convert common non-canonical forms to canonical form so that
12857    they will be recognized.  */
12858
12859 static bool
12860 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
12861 rtx addr, bool strict)
12862 {
12863 struct ix86_address parts;
12864 rtx base, index, disp;
12865 HOST_WIDE_INT scale;
12866 enum ix86_address_seg seg;
12867
12868 if (ix86_decompose_address (addr, &parts) <= 0)
12869 /* Decomposition failed. */
12870 return false;
12871
12872 base = parts.base;
12873 index = parts.index;
12874 disp = parts.disp;
12875 scale = parts.scale;
12876 seg = parts.seg;
12877
12878 /* Validate base register. */
12879 if (base)
12880 {
12881 rtx reg = ix86_validate_address_register (base);
12882
12883 if (reg == NULL_RTX)
12884 return false;
12885
12886 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
12887 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
12888 /* Base is not valid. */
12889 return false;
12890 }
12891
12892 /* Validate index register. */
12893 if (index)
12894 {
12895 rtx reg = ix86_validate_address_register (index);
12896
12897 if (reg == NULL_RTX)
12898 return false;
12899
12900 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
12901 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
12902 /* Index is not valid. */
12903 return false;
12904 }
12905
12906 /* Index and base should have the same mode. */
12907 if (base && index
12908 && GET_MODE (base) != GET_MODE (index))
12909 return false;
12910
12911 /* Address override works only on the (%reg) part of %fs:(%reg). */
12912 if (seg != SEG_DEFAULT
12913 && ((base && GET_MODE (base) != word_mode)
12914 || (index && GET_MODE (index) != word_mode)))
12915 return false;
12916
12917 /* Validate scale factor. */
12918 if (scale != 1)
12919 {
12920 if (!index)
12921 /* Scale without index. */
12922 return false;
12923
12924 if (scale != 2 && scale != 4 && scale != 8)
12925 /* Scale is not a valid multiplier. */
12926 return false;
12927 }
12928
12929 /* Validate displacement. */
12930 if (disp)
12931 {
12932 if (GET_CODE (disp) == CONST
12933 && GET_CODE (XEXP (disp, 0)) == UNSPEC
12934 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
12935 switch (XINT (XEXP (disp, 0), 1))
12936 {
12937 	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
12938 	     used.  While the ABI also specifies 32bit relocations, we don't
12939 	     produce them at all and use IP-relative addressing instead.  */
12940 case UNSPEC_GOT:
12941 case UNSPEC_GOTOFF:
12942 gcc_assert (flag_pic);
12943 if (!TARGET_64BIT)
12944 goto is_legitimate_pic;
12945
12946 /* 64bit address unspec. */
12947 return false;
12948
12949 case UNSPEC_GOTPCREL:
12950 case UNSPEC_PCREL:
12951 gcc_assert (flag_pic);
12952 goto is_legitimate_pic;
12953
12954 case UNSPEC_GOTTPOFF:
12955 case UNSPEC_GOTNTPOFF:
12956 case UNSPEC_INDNTPOFF:
12957 case UNSPEC_NTPOFF:
12958 case UNSPEC_DTPOFF:
12959 break;
12960
12961 case UNSPEC_STACK_CHECK:
12962 gcc_assert (flag_split_stack);
12963 break;
12964
12965 default:
12966 /* Invalid address unspec. */
12967 return false;
12968 }
12969
12970 else if (SYMBOLIC_CONST (disp)
12971 && (flag_pic
12972 || (TARGET_MACHO
12973 #if TARGET_MACHO
12974 && MACHOPIC_INDIRECT
12975 && !machopic_operand_p (disp)
12976 #endif
12977 )))
12978 {
12979
12980 is_legitimate_pic:
12981 if (TARGET_64BIT && (index || base))
12982 {
12983 /* foo@dtpoff(%rX) is ok. */
12984 if (GET_CODE (disp) != CONST
12985 || GET_CODE (XEXP (disp, 0)) != PLUS
12986 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
12987 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
12988 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
12989 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
12990 /* Non-constant pic memory reference. */
12991 return false;
12992 }
12993 else if ((!TARGET_MACHO || flag_pic)
12994 && ! legitimate_pic_address_disp_p (disp))
12995 /* Displacement is an invalid pic construct. */
12996 return false;
12997 #if TARGET_MACHO
12998 else if (MACHO_DYNAMIC_NO_PIC_P
12999 && !ix86_legitimate_constant_p (Pmode, disp))
13000 	    /* displacement must be referenced via non_lazy_pointer */
13001 return false;
13002 #endif
13003
13004 /* This code used to verify that a symbolic pic displacement
13005 includes the pic_offset_table_rtx register.
13006
13007 	     While this is a good idea, unfortunately these constructs may
13008 	     be created by the "adds using lea" optimization for incorrect
13009 	     code like:
13010
13011 int a;
13012 int foo(int i)
13013 {
13014 return *(&a+i);
13015 }
13016
13017 	     This code is nonsensical, but results in addressing the
13018 	     GOT table with a pic_offset_table_rtx base.  We can't
13019 	     just refuse it easily, since it gets matched by the
13020 	     "addsi3" pattern, which later gets split to lea when
13021 	     the output register differs from the input.  While this
13022 	     could be handled by a separate addsi pattern for this case
13023 	     that never results in lea, disabling this test seems to be
13024 	     the easier and correct fix for the crash.  */
13025 }
13026 else if (GET_CODE (disp) != LABEL_REF
13027 && !CONST_INT_P (disp)
13028 && (GET_CODE (disp) != CONST
13029 || !ix86_legitimate_constant_p (Pmode, disp))
13030 && (GET_CODE (disp) != SYMBOL_REF
13031 || !ix86_legitimate_constant_p (Pmode, disp)))
13032 /* Displacement is not constant. */
13033 return false;
13034 else if (TARGET_64BIT
13035 && !x86_64_immediate_operand (disp, VOIDmode))
13036 /* Displacement is out of range. */
13037 return false;
13038 /* In x32 mode, constant addresses are sign extended to 64bit, so
13039 we have to prevent addresses from 0x80000000 to 0xffffffff. */
13040 else if (TARGET_X32 && !(index || base)
13041 && CONST_INT_P (disp)
13042 && val_signbit_known_set_p (SImode, INTVAL (disp)))
13043 return false;
13044 }
13045
13046 /* Everything looks valid. */
13047 return true;
13048 }
13049
13050 /* Determine if a given RTX is a valid constant address. */
13051
13052 bool
13053 constant_address_p (rtx x)
13054 {
13055 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13056 }
13057 \f
13058 /* Return a unique alias set for the GOT. */
13059
13060 static alias_set_type
13061 ix86_GOT_alias_set (void)
13062 {
13063 static alias_set_type set = -1;
13064 if (set == -1)
13065 set = new_alias_set ();
13066 return set;
13067 }
13068
13069 /* Return a legitimate reference for ORIG (an address) using the
13070 register REG. If REG is 0, a new pseudo is generated.
13071
13072 There are two types of references that must be handled:
13073
13074 1. Global data references must load the address from the GOT, via
13075 the PIC reg. An insn is emitted to do this load, and the reg is
13076 returned.
13077
13078 2. Static data references, constant pool addresses, and code labels
13079 compute the address as an offset from the GOT, whose base is in
13080 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
13081 differentiate them from global data objects. The returned
13082 address is the PIC reg + an unspec constant.
13083
13084 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13085 reg also appears in the address. */
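/* For 32-bit ELF PIC, the two cases typically end up as something like

     movl  sym@GOT(%ebx), %reg      # case 1: load the address from the GOT

     leal  sym@GOTOFF(%ebx), %reg   # case 2: PIC register plus an offset

   (a rough sketch in AT&T syntax; the exact sequences depend on the
   target and code model handled below).  */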
13086
13087 static rtx
13088 legitimize_pic_address (rtx orig, rtx reg)
13089 {
13090 rtx addr = orig;
13091 rtx new_rtx = orig;
13092
13093 #if TARGET_MACHO
13094 if (TARGET_MACHO && !TARGET_64BIT)
13095 {
13096 if (reg == 0)
13097 reg = gen_reg_rtx (Pmode);
13098 /* Use the generic Mach-O PIC machinery. */
13099 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13100 }
13101 #endif
13102
13103 if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13104 {
13105 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13106 if (tmp)
13107 return tmp;
13108 }
13109
13110 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13111 new_rtx = addr;
13112 else if (TARGET_64BIT && !TARGET_PECOFF
13113 && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13114 {
13115 rtx tmpreg;
13116 /* This symbol may be referenced via a displacement from the PIC
13117 base address (@GOTOFF). */
13118
13119 if (reload_in_progress)
13120 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13121 if (GET_CODE (addr) == CONST)
13122 addr = XEXP (addr, 0);
13123 if (GET_CODE (addr) == PLUS)
13124 {
13125 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13126 UNSPEC_GOTOFF);
13127 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13128 }
13129 else
13130 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13131 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13132 if (!reg)
13133 tmpreg = gen_reg_rtx (Pmode);
13134 else
13135 tmpreg = reg;
13136 emit_move_insn (tmpreg, new_rtx);
13137
13138 if (reg != 0)
13139 {
13140 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13141 tmpreg, 1, OPTAB_DIRECT);
13142 new_rtx = reg;
13143 }
13144 else
13145 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13146 }
13147 else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13148 {
13149 /* This symbol may be referenced via a displacement from the PIC
13150 base address (@GOTOFF). */
13151
13152 if (reload_in_progress)
13153 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13154 if (GET_CODE (addr) == CONST)
13155 addr = XEXP (addr, 0);
13156 if (GET_CODE (addr) == PLUS)
13157 {
13158 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13159 UNSPEC_GOTOFF);
13160 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13161 }
13162 else
13163 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13164 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13165 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13166
13167 if (reg != 0)
13168 {
13169 emit_move_insn (reg, new_rtx);
13170 new_rtx = reg;
13171 }
13172 }
13173 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13174 /* We can't use @GOTOFF for text labels on VxWorks;
13175 see gotoff_operand. */
13176 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13177 {
13178 rtx tmp = legitimize_pe_coff_symbol (addr, true);
13179 if (tmp)
13180 return tmp;
13181
13182       /* For x64 PE-COFF there is no GOT table, so we use the address
13183 	 directly.  */
13184 if (TARGET_64BIT && TARGET_PECOFF)
13185 {
13186 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13187 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13188
13189 if (reg == 0)
13190 reg = gen_reg_rtx (Pmode);
13191 emit_move_insn (reg, new_rtx);
13192 new_rtx = reg;
13193 }
13194 else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13195 {
13196 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13197 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13198 new_rtx = gen_const_mem (Pmode, new_rtx);
13199 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13200
13201 if (reg == 0)
13202 reg = gen_reg_rtx (Pmode);
13203 	  /* Use gen_movsi directly, otherwise the address is loaded
13204 	     into a register for CSE.  We don't want to CSE these addresses;
13205 	     instead we CSE addresses from the GOT table, so skip this.  */
13206 emit_insn (gen_movsi (reg, new_rtx));
13207 new_rtx = reg;
13208 }
13209 else
13210 {
13211 /* This symbol must be referenced via a load from the
13212 Global Offset Table (@GOT). */
13213
13214 if (reload_in_progress)
13215 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13216 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13217 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13218 if (TARGET_64BIT)
13219 new_rtx = force_reg (Pmode, new_rtx);
13220 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13221 new_rtx = gen_const_mem (Pmode, new_rtx);
13222 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13223
13224 if (reg == 0)
13225 reg = gen_reg_rtx (Pmode);
13226 emit_move_insn (reg, new_rtx);
13227 new_rtx = reg;
13228 }
13229 }
13230 else
13231 {
13232 if (CONST_INT_P (addr)
13233 && !x86_64_immediate_operand (addr, VOIDmode))
13234 {
13235 if (reg)
13236 {
13237 emit_move_insn (reg, addr);
13238 new_rtx = reg;
13239 }
13240 else
13241 new_rtx = force_reg (Pmode, addr);
13242 }
13243 else if (GET_CODE (addr) == CONST)
13244 {
13245 addr = XEXP (addr, 0);
13246
13247 	  /* We must match stuff we generated before.  Assume the only
13248 	     unspecs that can get here are ours.  Not that we could do
13249 	     anything with them anyway....  */
13250 if (GET_CODE (addr) == UNSPEC
13251 || (GET_CODE (addr) == PLUS
13252 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13253 return orig;
13254 gcc_assert (GET_CODE (addr) == PLUS);
13255 }
13256 if (GET_CODE (addr) == PLUS)
13257 {
13258 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13259
13260 /* Check first to see if this is a constant offset from a @GOTOFF
13261 symbol reference. */
13262 if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13263 && CONST_INT_P (op1))
13264 {
13265 if (!TARGET_64BIT)
13266 {
13267 if (reload_in_progress)
13268 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13269 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13270 UNSPEC_GOTOFF);
13271 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13272 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13273 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13274
13275 if (reg != 0)
13276 {
13277 emit_move_insn (reg, new_rtx);
13278 new_rtx = reg;
13279 }
13280 }
13281 else
13282 {
13283 if (INTVAL (op1) < -16*1024*1024
13284 || INTVAL (op1) >= 16*1024*1024)
13285 {
13286 if (!x86_64_immediate_operand (op1, Pmode))
13287 op1 = force_reg (Pmode, op1);
13288 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13289 }
13290 }
13291 }
13292 else
13293 {
13294 rtx base = legitimize_pic_address (op0, reg);
13295 enum machine_mode mode = GET_MODE (base);
13296 new_rtx
13297 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13298
13299 if (CONST_INT_P (new_rtx))
13300 {
13301 if (INTVAL (new_rtx) < -16*1024*1024
13302 || INTVAL (new_rtx) >= 16*1024*1024)
13303 {
13304 if (!x86_64_immediate_operand (new_rtx, mode))
13305 new_rtx = force_reg (mode, new_rtx);
13306 new_rtx
13307 = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
13308 }
13309 else
13310 new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
13311 }
13312 else
13313 {
13314 if (GET_CODE (new_rtx) == PLUS
13315 && CONSTANT_P (XEXP (new_rtx, 1)))
13316 {
13317 base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
13318 new_rtx = XEXP (new_rtx, 1);
13319 }
13320 new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
13321 }
13322 }
13323 }
13324 }
13325 return new_rtx;
13326 }
13327 \f
13328 /* Load the thread pointer. If TO_REG is true, force it into a register. */
13329
13330 static rtx
13331 get_thread_pointer (enum machine_mode tp_mode, bool to_reg)
13332 {
13333 rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
13334
13335 if (GET_MODE (tp) != tp_mode)
13336 {
13337 gcc_assert (GET_MODE (tp) == SImode);
13338 gcc_assert (tp_mode == DImode);
13339
13340 tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
13341 }
13342
13343 if (to_reg)
13344 tp = copy_to_mode_reg (tp_mode, tp);
13345
13346 return tp;
13347 }
13348
13349 /* Construct the SYMBOL_REF for the tls_get_addr function. */
13350
13351 static GTY(()) rtx ix86_tls_symbol;
13352
13353 static rtx
13354 ix86_tls_get_addr (void)
13355 {
13356 if (!ix86_tls_symbol)
13357 {
13358 const char *sym
13359 = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
13360 ? "___tls_get_addr" : "__tls_get_addr");
13361
13362 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
13363 }
13364
13365 if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
13366 {
13367 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
13368 UNSPEC_PLTOFF);
13369 return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
13370 gen_rtx_CONST (Pmode, unspec));
13371 }
13372
13373 return ix86_tls_symbol;
13374 }
13375
13376 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
13377
13378 static GTY(()) rtx ix86_tls_module_base_symbol;
13379
13380 rtx
13381 ix86_tls_module_base (void)
13382 {
13383 if (!ix86_tls_module_base_symbol)
13384 {
13385 ix86_tls_module_base_symbol
13386 = gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
13387
13388 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
13389 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
13390 }
13391
13392 return ix86_tls_module_base_symbol;
13393 }
13394
13395 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
13396 false if we expect this to be used for a memory address and true if
13397 we expect to load the address into a register. */
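/* As a rough guide to the kind of sequences built below (x86-64, GNU TLS,
   AT&T syntax; the exact code depends on the model, the target and FOR_MOV):

     initial-exec:  movq  %fs:0, %reg
                    addq  x@gottpoff(%rip), %reg

     local-exec:    movq  %fs:0, %reg
                    leaq  x@tpoff(%reg), %reg  */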
13398
13399 static rtx
13400 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
13401 {
13402 rtx dest, base, off;
13403 rtx pic = NULL_RTX, tp = NULL_RTX;
13404 enum machine_mode tp_mode = Pmode;
13405 int type;
13406
13407   /* Fall back to the global dynamic model if the toolchain cannot support
13408      local dynamic.  */
13409 if (TARGET_SUN_TLS && !TARGET_64BIT
13410 && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
13411 && model == TLS_MODEL_LOCAL_DYNAMIC)
13412 model = TLS_MODEL_GLOBAL_DYNAMIC;
13413
13414 switch (model)
13415 {
13416 case TLS_MODEL_GLOBAL_DYNAMIC:
13417 dest = gen_reg_rtx (Pmode);
13418
13419 if (!TARGET_64BIT)
13420 {
13421 if (flag_pic && !TARGET_PECOFF)
13422 pic = pic_offset_table_rtx;
13423 else
13424 {
13425 pic = gen_reg_rtx (Pmode);
13426 emit_insn (gen_set_got (pic));
13427 }
13428 }
13429
13430 if (TARGET_GNU2_TLS)
13431 {
13432 if (TARGET_64BIT)
13433 emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
13434 else
13435 emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
13436
13437 tp = get_thread_pointer (Pmode, true);
13438 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
13439
13440 if (GET_MODE (x) != Pmode)
13441 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13442
13443 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13444 }
13445 else
13446 {
13447 rtx caddr = ix86_tls_get_addr ();
13448
13449 if (TARGET_64BIT)
13450 {
13451 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13452 rtx insns;
13453
13454 start_sequence ();
13455 emit_call_insn
13456 (ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
13457 insns = get_insns ();
13458 end_sequence ();
13459
13460 if (GET_MODE (x) != Pmode)
13461 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13462
13463 RTL_CONST_CALL_P (insns) = 1;
13464 emit_libcall_block (insns, dest, rax, x);
13465 }
13466 else
13467 emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
13468 }
13469 break;
13470
13471 case TLS_MODEL_LOCAL_DYNAMIC:
13472 base = gen_reg_rtx (Pmode);
13473
13474 if (!TARGET_64BIT)
13475 {
13476 if (flag_pic)
13477 pic = pic_offset_table_rtx;
13478 else
13479 {
13480 pic = gen_reg_rtx (Pmode);
13481 emit_insn (gen_set_got (pic));
13482 }
13483 }
13484
13485 if (TARGET_GNU2_TLS)
13486 {
13487 rtx tmp = ix86_tls_module_base ();
13488
13489 if (TARGET_64BIT)
13490 emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
13491 else
13492 emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
13493
13494 tp = get_thread_pointer (Pmode, true);
13495 set_unique_reg_note (get_last_insn (), REG_EQUAL,
13496 gen_rtx_MINUS (Pmode, tmp, tp));
13497 }
13498 else
13499 {
13500 rtx caddr = ix86_tls_get_addr ();
13501
13502 if (TARGET_64BIT)
13503 {
13504 rtx rax = gen_rtx_REG (Pmode, AX_REG);
13505 rtx insns, eqv;
13506
13507 start_sequence ();
13508 emit_call_insn
13509 (ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
13510 insns = get_insns ();
13511 end_sequence ();
13512
13513 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
13514 share the LD_BASE result with other LD model accesses. */
13515 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
13516 UNSPEC_TLS_LD_BASE);
13517
13518 RTL_CONST_CALL_P (insns) = 1;
13519 emit_libcall_block (insns, base, rax, eqv);
13520 }
13521 else
13522 emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
13523 }
13524
13525 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
13526 off = gen_rtx_CONST (Pmode, off);
13527
13528 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
13529
13530 if (TARGET_GNU2_TLS)
13531 {
13532 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
13533
13534 if (GET_MODE (x) != Pmode)
13535 x = gen_rtx_ZERO_EXTEND (Pmode, x);
13536
13537 set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
13538 }
13539 break;
13540
13541 case TLS_MODEL_INITIAL_EXEC:
13542 if (TARGET_64BIT)
13543 {
13544 if (TARGET_SUN_TLS && !TARGET_X32)
13545 {
13546 /* The Sun linker took the AMD64 TLS spec literally
13547 and can only handle %rax as destination of the
13548 initial executable code sequence. */
13549
13550 dest = gen_reg_rtx (DImode);
13551 emit_insn (gen_tls_initial_exec_64_sun (dest, x));
13552 return dest;
13553 }
13554
13555 	  /* Generate DImode references to avoid %fs:(%reg32)
13556 	     problems and the linker IE->LE relaxation bug.  */
13557 tp_mode = DImode;
13558 pic = NULL;
13559 type = UNSPEC_GOTNTPOFF;
13560 }
13561 else if (flag_pic)
13562 {
13563 if (reload_in_progress)
13564 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
13565 pic = pic_offset_table_rtx;
13566 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
13567 }
13568 else if (!TARGET_ANY_GNU_TLS)
13569 {
13570 pic = gen_reg_rtx (Pmode);
13571 emit_insn (gen_set_got (pic));
13572 type = UNSPEC_GOTTPOFF;
13573 }
13574 else
13575 {
13576 pic = NULL;
13577 type = UNSPEC_INDNTPOFF;
13578 }
13579
13580 off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
13581 off = gen_rtx_CONST (tp_mode, off);
13582 if (pic)
13583 off = gen_rtx_PLUS (tp_mode, pic, off);
13584 off = gen_const_mem (tp_mode, off);
13585 set_mem_alias_set (off, ix86_GOT_alias_set ());
13586
13587 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13588 {
13589 base = get_thread_pointer (tp_mode,
13590 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13591 off = force_reg (tp_mode, off);
13592 return gen_rtx_PLUS (tp_mode, base, off);
13593 }
13594 else
13595 {
13596 base = get_thread_pointer (Pmode, true);
13597 dest = gen_reg_rtx (Pmode);
13598 emit_insn (ix86_gen_sub3 (dest, base, off));
13599 }
13600 break;
13601
13602 case TLS_MODEL_LOCAL_EXEC:
13603 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
13604 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13605 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
13606 off = gen_rtx_CONST (Pmode, off);
13607
13608 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
13609 {
13610 base = get_thread_pointer (Pmode,
13611 for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
13612 return gen_rtx_PLUS (Pmode, base, off);
13613 }
13614 else
13615 {
13616 base = get_thread_pointer (Pmode, true);
13617 dest = gen_reg_rtx (Pmode);
13618 emit_insn (ix86_gen_sub3 (dest, base, off));
13619 }
13620 break;
13621
13622 default:
13623 gcc_unreachable ();
13624 }
13625
13626 return dest;
13627 }
13628
13629 /* Create or return the unique __imp_DECL dllimport symbol corresponding
13630 to symbol DECL if BEIMPORT is true. Otherwise create or return the
13631 unique refptr-DECL symbol corresponding to symbol DECL. */
13632
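/* Hash table mapping a decl to the artificial __imp_ or refptr
   VAR_DECL created for it by get_dllimport_decl below. */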
13633 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
13634 htab_t dllimport_map;
13635
13636 static tree
13637 get_dllimport_decl (tree decl, bool beimport)
13638 {
13639 struct tree_map *h, in;
13640 void **loc;
13641 const char *name;
13642 const char *prefix;
13643 size_t namelen, prefixlen;
13644 char *imp_name;
13645 tree to;
13646 rtx rtl;
13647
13648 if (!dllimport_map)
13649 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
13650
13651 in.hash = htab_hash_pointer (decl);
13652 in.base.from = decl;
13653 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
13654 h = (struct tree_map *) *loc;
13655 if (h)
13656 return h->to;
13657
13658 *loc = h = ggc_alloc_tree_map ();
13659 h->hash = in.hash;
13660 h->base.from = decl;
13661 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
13662 VAR_DECL, NULL, ptr_type_node);
13663 DECL_ARTIFICIAL (to) = 1;
13664 DECL_IGNORED_P (to) = 1;
13665 DECL_EXTERNAL (to) = 1;
13666 TREE_READONLY (to) = 1;
13667
13668 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
13669 name = targetm.strip_name_encoding (name);
13670 if (beimport)
13671 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
13672 ? "*__imp_" : "*__imp__";
13673 else
13674 prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
13675 namelen = strlen (name);
13676 prefixlen = strlen (prefix);
13677 imp_name = (char *) alloca (namelen + prefixlen + 1);
13678 memcpy (imp_name, prefix, prefixlen);
13679 memcpy (imp_name + prefixlen, name, namelen + 1);
13680
13681 name = ggc_alloc_string (imp_name, namelen + prefixlen);
13682 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
13683 SET_SYMBOL_REF_DECL (rtl, to);
13684 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
13685 if (!beimport)
13686 {
13687 SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
13688 #ifdef SUB_TARGET_RECORD_STUB
13689 SUB_TARGET_RECORD_STUB (name);
13690 #endif
13691 }
13692
13693 rtl = gen_const_mem (Pmode, rtl);
13694 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
13695
13696 SET_DECL_RTL (to, rtl);
13697 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
13698
13699 return to;
13700 }
13701
13702 /* Expand SYMBOL into its corresponding far-address symbol.
13703 WANT_REG is true if we require the result be a register. */
13704
13705 static rtx
13706 legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
13707 {
13708 tree imp_decl;
13709 rtx x;
13710
13711 gcc_assert (SYMBOL_REF_DECL (symbol));
13712 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
13713
13714 x = DECL_RTL (imp_decl);
13715 if (want_reg)
13716 x = force_reg (Pmode, x);
13717 return x;
13718 }
13719
13720 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
13721 true if we require the result be a register. */
13722
13723 static rtx
13724 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
13725 {
13726 tree imp_decl;
13727 rtx x;
13728
13729 gcc_assert (SYMBOL_REF_DECL (symbol));
13730 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
13731
13732 x = DECL_RTL (imp_decl);
13733 if (want_reg)
13734 x = force_reg (Pmode, x);
13735 return x;
13736 }
13737
13738 /* Expand ADDR into its corresponding dllimport or refptr symbol. INREG
13739 is true if we require the result be a register. */
13740
13741 static rtx
13742 legitimize_pe_coff_symbol (rtx addr, bool inreg)
13743 {
13744 if (!TARGET_PECOFF)
13745 return NULL_RTX;
13746
13747 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13748 {
13749 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
13750 return legitimize_dllimport_symbol (addr, inreg);
13751 if (GET_CODE (addr) == CONST
13752 && GET_CODE (XEXP (addr, 0)) == PLUS
13753 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13754 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
13755 {
13756 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
13757 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13758 }
13759 }
13760
13761 if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
13762 return NULL_RTX;
13763 if (GET_CODE (addr) == SYMBOL_REF
13764 && !is_imported_p (addr)
13765 && SYMBOL_REF_EXTERNAL_P (addr)
13766 && SYMBOL_REF_DECL (addr))
13767 return legitimize_pe_coff_extern_decl (addr, inreg);
13768
13769 if (GET_CODE (addr) == CONST
13770 && GET_CODE (XEXP (addr, 0)) == PLUS
13771 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
13772 && !is_imported_p (XEXP (XEXP (addr, 0), 0))
13773 && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
13774 && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
13775 {
13776 rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
13777 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
13778 }
13779 return NULL_RTX;
13780 }
13781
13782 /* Try machine-dependent ways of modifying an illegitimate address
13783 to be legitimate. If we find one, return the new, valid address.
13784 This macro is used in only one place: `memory_address' in explow.c.
13785
13786 OLDX is the address as it was before break_out_memory_refs was called.
13787 In some cases it is useful to look at this to decide what needs to be done.
13788
13789 It is always safe for this macro to do nothing. It exists to recognize
13790 opportunities to optimize the output.
13791
13792 For the 80386, we handle X+REG by loading X into a register R and
13793 using R+REG. R will go in a general reg and indexing will be used.
13794 However, if REG is a broken-out memory address or multiplication,
13795 nothing needs to be done because REG can certainly go in a general reg.
13796
13797 When -fpic is used, special handling is needed for symbolic references.
13798 See comments by legitimize_pic_address in i386.c for details. */
13799
13800 static rtx
13801 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
13802 enum machine_mode mode)
13803 {
13804 int changed = 0;
13805 unsigned log;
13806
13807 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
13808 if (log)
13809 return legitimize_tls_address (x, (enum tls_model) log, false);
13810 if (GET_CODE (x) == CONST
13811 && GET_CODE (XEXP (x, 0)) == PLUS
13812 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
13813 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
13814 {
13815 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
13816 (enum tls_model) log, false);
13817 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
13818 }
13819
13820 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13821 {
13822 rtx tmp = legitimize_pe_coff_symbol (x, true);
13823 if (tmp)
13824 return tmp;
13825 }
13826
13827 if (flag_pic && SYMBOLIC_CONST (x))
13828 return legitimize_pic_address (x, 0);
13829
13830 #if TARGET_MACHO
13831 if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
13832 return machopic_indirect_data_reference (x, 0);
13833 #endif
13834
13835 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
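/* For example, (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), which the addressing forms can
   express as a scaled index. */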
13836 if (GET_CODE (x) == ASHIFT
13837 && CONST_INT_P (XEXP (x, 1))
13838 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
13839 {
13840 changed = 1;
13841 log = INTVAL (XEXP (x, 1));
13842 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
13843 GEN_INT (1 << log));
13844 }
13845
13846 if (GET_CODE (x) == PLUS)
13847 {
13848 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
13849
13850 if (GET_CODE (XEXP (x, 0)) == ASHIFT
13851 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
13852 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
13853 {
13854 changed = 1;
13855 log = INTVAL (XEXP (XEXP (x, 0), 1));
13856 XEXP (x, 0) = gen_rtx_MULT (Pmode,
13857 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
13858 GEN_INT (1 << log));
13859 }
13860
13861 if (GET_CODE (XEXP (x, 1)) == ASHIFT
13862 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
13863 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
13864 {
13865 changed = 1;
13866 log = INTVAL (XEXP (XEXP (x, 1), 1));
13867 XEXP (x, 1) = gen_rtx_MULT (Pmode,
13868 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
13869 GEN_INT (1 << log));
13870 }
13871
13872 /* Put multiply first if it isn't already. */
13873 if (GET_CODE (XEXP (x, 1)) == MULT)
13874 {
13875 rtx tmp = XEXP (x, 0);
13876 XEXP (x, 0) = XEXP (x, 1);
13877 XEXP (x, 1) = tmp;
13878 changed = 1;
13879 }
13880
13881 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
13882 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
13883 created by virtual register instantiation, register elimination, and
13884 similar optimizations. */
13885 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
13886 {
13887 changed = 1;
13888 x = gen_rtx_PLUS (Pmode,
13889 gen_rtx_PLUS (Pmode, XEXP (x, 0),
13890 XEXP (XEXP (x, 1), 0)),
13891 XEXP (XEXP (x, 1), 1));
13892 }
13893
13894 /* Canonicalize
13895 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
13896 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
13897 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
13898 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
13899 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
13900 && CONSTANT_P (XEXP (x, 1)))
13901 {
13902 rtx constant;
13903 rtx other = NULL_RTX;
13904
13905 if (CONST_INT_P (XEXP (x, 1)))
13906 {
13907 constant = XEXP (x, 1);
13908 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
13909 }
13910 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
13911 {
13912 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
13913 other = XEXP (x, 1);
13914 }
13915 else
13916 constant = 0;
13917
13918 if (constant)
13919 {
13920 changed = 1;
13921 x = gen_rtx_PLUS (Pmode,
13922 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
13923 XEXP (XEXP (XEXP (x, 0), 1), 0)),
13924 plus_constant (Pmode, other,
13925 INTVAL (constant)));
13926 }
13927 }
13928
13929 if (changed && ix86_legitimate_address_p (mode, x, false))
13930 return x;
13931
13932 if (GET_CODE (XEXP (x, 0)) == MULT)
13933 {
13934 changed = 1;
13935 XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
13936 }
13937
13938 if (GET_CODE (XEXP (x, 1)) == MULT)
13939 {
13940 changed = 1;
13941 XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
13942 }
13943
13944 if (changed
13945 && REG_P (XEXP (x, 1))
13946 && REG_P (XEXP (x, 0)))
13947 return x;
13948
13949 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
13950 {
13951 changed = 1;
13952 x = legitimize_pic_address (x, 0);
13953 }
13954
13955 if (changed && ix86_legitimate_address_p (mode, x, false))
13956 return x;
13957
13958 if (REG_P (XEXP (x, 0)))
13959 {
13960 rtx temp = gen_reg_rtx (Pmode);
13961 rtx val = force_operand (XEXP (x, 1), temp);
13962 if (val != temp)
13963 {
13964 val = convert_to_mode (Pmode, val, 1);
13965 emit_move_insn (temp, val);
13966 }
13967
13968 XEXP (x, 1) = temp;
13969 return x;
13970 }
13971
13972 else if (REG_P (XEXP (x, 1)))
13973 {
13974 rtx temp = gen_reg_rtx (Pmode);
13975 rtx val = force_operand (XEXP (x, 0), temp);
13976 if (val != temp)
13977 {
13978 val = convert_to_mode (Pmode, val, 1);
13979 emit_move_insn (temp, val);
13980 }
13981
13982 XEXP (x, 0) = temp;
13983 return x;
13984 }
13985 }
13986
13987 return x;
13988 }
13989 \f
13990 /* Print an integer constant expression in assembler syntax. Addition
13991 and subtraction are the only arithmetic that may appear in these
13992 expressions. FILE is the stdio stream to write to, X is the rtx, and
13993 CODE is the operand print code from the output string. */
13994
13995 static void
13996 output_pic_addr_const (FILE *file, rtx x, int code)
13997 {
13998 char buf[256];
13999
14000 switch (GET_CODE (x))
14001 {
14002 case PC:
14003 gcc_assert (flag_pic);
14004 putc ('.', file);
14005 break;
14006
14007 case SYMBOL_REF:
14008 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14009 output_addr_const (file, x);
14010 else
14011 {
14012 const char *name = XSTR (x, 0);
14013
14014 /* Mark the decl as referenced so that cgraph will
14015 output the function. */
14016 if (SYMBOL_REF_DECL (x))
14017 mark_decl_referenced (SYMBOL_REF_DECL (x));
14018
14019 #if TARGET_MACHO
14020 if (MACHOPIC_INDIRECT
14021 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14022 name = machopic_indirection_name (x, /*stub_p=*/true);
14023 #endif
14024 assemble_name (file, name);
14025 }
14026 if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14027 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14028 fputs ("@PLT", file);
14029 break;
14030
14031 case LABEL_REF:
14032 x = XEXP (x, 0);
14033 /* FALLTHRU */
14034 case CODE_LABEL:
14035 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14036 assemble_name (asm_out_file, buf);
14037 break;
14038
14039 case CONST_INT:
14040 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14041 break;
14042
14043 case CONST:
14044 /* This used to output parentheses around the expression,
14045 but that does not work on the 386 (either ATT or BSD assembler). */
14046 output_pic_addr_const (file, XEXP (x, 0), code);
14047 break;
14048
14049 case CONST_DOUBLE:
14050 if (GET_MODE (x) == VOIDmode)
14051 {
14052 /* We can use %d if the number is <32 bits and positive. */
14053 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14054 fprintf (file, "0x%lx%08lx",
14055 (unsigned long) CONST_DOUBLE_HIGH (x),
14056 (unsigned long) CONST_DOUBLE_LOW (x));
14057 else
14058 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14059 }
14060 else
14061 /* We can't handle floating point constants;
14062 TARGET_PRINT_OPERAND must handle them. */
14063 output_operand_lossage ("floating constant misused");
14064 break;
14065
14066 case PLUS:
14067 /* Some assemblers need integer constants to appear first. */
14068 if (CONST_INT_P (XEXP (x, 0)))
14069 {
14070 output_pic_addr_const (file, XEXP (x, 0), code);
14071 putc ('+', file);
14072 output_pic_addr_const (file, XEXP (x, 1), code);
14073 }
14074 else
14075 {
14076 gcc_assert (CONST_INT_P (XEXP (x, 1)));
14077 output_pic_addr_const (file, XEXP (x, 1), code);
14078 putc ('+', file);
14079 output_pic_addr_const (file, XEXP (x, 0), code);
14080 }
14081 break;
14082
14083 case MINUS:
14084 if (!TARGET_MACHO)
14085 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14086 output_pic_addr_const (file, XEXP (x, 0), code);
14087 putc ('-', file);
14088 output_pic_addr_const (file, XEXP (x, 1), code);
14089 if (!TARGET_MACHO)
14090 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14091 break;
14092
14093 case UNSPEC:
14094 if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14095 {
14096 bool f = i386_asm_output_addr_const_extra (file, x);
14097 gcc_assert (f);
14098 break;
14099 }
14100
14101 gcc_assert (XVECLEN (x, 0) == 1);
14102 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14103 switch (XINT (x, 1))
14104 {
14105 case UNSPEC_GOT:
14106 fputs ("@GOT", file);
14107 break;
14108 case UNSPEC_GOTOFF:
14109 fputs ("@GOTOFF", file);
14110 break;
14111 case UNSPEC_PLTOFF:
14112 fputs ("@PLTOFF", file);
14113 break;
14114 case UNSPEC_PCREL:
14115 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14116 "(%rip)" : "[rip]", file);
14117 break;
14118 case UNSPEC_GOTPCREL:
14119 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14120 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14121 break;
14122 case UNSPEC_GOTTPOFF:
14123 /* FIXME: This might be @TPOFF in Sun ld too. */
14124 fputs ("@gottpoff", file);
14125 break;
14126 case UNSPEC_TPOFF:
14127 fputs ("@tpoff", file);
14128 break;
14129 case UNSPEC_NTPOFF:
14130 if (TARGET_64BIT)
14131 fputs ("@tpoff", file);
14132 else
14133 fputs ("@ntpoff", file);
14134 break;
14135 case UNSPEC_DTPOFF:
14136 fputs ("@dtpoff", file);
14137 break;
14138 case UNSPEC_GOTNTPOFF:
14139 if (TARGET_64BIT)
14140 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14141 "@gottpoff(%rip)": "@gottpoff[rip]", file);
14142 else
14143 fputs ("@gotntpoff", file);
14144 break;
14145 case UNSPEC_INDNTPOFF:
14146 fputs ("@indntpoff", file);
14147 break;
14148 #if TARGET_MACHO
14149 case UNSPEC_MACHOPIC_OFFSET:
14150 putc ('-', file);
14151 machopic_output_function_base_name (file);
14152 break;
14153 #endif
14154 default:
14155 output_operand_lossage ("invalid UNSPEC as operand");
14156 break;
14157 }
14158 break;
14159
14160 default:
14161 output_operand_lossage ("invalid expression as operand");
14162 }
14163 }
14164
14165 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14166 We need to emit DTP-relative relocations. */
14167
14168 static void ATTRIBUTE_UNUSED
14169 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14170 {
14171 fputs (ASM_LONG, file);
14172 output_addr_const (file, x);
14173 fputs ("@dtpoff", file);
14174 switch (size)
14175 {
14176 case 4:
14177 break;
14178 case 8:
14179 fputs (", 0", file);
14180 break;
14181 default:
14182 gcc_unreachable ();
14183 }
14184 }
14185
14186 /* Return true if X is a representation of the PIC register. This copes
14187 with calls from ix86_find_base_term, where the register might have
14188 been replaced by a cselib value. */
14189
14190 static bool
14191 ix86_pic_register_p (rtx x)
14192 {
14193 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14194 return (pic_offset_table_rtx
14195 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14196 else
14197 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14198 }
14199
14200 /* Helper function for ix86_delegitimize_address.
14201 Attempt to delegitimize TLS local-exec accesses. */
14202
14203 static rtx
14204 ix86_delegitimize_tls_address (rtx orig_x)
14205 {
14206 rtx x = orig_x, unspec;
14207 struct ix86_address addr;
14208
14209 if (!TARGET_TLS_DIRECT_SEG_REFS)
14210 return orig_x;
14211 if (MEM_P (x))
14212 x = XEXP (x, 0);
14213 if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14214 return orig_x;
14215 if (ix86_decompose_address (x, &addr) == 0
14216 || addr.seg != DEFAULT_TLS_SEG_REG
14217 || addr.disp == NULL_RTX
14218 || GET_CODE (addr.disp) != CONST)
14219 return orig_x;
14220 unspec = XEXP (addr.disp, 0);
14221 if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14222 unspec = XEXP (unspec, 0);
14223 if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14224 return orig_x;
14225 x = XVECEXP (unspec, 0, 0);
14226 gcc_assert (GET_CODE (x) == SYMBOL_REF);
14227 if (unspec != XEXP (addr.disp, 0))
14228 x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14229 if (addr.index)
14230 {
14231 rtx idx = addr.index;
14232 if (addr.scale != 1)
14233 idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14234 x = gen_rtx_PLUS (Pmode, idx, x);
14235 }
14236 if (addr.base)
14237 x = gen_rtx_PLUS (Pmode, addr.base, x);
14238 if (MEM_P (orig_x))
14239 x = replace_equiv_address_nv (orig_x, x);
14240 return x;
14241 }
14242
14243 /* In the name of slightly smaller debug output, and to cater to
14244 general assembler lossage, recognize PIC+GOTOFF and turn it back
14245 into a direct symbol reference.
14246
14247 On Darwin, this is necessary to avoid a crash, because Darwin
14248 has a different PIC label for each routine but the DWARF debugging
14249 information is not associated with any particular routine, so it's
14250 necessary to remove references to the PIC label from RTL stored by
14251 the DWARF output code. */
14252
14253 static rtx
14254 ix86_delegitimize_address (rtx x)
14255 {
14256 rtx orig_x = delegitimize_mem_from_attrs (x);
14257 /* addend is NULL or some rtx if x is something+GOTOFF where
14258 something doesn't include the PIC register. */
14259 rtx addend = NULL_RTX;
14260 /* reg_addend is NULL or a multiple of some register. */
14261 rtx reg_addend = NULL_RTX;
14262 /* const_addend is NULL or a const_int. */
14263 rtx const_addend = NULL_RTX;
14264 /* This is the result, or NULL. */
14265 rtx result = NULL_RTX;
14266
14267 x = orig_x;
14268
14269 if (MEM_P (x))
14270 x = XEXP (x, 0);
14271
14272 if (TARGET_64BIT)
14273 {
14274 if (GET_CODE (x) == CONST
14275 && GET_CODE (XEXP (x, 0)) == PLUS
14276 && GET_MODE (XEXP (x, 0)) == Pmode
14277 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14278 && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
14279 && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
14280 {
14281 rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
14282 x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
14283 if (MEM_P (orig_x))
14284 x = replace_equiv_address_nv (orig_x, x);
14285 return x;
14286 }
14287
14288 if (GET_CODE (x) == CONST
14289 && GET_CODE (XEXP (x, 0)) == UNSPEC
14290 && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
14291 || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
14292 && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
14293 {
14294 x = XVECEXP (XEXP (x, 0), 0, 0);
14295 if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
14296 {
14297 x = simplify_gen_subreg (GET_MODE (orig_x), x,
14298 GET_MODE (x), 0);
14299 if (x == NULL_RTX)
14300 return orig_x;
14301 }
14302 return x;
14303 }
14304
14305 if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
14306 return ix86_delegitimize_tls_address (orig_x);
14307
14308 /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
14309 and -mcmodel=medium -fpic. */
14310 }
14311
14312 if (GET_CODE (x) != PLUS
14313 || GET_CODE (XEXP (x, 1)) != CONST)
14314 return ix86_delegitimize_tls_address (orig_x);
14315
14316 if (ix86_pic_register_p (XEXP (x, 0)))
14317 /* %ebx + GOT/GOTOFF */
14318 ;
14319 else if (GET_CODE (XEXP (x, 0)) == PLUS)
14320 {
14321 /* %ebx + %reg * scale + GOT/GOTOFF */
14322 reg_addend = XEXP (x, 0);
14323 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
14324 reg_addend = XEXP (reg_addend, 1);
14325 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
14326 reg_addend = XEXP (reg_addend, 0);
14327 else
14328 {
14329 reg_addend = NULL_RTX;
14330 addend = XEXP (x, 0);
14331 }
14332 }
14333 else
14334 addend = XEXP (x, 0);
14335
14336 x = XEXP (XEXP (x, 1), 0);
14337 if (GET_CODE (x) == PLUS
14338 && CONST_INT_P (XEXP (x, 1)))
14339 {
14340 const_addend = XEXP (x, 1);
14341 x = XEXP (x, 0);
14342 }
14343
14344 if (GET_CODE (x) == UNSPEC
14345 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
14346 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
14347 || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
14348 && !MEM_P (orig_x) && !addend)))
14349 result = XVECEXP (x, 0, 0);
14350
14351 if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
14352 && !MEM_P (orig_x))
14353 result = XVECEXP (x, 0, 0);
14354
14355 if (! result)
14356 return ix86_delegitimize_tls_address (orig_x);
14357
14358 if (const_addend)
14359 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
14360 if (reg_addend)
14361 result = gen_rtx_PLUS (Pmode, reg_addend, result);
14362 if (addend)
14363 {
14364 /* If the rest of original X doesn't involve the PIC register, add
14365 addend and subtract pic_offset_table_rtx. This can happen e.g.
14366 for code like:
14367 leal (%ebx, %ecx, 4), %ecx
14368 ...
14369 movl foo@GOTOFF(%ecx), %edx
14370 in which case we return (%ecx - %ebx) + foo. */
14371 if (pic_offset_table_rtx)
14372 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
14373 pic_offset_table_rtx),
14374 result);
14375 else
14376 return orig_x;
14377 }
14378 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
14379 {
14380 result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
14381 if (result == NULL_RTX)
14382 return orig_x;
14383 }
14384 return result;
14385 }
14386
14387 /* If X is a machine specific address (i.e. a symbol or label being
14388 referenced as a displacement from the GOT implemented using an
14389 UNSPEC), then return the base term. Otherwise return X. */
14390
14391 rtx
14392 ix86_find_base_term (rtx x)
14393 {
14394 rtx term;
14395
14396 if (TARGET_64BIT)
14397 {
14398 if (GET_CODE (x) != CONST)
14399 return x;
14400 term = XEXP (x, 0);
14401 if (GET_CODE (term) == PLUS
14402 && (CONST_INT_P (XEXP (term, 1))
14403 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
14404 term = XEXP (term, 0);
14405 if (GET_CODE (term) != UNSPEC
14406 || (XINT (term, 1) != UNSPEC_GOTPCREL
14407 && XINT (term, 1) != UNSPEC_PCREL))
14408 return x;
14409
14410 return XVECEXP (term, 0, 0);
14411 }
14412
14413 return ix86_delegitimize_address (x);
14414 }
14415 \f
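/* Print to FILE the condition-code suffix ("e", "ne", "g", "be", ...)
   for comparison CODE in flags mode MODE.  If REVERSE, print the suffix
   for the reversed condition.  FP selects the alternate spellings used
   with fcmov (e.g. "nbe" instead of "a") to work around assembler
   quirks.  */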
14416 static void
14417 put_condition_code (enum rtx_code code, enum machine_mode mode, bool reverse,
14418 bool fp, FILE *file)
14419 {
14420 const char *suffix;
14421
14422 if (mode == CCFPmode || mode == CCFPUmode)
14423 {
14424 code = ix86_fp_compare_code_to_integer (code);
14425 mode = CCmode;
14426 }
14427 if (reverse)
14428 code = reverse_condition (code);
14429
14430 switch (code)
14431 {
14432 case EQ:
14433 switch (mode)
14434 {
14435 case CCAmode:
14436 suffix = "a";
14437 break;
14438
14439 case CCCmode:
14440 suffix = "c";
14441 break;
14442
14443 case CCOmode:
14444 suffix = "o";
14445 break;
14446
14447 case CCSmode:
14448 suffix = "s";
14449 break;
14450
14451 default:
14452 suffix = "e";
14453 }
14454 break;
14455 case NE:
14456 switch (mode)
14457 {
14458 case CCAmode:
14459 suffix = "na";
14460 break;
14461
14462 case CCCmode:
14463 suffix = "nc";
14464 break;
14465
14466 case CCOmode:
14467 suffix = "no";
14468 break;
14469
14470 case CCSmode:
14471 suffix = "ns";
14472 break;
14473
14474 default:
14475 suffix = "ne";
14476 }
14477 break;
14478 case GT:
14479 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
14480 suffix = "g";
14481 break;
14482 case GTU:
14483 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
14484 Those same assemblers have the same but opposite lossage on cmov. */
14485 if (mode == CCmode)
14486 suffix = fp ? "nbe" : "a";
14487 else
14488 gcc_unreachable ();
14489 break;
14490 case LT:
14491 switch (mode)
14492 {
14493 case CCNOmode:
14494 case CCGOCmode:
14495 suffix = "s";
14496 break;
14497
14498 case CCmode:
14499 case CCGCmode:
14500 suffix = "l";
14501 break;
14502
14503 default:
14504 gcc_unreachable ();
14505 }
14506 break;
14507 case LTU:
14508 if (mode == CCmode)
14509 suffix = "b";
14510 else if (mode == CCCmode)
14511 suffix = "c";
14512 else
14513 gcc_unreachable ();
14514 break;
14515 case GE:
14516 switch (mode)
14517 {
14518 case CCNOmode:
14519 case CCGOCmode:
14520 suffix = "ns";
14521 break;
14522
14523 case CCmode:
14524 case CCGCmode:
14525 suffix = "ge";
14526 break;
14527
14528 default:
14529 gcc_unreachable ();
14530 }
14531 break;
14532 case GEU:
14533 if (mode == CCmode)
14534 suffix = fp ? "nb" : "ae";
14535 else if (mode == CCCmode)
14536 suffix = "nc";
14537 else
14538 gcc_unreachable ();
14539 break;
14540 case LE:
14541 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
14542 suffix = "le";
14543 break;
14544 case LEU:
14545 if (mode == CCmode)
14546 suffix = "be";
14547 else
14548 gcc_unreachable ();
14549 break;
14550 case UNORDERED:
14551 suffix = fp ? "u" : "p";
14552 break;
14553 case ORDERED:
14554 suffix = fp ? "nu" : "np";
14555 break;
14556 default:
14557 gcc_unreachable ();
14558 }
14559 fputs (suffix, file);
14560 }
14561
14562 /* Print the name of register X to FILE based on its machine mode and number.
14563 If CODE is 'w', pretend the mode is HImode.
14564 If CODE is 'b', pretend the mode is QImode.
14565 If CODE is 'k', pretend the mode is SImode.
14566 If CODE is 'q', pretend the mode is DImode.
14567 If CODE is 'x', pretend the mode is V4SFmode.
14568 If CODE is 't', pretend the mode is V8SFmode.
14569 If CODE is 'g', pretend the mode is V16SFmode.
14570 If CODE is 'h', pretend the reg is the 'high' byte register.
14571 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
14572 If CODE is 'd', duplicate the operand for an AVX instruction.
14573 */
14574
14575 void
14576 print_reg (rtx x, int code, FILE *file)
14577 {
14578 const char *reg;
14579 unsigned int regno;
14580 bool duplicated = code == 'd' && TARGET_AVX;
14581
14582 if (ASSEMBLER_DIALECT == ASM_ATT)
14583 putc ('%', file);
14584
14585 if (x == pc_rtx)
14586 {
14587 gcc_assert (TARGET_64BIT);
14588 fputs ("rip", file);
14589 return;
14590 }
14591
14592 regno = true_regnum (x);
14593 gcc_assert (regno != ARG_POINTER_REGNUM
14594 && regno != FRAME_POINTER_REGNUM
14595 && regno != FLAGS_REG
14596 && regno != FPSR_REG
14597 && regno != FPCR_REG);
14598
14599 if (code == 'w' || MMX_REG_P (x))
14600 code = 2;
14601 else if (code == 'b')
14602 code = 1;
14603 else if (code == 'k')
14604 code = 4;
14605 else if (code == 'q')
14606 code = 8;
14607 else if (code == 'y')
14608 code = 3;
14609 else if (code == 'h')
14610 code = 0;
14611 else if (code == 'x')
14612 code = 16;
14613 else if (code == 't')
14614 code = 32;
14615 else if (code == 'g')
14616 code = 64;
14617 else
14618 code = GET_MODE_SIZE (GET_MODE (x));
14619
14620 /* Irritatingly, AMD extended registers use a different naming convention
14621 from the normal registers: "r%d[bwd]". */
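/* For example, the first extended register prints as "r8" in DImode
   and as "r8b", "r8w" or "r8d" in the narrower modes. */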
14622 if (REX_INT_REGNO_P (regno))
14623 {
14624 gcc_assert (TARGET_64BIT);
14625 putc ('r', file);
14626 fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
14627 switch (code)
14628 {
14629 case 0:
14630 error ("extended registers have no high halves");
14631 break;
14632 case 1:
14633 putc ('b', file);
14634 break;
14635 case 2:
14636 putc ('w', file);
14637 break;
14638 case 4:
14639 putc ('d', file);
14640 break;
14641 case 8:
14642 /* no suffix */
14643 break;
14644 default:
14645 error ("unsupported operand size for extended register");
14646 break;
14647 }
14648 return;
14649 }
14650
14651 reg = NULL;
14652 switch (code)
14653 {
14654 case 3:
14655 if (STACK_TOP_P (x))
14656 {
14657 reg = "st(0)";
14658 break;
14659 }
14660 /* FALLTHRU */
14661 case 8:
14662 case 4:
14663 case 12:
14664 if (! ANY_FP_REG_P (x))
14665 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
14666 /* FALLTHRU */
14667 case 16:
14668 case 2:
14669 normal:
14670 reg = hi_reg_name[regno];
14671 break;
14672 case 1:
14673 if (regno >= ARRAY_SIZE (qi_reg_name))
14674 goto normal;
14675 reg = qi_reg_name[regno];
14676 break;
14677 case 0:
14678 if (regno >= ARRAY_SIZE (qi_high_reg_name))
14679 goto normal;
14680 reg = qi_high_reg_name[regno];
14681 break;
14682 case 32:
14683 if (SSE_REG_P (x))
14684 {
14685 gcc_assert (!duplicated);
14686 putc ('y', file);
14687 fputs (hi_reg_name[regno] + 1, file);
14688 return;
14689 }
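/* FALLTHRU */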
14690 case 64:
14691 if (SSE_REG_P (x))
14692 {
14693 gcc_assert (!duplicated);
14694 putc ('z', file);
14695 fputs (hi_reg_name[REGNO (x)] + 1, file);
14696 return;
14697 }
14698 break;
14699 default:
14700 gcc_unreachable ();
14701 }
14702
14703 fputs (reg, file);
14704 if (duplicated)
14705 {
14706 if (ASSEMBLER_DIALECT == ASM_ATT)
14707 fprintf (file, ", %%%s", reg);
14708 else
14709 fprintf (file, ", %s", reg);
14710 }
14711 }
14712
14713 /* Locate some local-dynamic symbol still in use by this function
14714 so that we can print its name in some tls_local_dynamic_base
14715 pattern. */
14716
14717 static int
14718 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
14719 {
14720 rtx x = *px;
14721
14722 if (GET_CODE (x) == SYMBOL_REF
14723 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
14724 {
14725 cfun->machine->some_ld_name = XSTR (x, 0);
14726 return 1;
14727 }
14728
14729 return 0;
14730 }
14731
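/* Return the name of some local-dynamic TLS symbol referenced by the
   current function, caching it in cfun->machine->some_ld_name, or NULL
   if there is none.  */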
14732 static const char *
14733 get_some_local_dynamic_name (void)
14734 {
14735 rtx insn;
14736
14737 if (cfun->machine->some_ld_name)
14738 return cfun->machine->some_ld_name;
14739
14740 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
14741 if (NONDEBUG_INSN_P (insn)
14742 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
14743 return cfun->machine->some_ld_name;
14744
14745 return NULL;
14746 }
14747
14748 /* Meaning of CODE:
14749 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
14750 C -- print opcode suffix for set/cmov insn.
14751 c -- like C, but print reversed condition
14752 F,f -- likewise, but for floating-point.
14753 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
14754 otherwise nothing
14755 R -- print embedded rounding and sae.
14756 r -- print only sae.
14757 z -- print the opcode suffix for the size of the current operand.
14758 Z -- likewise, with special suffixes for x87 instructions.
14759 * -- print a star (in certain assembler syntax)
14760 A -- print an absolute memory reference.
14761 E -- print address with DImode register names if TARGET_64BIT.
14762 w -- print the operand as if it's a "word" (HImode) even if it isn't.
14763 s -- print a shift double count, followed by the assembler's argument
14764 delimiter.
14765 b -- print the QImode name of the register for the indicated operand.
14766 %b0 would print %al if operands[0] is reg 0.
14767 w -- likewise, print the HImode name of the register.
14768 k -- likewise, print the SImode name of the register.
14769 q -- likewise, print the DImode name of the register.
14770 x -- likewise, print the V4SFmode name of the register.
14771 t -- likewise, print the V8SFmode name of the register.
14772 g -- likewise, print the V16SFmode name of the register.
14773 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
14774 y -- print "st(0)" instead of "st" as a register.
14775 d -- print duplicated register operand for AVX instruction.
14776 D -- print condition for SSE cmp instruction.
14777 P -- if PIC, print an @PLT suffix.
14778 p -- print raw symbol name.
14779 X -- don't print any sort of PIC '@' suffix for a symbol.
14780 & -- print some in-use local-dynamic symbol name.
14781 H -- print a memory address offset by 8; used for sse high-parts
14782 Y -- print condition for XOP pcom* instruction.
14783 + -- print a branch hint as 'cs' or 'ds' prefix
14784 ; -- print a semicolon (after prefixes due to bug in older gas).
14785 ~ -- print "i" if TARGET_AVX2, "f" otherwise.
14786 @ -- print a segment register of thread base pointer load
14787 ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
14788 */
14789
14790 void
14791 ix86_print_operand (FILE *file, rtx x, int code)
14792 {
14793 if (code)
14794 {
14795 switch (code)
14796 {
14797 case 'A':
14798 switch (ASSEMBLER_DIALECT)
14799 {
14800 case ASM_ATT:
14801 putc ('*', file);
14802 break;
14803
14804 case ASM_INTEL:
14805 /* Intel syntax. For absolute addresses, registers should not
14806 be surrounded by brackets. */
14807 if (!REG_P (x))
14808 {
14809 putc ('[', file);
14810 ix86_print_operand (file, x, 0);
14811 putc (']', file);
14812 return;
14813 }
14814 break;
14815
14816 default:
14817 gcc_unreachable ();
14818 }
14819
14820 ix86_print_operand (file, x, 0);
14821 return;
14822
14823 case 'E':
14824 /* Wrap address in an UNSPEC to declare special handling. */
14825 if (TARGET_64BIT)
14826 x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
14827
14828 output_address (x);
14829 return;
14830
14831 case 'L':
14832 if (ASSEMBLER_DIALECT == ASM_ATT)
14833 putc ('l', file);
14834 return;
14835
14836 case 'W':
14837 if (ASSEMBLER_DIALECT == ASM_ATT)
14838 putc ('w', file);
14839 return;
14840
14841 case 'B':
14842 if (ASSEMBLER_DIALECT == ASM_ATT)
14843 putc ('b', file);
14844 return;
14845
14846 case 'Q':
14847 if (ASSEMBLER_DIALECT == ASM_ATT)
14848 putc ('l', file);
14849 return;
14850
14851 case 'S':
14852 if (ASSEMBLER_DIALECT == ASM_ATT)
14853 putc ('s', file);
14854 return;
14855
14856 case 'T':
14857 if (ASSEMBLER_DIALECT == ASM_ATT)
14858 putc ('t', file);
14859 return;
14860
14861 case 'O':
14862 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
14863 if (ASSEMBLER_DIALECT != ASM_ATT)
14864 return;
14865
14866 switch (GET_MODE_SIZE (GET_MODE (x)))
14867 {
14868 case 2:
14869 putc ('w', file);
14870 break;
14871
14872 case 4:
14873 putc ('l', file);
14874 break;
14875
14876 case 8:
14877 putc ('q', file);
14878 break;
14879
14880 default:
14881 output_operand_lossage
14882 ("invalid operand size for operand code 'O'");
14883 return;
14884 }
14885
14886 putc ('.', file);
14887 #endif
14888 return;
14889
14890 case 'z':
14891 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14892 {
14893 /* Opcodes don't get size suffixes if using Intel syntax. */
14894 if (ASSEMBLER_DIALECT == ASM_INTEL)
14895 return;
14896
14897 switch (GET_MODE_SIZE (GET_MODE (x)))
14898 {
14899 case 1:
14900 putc ('b', file);
14901 return;
14902
14903 case 2:
14904 putc ('w', file);
14905 return;
14906
14907 case 4:
14908 putc ('l', file);
14909 return;
14910
14911 case 8:
14912 putc ('q', file);
14913 return;
14914
14915 default:
14916 output_operand_lossage
14917 ("invalid operand size for operand code 'z'");
14918 return;
14919 }
14920 }
14921
14922 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14923 warning
14924 (0, "non-integer operand used with operand code 'z'");
14925 /* FALLTHRU */
14926
14927 case 'Z':
14928 /* 387 opcodes don't get size suffixes if using Intel syntax. */
14929 if (ASSEMBLER_DIALECT == ASM_INTEL)
14930 return;
14931
14932 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
14933 {
14934 switch (GET_MODE_SIZE (GET_MODE (x)))
14935 {
14936 case 2:
14937 #ifdef HAVE_AS_IX86_FILDS
14938 putc ('s', file);
14939 #endif
14940 return;
14941
14942 case 4:
14943 putc ('l', file);
14944 return;
14945
14946 case 8:
14947 #ifdef HAVE_AS_IX86_FILDQ
14948 putc ('q', file);
14949 #else
14950 fputs ("ll", file);
14951 #endif
14952 return;
14953
14954 default:
14955 break;
14956 }
14957 }
14958 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14959 {
14960 /* 387 opcodes don't get size suffixes
14961 if the operands are registers. */
14962 if (STACK_REG_P (x))
14963 return;
14964
14965 switch (GET_MODE_SIZE (GET_MODE (x)))
14966 {
14967 case 4:
14968 putc ('s', file);
14969 return;
14970
14971 case 8:
14972 putc ('l', file);
14973 return;
14974
14975 case 12:
14976 case 16:
14977 putc ('t', file);
14978 return;
14979
14980 default:
14981 break;
14982 }
14983 }
14984 else
14985 {
14986 output_operand_lossage
14987 ("invalid operand type used with operand code 'Z'");
14988 return;
14989 }
14990
14991 output_operand_lossage
14992 ("invalid operand size for operand code 'Z'");
14993 return;
14994
14995 case 'd':
14996 case 'b':
14997 case 'w':
14998 case 'k':
14999 case 'q':
15000 case 'h':
15001 case 't':
15002 case 'g':
15003 case 'y':
15004 case 'x':
15005 case 'X':
15006 case 'P':
15007 case 'p':
15008 break;
15009
15010 case 's':
15011 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15012 {
15013 ix86_print_operand (file, x, 0);
15014 fputs (", ", file);
15015 }
15016 return;
15017
15018 case 'Y':
15019 switch (GET_CODE (x))
15020 {
15021 case NE:
15022 fputs ("neq", file);
15023 break;
15024 case EQ:
15025 fputs ("eq", file);
15026 break;
15027 case GE:
15028 case GEU:
15029 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15030 break;
15031 case GT:
15032 case GTU:
15033 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15034 break;
15035 case LE:
15036 case LEU:
15037 fputs ("le", file);
15038 break;
15039 case LT:
15040 case LTU:
15041 fputs ("lt", file);
15042 break;
15043 case UNORDERED:
15044 fputs ("unord", file);
15045 break;
15046 case ORDERED:
15047 fputs ("ord", file);
15048 break;
15049 case UNEQ:
15050 fputs ("ueq", file);
15051 break;
15052 case UNGE:
15053 fputs ("nlt", file);
15054 break;
15055 case UNGT:
15056 fputs ("nle", file);
15057 break;
15058 case UNLE:
15059 fputs ("ule", file);
15060 break;
15061 case UNLT:
15062 fputs ("ult", file);
15063 break;
15064 case LTGT:
15065 fputs ("une", file);
15066 break;
15067 default:
15068 output_operand_lossage ("operand is not a condition code, "
15069 "invalid operand code 'Y'");
15070 return;
15071 }
15072 return;
15073
15074 case 'D':
15075 /* A little bit of braindamage here. The SSE compare instructions
15076 use completely different names for the comparisons than the
15077 fp conditional moves do. */
15078 switch (GET_CODE (x))
15079 {
15080 case UNEQ:
15081 if (TARGET_AVX)
15082 {
15083 fputs ("eq_us", file);
15084 break;
15085 }
15086 case EQ:
15087 fputs ("eq", file);
15088 break;
15089 case UNLT:
15090 if (TARGET_AVX)
15091 {
15092 fputs ("nge", file);
15093 break;
15094 }
15095 case LT:
15096 fputs ("lt", file);
15097 break;
15098 case UNLE:
15099 if (TARGET_AVX)
15100 {
15101 fputs ("ngt", file);
15102 break;
15103 }
15104 case LE:
15105 fputs ("le", file);
15106 break;
15107 case UNORDERED:
15108 fputs ("unord", file);
15109 break;
15110 case LTGT:
15111 if (TARGET_AVX)
15112 {
15113 fputs ("neq_oq", file);
15114 break;
15115 }
15116 case NE:
15117 fputs ("neq", file);
15118 break;
15119 case GE:
15120 if (TARGET_AVX)
15121 {
15122 fputs ("ge", file);
15123 break;
15124 }
15125 case UNGE:
15126 fputs ("nlt", file);
15127 break;
15128 case GT:
15129 if (TARGET_AVX)
15130 {
15131 fputs ("gt", file);
15132 break;
15133 }
15134 case UNGT:
15135 fputs ("nle", file);
15136 break;
15137 case ORDERED:
15138 fputs ("ord", file);
15139 break;
15140 default:
15141 output_operand_lossage ("operand is not a condition code, "
15142 "invalid operand code 'D'");
15143 return;
15144 }
15145 return;
15146
15147 case 'F':
15148 case 'f':
15149 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15150 if (ASSEMBLER_DIALECT == ASM_ATT)
15151 putc ('.', file);
15152 #endif
15153
15154 case 'C':
15155 case 'c':
15156 if (!COMPARISON_P (x))
15157 {
15158 output_operand_lossage ("operand is not a condition code, "
15159 "invalid operand code '%c'", code);
15160 return;
15161 }
15162 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15163 code == 'c' || code == 'f',
15164 code == 'F' || code == 'f',
15165 file);
15166 return;
15167
15168 case 'H':
15169 if (!offsettable_memref_p (x))
15170 {
15171 output_operand_lossage ("operand is not an offsettable memory "
15172 "reference, invalid operand code 'H'");
15173 return;
15174 }
15175 /* It doesn't actually matter what mode we use here, as we're
15176 only going to use this for printing. */
15177 x = adjust_address_nv (x, DImode, 8);
15178 /* Output 'qword ptr' for the Intel assembler dialect. */
15179 if (ASSEMBLER_DIALECT == ASM_INTEL)
15180 code = 'q';
15181 break;
15182
15183 case 'K':
15184 gcc_assert (CONST_INT_P (x));
15185
15186 if (INTVAL (x) & IX86_HLE_ACQUIRE)
15187 #ifdef HAVE_AS_IX86_HLE
15188 fputs ("xacquire ", file);
15189 #else
15190 fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15191 #endif
15192 else if (INTVAL (x) & IX86_HLE_RELEASE)
15193 #ifdef HAVE_AS_IX86_HLE
15194 fputs ("xrelease ", file);
15195 #else
15196 fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15197 #endif
15198 /* We do not want to print the value of the operand. */
15199 return;
15200
15201 case 'N':
15202 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15203 fputs ("{z}", file);
15204 return;
15205
15206 case 'r':
15207 gcc_assert (CONST_INT_P (x));
15208 gcc_assert (INTVAL (x) == ROUND_SAE);
15209
15210 if (ASSEMBLER_DIALECT == ASM_INTEL)
15211 fputs (", ", file);
15212
15213 fputs ("{sae}", file);
15214
15215 if (ASSEMBLER_DIALECT == ASM_ATT)
15216 fputs (", ", file);
15217
15218 return;
15219
15220 case 'R':
15221 gcc_assert (CONST_INT_P (x));
15222
15223 if (ASSEMBLER_DIALECT == ASM_INTEL)
15224 fputs (", ", file);
15225
15226 switch (INTVAL (x))
15227 {
15228 case ROUND_NEAREST_INT | ROUND_SAE:
15229 fputs ("{rn-sae}", file);
15230 break;
15231 case ROUND_NEG_INF | ROUND_SAE:
15232 fputs ("{rd-sae}", file);
15233 break;
15234 case ROUND_POS_INF | ROUND_SAE:
15235 fputs ("{ru-sae}", file);
15236 break;
15237 case ROUND_ZERO | ROUND_SAE:
15238 fputs ("{rz-sae}", file);
15239 break;
15240 default:
15241 gcc_unreachable ();
15242 }
15243
15244 if (ASSEMBLER_DIALECT == ASM_ATT)
15245 fputs (", ", file);
15246
15247 return;
15248
15249 case '*':
15250 if (ASSEMBLER_DIALECT == ASM_ATT)
15251 putc ('*', file);
15252 return;
15253
15254 case '&':
15255 {
15256 const char *name = get_some_local_dynamic_name ();
15257 if (name == NULL)
15258 output_operand_lossage ("'%%&' used without any "
15259 "local dynamic TLS references");
15260 else
15261 assemble_name (file, name);
15262 return;
15263 }
15264
15265 case '+':
15266 {
15267 rtx x;
15268
15269 if (!optimize
15270 || optimize_function_for_size_p (cfun)
15271 || !TARGET_BRANCH_PREDICTION_HINTS)
15272 return;
15273
15274 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15275 if (x)
15276 {
15277 int pred_val = XINT (x, 0);
15278
15279 if (pred_val < REG_BR_PROB_BASE * 45 / 100
15280 || pred_val > REG_BR_PROB_BASE * 55 / 100)
15281 {
15282 bool taken = pred_val > REG_BR_PROB_BASE / 2;
15283 bool cputaken
15284 = final_forward_branch_p (current_output_insn) == 0;
15285
15286 /* Emit hints only in the case default branch prediction
15287 heuristics would fail. */
15288 if (taken != cputaken)
15289 {
15290 /* We use 3e (DS) prefix for taken branches and
15291 2e (CS) prefix for not taken branches. */
15292 if (taken)
15293 fputs ("ds ; ", file);
15294 else
15295 fputs ("cs ; ", file);
15296 }
15297 }
15298 }
15299 return;
15300 }
15301
15302 case ';':
15303 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
15304 putc (';', file);
15305 #endif
15306 return;
15307
15308 case '@':
15309 if (ASSEMBLER_DIALECT == ASM_ATT)
15310 putc ('%', file);
15311
15312 /* The kernel uses a different segment register for performance
15313 reasons; a system call would not have to trash the userspace
15314 segment register, which would be expensive. */
15315 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
15316 fputs ("fs", file);
15317 else
15318 fputs ("gs", file);
15319 return;
15320
15321 case '~':
15322 putc (TARGET_AVX2 ? 'i' : 'f', file);
15323 return;
15324
15325 case '^':
15326 if (TARGET_64BIT && Pmode != word_mode)
15327 fputs ("addr32 ", file);
15328 return;
15329
15330 default:
15331 output_operand_lossage ("invalid operand code '%c'", code);
15332 }
15333 }
15334
15335 if (REG_P (x))
15336 print_reg (x, code, file);
15337
15338 else if (MEM_P (x))
15339 {
15340 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
15341 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
15342 && GET_MODE (x) != BLKmode)
15343 {
15344 const char * size;
15345 switch (GET_MODE_SIZE (GET_MODE (x)))
15346 {
15347 case 1: size = "BYTE"; break;
15348 case 2: size = "WORD"; break;
15349 case 4: size = "DWORD"; break;
15350 case 8: size = "QWORD"; break;
15351 case 12: size = "TBYTE"; break;
15352 case 16:
15353 if (GET_MODE (x) == XFmode)
15354 size = "TBYTE";
15355 else
15356 size = "XMMWORD";
15357 break;
15358 case 32: size = "YMMWORD"; break;
15359 case 64: size = "ZMMWORD"; break;
15360 default:
15361 gcc_unreachable ();
15362 }
15363
15364 /* Check for explicit size override (codes 'b', 'w', 'k',
15365 'q' and 'x') */
15366 if (code == 'b')
15367 size = "BYTE";
15368 else if (code == 'w')
15369 size = "WORD";
15370 else if (code == 'k')
15371 size = "DWORD";
15372 else if (code == 'q')
15373 size = "QWORD";
15374 else if (code == 'x')
15375 size = "XMMWORD";
15376
15377 fputs (size, file);
15378 fputs (" PTR ", file);
15379 }
15380
15381 x = XEXP (x, 0);
15382 /* Avoid (%rip) for call operands. */
15383 if (CONSTANT_ADDRESS_P (x) && code == 'P'
15384 && !CONST_INT_P (x))
15385 output_addr_const (file, x);
15386 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
15387 output_operand_lossage ("invalid constraints for operand");
15388 else
15389 output_address (x);
15390 }
15391
15392 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
15393 {
15394 REAL_VALUE_TYPE r;
15395 long l;
15396
15397 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15398 REAL_VALUE_TO_TARGET_SINGLE (r, l);
15399
15400 if (ASSEMBLER_DIALECT == ASM_ATT)
15401 putc ('$', file);
15402 /* Sign-extend the 32-bit SFmode immediate to 8 bytes. */
15403 if (code == 'q')
15404 fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
15405 (unsigned long long) (int) l);
15406 else
15407 fprintf (file, "0x%08x", (unsigned int) l);
15408 }
15409
15410 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
15411 {
15412 REAL_VALUE_TYPE r;
15413 long l[2];
15414
15415 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
15416 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
15417
15418 if (ASSEMBLER_DIALECT == ASM_ATT)
15419 putc ('$', file);
15420 fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
15421 }
15422
15423 /* These float cases don't actually occur as immediate operands. */
15424 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
15425 {
15426 char dstr[30];
15427
15428 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
15429 fputs (dstr, file);
15430 }
15431
15432 else
15433 {
15434 /* We have patterns that allow zero sets of memory, for instance.
15435 In 64-bit mode, we should probably support all 8-byte vectors,
15436 since we can in fact encode that into an immediate. */
15437 if (GET_CODE (x) == CONST_VECTOR)
15438 {
15439 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
15440 x = const0_rtx;
15441 }
15442
15443 if (code != 'P' && code != 'p')
15444 {
15445 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
15446 {
15447 if (ASSEMBLER_DIALECT == ASM_ATT)
15448 putc ('$', file);
15449 }
15450 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
15451 || GET_CODE (x) == LABEL_REF)
15452 {
15453 if (ASSEMBLER_DIALECT == ASM_ATT)
15454 putc ('$', file);
15455 else
15456 fputs ("OFFSET FLAT:", file);
15457 }
15458 }
15459 if (CONST_INT_P (x))
15460 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
15461 else if (flag_pic || MACHOPIC_INDIRECT)
15462 output_pic_addr_const (file, x, code);
15463 else
15464 output_addr_const (file, x);
15465 }
15466 }
15467
15468 static bool
15469 ix86_print_operand_punct_valid_p (unsigned char code)
15470 {
15471 return (code == '@' || code == '*' || code == '+' || code == '&'
15472 || code == ';' || code == '~' || code == '^');
15473 }
15474 \f
15475 /* Print a memory operand whose address is ADDR. */
15476
15477 static void
15478 ix86_print_operand_address (FILE *file, rtx addr)
15479 {
15480 struct ix86_address parts;
15481 rtx base, index, disp;
15482 int scale;
15483 int ok;
15484 bool vsib = false;
15485 int code = 0;
15486
15487 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
15488 {
15489 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15490 gcc_assert (parts.index == NULL_RTX);
15491 parts.index = XVECEXP (addr, 0, 1);
15492 parts.scale = INTVAL (XVECEXP (addr, 0, 2));
15493 addr = XVECEXP (addr, 0, 0);
15494 vsib = true;
15495 }
15496 else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
15497 {
15498 gcc_assert (TARGET_64BIT);
15499 ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
15500 code = 'q';
15501 }
15502 else
15503 ok = ix86_decompose_address (addr, &parts);
15504
15505 gcc_assert (ok);
15506
15507 base = parts.base;
15508 index = parts.index;
15509 disp = parts.disp;
15510 scale = parts.scale;
15511
15512 switch (parts.seg)
15513 {
15514 case SEG_DEFAULT:
15515 break;
15516 case SEG_FS:
15517 case SEG_GS:
15518 if (ASSEMBLER_DIALECT == ASM_ATT)
15519 putc ('%', file);
15520 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
15521 break;
15522 default:
15523 gcc_unreachable ();
15524 }
15525
15526 /* Use the one-byte-shorter RIP-relative addressing form in 64-bit mode. */
15527 if (TARGET_64BIT && !base && !index)
15528 {
15529 rtx symbol = disp;
15530
15531 if (GET_CODE (disp) == CONST
15532 && GET_CODE (XEXP (disp, 0)) == PLUS
15533 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15534 symbol = XEXP (XEXP (disp, 0), 0);
15535
15536 if (GET_CODE (symbol) == LABEL_REF
15537 || (GET_CODE (symbol) == SYMBOL_REF
15538 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
15539 base = pc_rtx;
15540 }
15541 if (!base && !index)
15542 {
15543 /* A displacement-only address requires special attention. */
15544
15545 if (CONST_INT_P (disp))
15546 {
15547 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
15548 fputs ("ds:", file);
15549 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
15550 }
15551 else if (flag_pic)
15552 output_pic_addr_const (file, disp, 0);
15553 else
15554 output_addr_const (file, disp);
15555 }
15556 else
15557 {
15558 /* Print SImode register names to force addr32 prefix. */
15559 if (SImode_address_operand (addr, VOIDmode))
15560 {
15561 #ifdef ENABLE_CHECKING
15562 gcc_assert (TARGET_64BIT);
15563 switch (GET_CODE (addr))
15564 {
15565 case SUBREG:
15566 gcc_assert (GET_MODE (addr) == SImode);
15567 gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
15568 break;
15569 case ZERO_EXTEND:
15570 case AND:
15571 gcc_assert (GET_MODE (addr) == DImode);
15572 break;
15573 default:
15574 gcc_unreachable ();
15575 }
15576 #endif
15577 gcc_assert (!code);
15578 code = 'k';
15579 }
15580 else if (code == 0
15581 && TARGET_X32
15582 && disp
15583 && CONST_INT_P (disp)
15584 && INTVAL (disp) < -16*1024*1024)
15585 {
15586 /* X32 runs in 64-bit mode, where displacement, DISP, in
15587 address DISP(%r64), is encoded as 32-bit immediate sign-
15588 extended from 32-bit to 64-bit. For -0x40000300(%r64),
15589 address is %r64 + 0xffffffffbffffd00. When %r64 <
15590 0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
15591 which is invalid for x32. The correct address is %r64
15592 - 0x40000300 == 0xf7ffdd64. To properly encode
15593 -0x40000300(%r64) for x32, we zero-extend negative
15594 displacement by forcing addr32 prefix which truncates
15595 0xfffffffff7ffdd64 to 0xf7ffdd64. In theory, we should
15596 zero-extend all negative displacements, including -1(%rsp).
15597 However, for small negative displacements, sign-extension
15598 won't cause overflow. We only zero-extend negative
15599 displacements if they are less than -16*1024*1024, which is also
15600 the bound used to check legitimate address displacements for PIC. */
15601 code = 'k';
15602 }
15603
15604 if (ASSEMBLER_DIALECT == ASM_ATT)
15605 {
15606 if (disp)
15607 {
15608 if (flag_pic)
15609 output_pic_addr_const (file, disp, 0);
15610 else if (GET_CODE (disp) == LABEL_REF)
15611 output_asm_label (disp);
15612 else
15613 output_addr_const (file, disp);
15614 }
15615
15616 putc ('(', file);
15617 if (base)
15618 print_reg (base, code, file);
15619 if (index)
15620 {
15621 putc (',', file);
15622 print_reg (index, vsib ? 0 : code, file);
15623 if (scale != 1 || vsib)
15624 fprintf (file, ",%d", scale);
15625 }
15626 putc (')', file);
15627 }
15628 else
15629 {
15630 rtx offset = NULL_RTX;
15631
15632 if (disp)
15633 {
15634 /* Pull out the offset of a symbol; print any symbol itself. */
15635 if (GET_CODE (disp) == CONST
15636 && GET_CODE (XEXP (disp, 0)) == PLUS
15637 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
15638 {
15639 offset = XEXP (XEXP (disp, 0), 1);
15640 disp = gen_rtx_CONST (VOIDmode,
15641 XEXP (XEXP (disp, 0), 0));
15642 }
15643
15644 if (flag_pic)
15645 output_pic_addr_const (file, disp, 0);
15646 else if (GET_CODE (disp) == LABEL_REF)
15647 output_asm_label (disp);
15648 else if (CONST_INT_P (disp))
15649 offset = disp;
15650 else
15651 output_addr_const (file, disp);
15652 }
15653
15654 putc ('[', file);
15655 if (base)
15656 {
15657 print_reg (base, code, file);
15658 if (offset)
15659 {
15660 if (INTVAL (offset) >= 0)
15661 putc ('+', file);
15662 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15663 }
15664 }
15665 else if (offset)
15666 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
15667 else
15668 putc ('0', file);
15669
15670 if (index)
15671 {
15672 putc ('+', file);
15673 print_reg (index, vsib ? 0 : code, file);
15674 if (scale != 1 || vsib)
15675 fprintf (file, "*%d", scale);
15676 }
15677 putc (']', file);
15678 }
15679 }
15680 }
15681
15682 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
15683
15684 static bool
15685 i386_asm_output_addr_const_extra (FILE *file, rtx x)
15686 {
15687 rtx op;
15688
15689 if (GET_CODE (x) != UNSPEC)
15690 return false;
15691
15692 op = XVECEXP (x, 0, 0);
15693 switch (XINT (x, 1))
15694 {
15695 case UNSPEC_GOTTPOFF:
15696 output_addr_const (file, op);
15697 /* FIXME: This might be @TPOFF in Sun ld. */
15698 fputs ("@gottpoff", file);
15699 break;
15700 case UNSPEC_TPOFF:
15701 output_addr_const (file, op);
15702 fputs ("@tpoff", file);
15703 break;
15704 case UNSPEC_NTPOFF:
15705 output_addr_const (file, op);
15706 if (TARGET_64BIT)
15707 fputs ("@tpoff", file);
15708 else
15709 fputs ("@ntpoff", file);
15710 break;
15711 case UNSPEC_DTPOFF:
15712 output_addr_const (file, op);
15713 fputs ("@dtpoff", file);
15714 break;
15715 case UNSPEC_GOTNTPOFF:
15716 output_addr_const (file, op);
15717 if (TARGET_64BIT)
15718 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
15719 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
15720 else
15721 fputs ("@gotntpoff", file);
15722 break;
15723 case UNSPEC_INDNTPOFF:
15724 output_addr_const (file, op);
15725 fputs ("@indntpoff", file);
15726 break;
15727 #if TARGET_MACHO
15728 case UNSPEC_MACHOPIC_OFFSET:
15729 output_addr_const (file, op);
15730 putc ('-', file);
15731 machopic_output_function_base_name (file);
15732 break;
15733 #endif
15734
15735 case UNSPEC_STACK_CHECK:
15736 {
15737 int offset;
15738
15739 gcc_assert (flag_split_stack);
15740
15741 #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
15742 offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
15743 #else
15744 gcc_unreachable ();
15745 #endif
15746
15747 fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
15748 }
15749 break;
15750
15751 default:
15752 return false;
15753 }
15754
15755 return true;
15756 }
15757 \f
15758 /* Split one or more double-mode RTL references into pairs of half-mode
15759 references. The RTL can be REG, offsettable MEM, integer constant, or
15760 CONST_DOUBLE. "operands" is a pointer to an array of double-mode RTLs to
15761 split and "num" is its length. lo_half and hi_half are output arrays
15762 that parallel "operands". */
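/* For example, on this little-endian target a DImode MEM at address A
   splits into SImode MEMs at A (lo half) and A + 4 (hi half), while a
   DImode REG becomes two SImode subregs at byte offsets 0 and 4.  */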
15763
15764 void
15765 split_double_mode (enum machine_mode mode, rtx operands[],
15766 int num, rtx lo_half[], rtx hi_half[])
15767 {
15768 enum machine_mode half_mode;
15769 unsigned int byte;
15770
15771 switch (mode)
15772 {
15773 case TImode:
15774 half_mode = DImode;
15775 break;
15776 case DImode:
15777 half_mode = SImode;
15778 break;
15779 default:
15780 gcc_unreachable ();
15781 }
15782
15783 byte = GET_MODE_SIZE (half_mode);
15784
15785 while (num--)
15786 {
15787 rtx op = operands[num];
15788
15789 /* simplify_subreg refuses to split volatile memory addresses,
15790 but we still have to handle them. */
15791 if (MEM_P (op))
15792 {
15793 lo_half[num] = adjust_address (op, half_mode, 0);
15794 hi_half[num] = adjust_address (op, half_mode, byte);
15795 }
15796 else
15797 {
15798 lo_half[num] = simplify_gen_subreg (half_mode, op,
15799 GET_MODE (op) == VOIDmode
15800 ? mode : GET_MODE (op), 0);
15801 hi_half[num] = simplify_gen_subreg (half_mode, op,
15802 GET_MODE (op) == VOIDmode
15803 ? mode : GET_MODE (op), byte);
15804 }
15805 }
15806 }
15807 \f
15808 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
15809 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
15810 is the expression of the binary operation. The output may either be
15811 emitted here, or returned to the caller, like all output_* functions.
15812
15813 There is no guarantee that the operands are the same mode, as they
15814 might be within FLOAT or FLOAT_EXTEND expressions. */
15815
15816 #ifndef SYSV386_COMPAT
15817 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
15818 wants to fix the assemblers because that causes incompatibility
15819 with gcc. No-one wants to fix gcc because that causes
15820 incompatibility with assemblers... You can use the option of
15821 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
15822 #define SYSV386_COMPAT 1
15823 #endif
15824
15825 const char *
15826 output_387_binary_op (rtx insn, rtx *operands)
15827 {
15828 static char buf[40];
15829 const char *p;
15830 const char *ssep;
15831 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
15832
15833 #ifdef ENABLE_CHECKING
15834 /* Even if we do not want to check the inputs, this documents input
15835 constraints, which helps in understanding the following code. */
15836 if (STACK_REG_P (operands[0])
15837 && ((REG_P (operands[1])
15838 && REGNO (operands[0]) == REGNO (operands[1])
15839 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
15840 || (REG_P (operands[2])
15841 && REGNO (operands[0]) == REGNO (operands[2])
15842 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
15843 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
15844 ; /* ok */
15845 else
15846 gcc_assert (is_sse);
15847 #endif
15848
15849 switch (GET_CODE (operands[3]))
15850 {
15851 case PLUS:
15852 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15853 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15854 p = "fiadd";
15855 else
15856 p = "fadd";
15857 ssep = "vadd";
15858 break;
15859
15860 case MINUS:
15861 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15862 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15863 p = "fisub";
15864 else
15865 p = "fsub";
15866 ssep = "vsub";
15867 break;
15868
15869 case MULT:
15870 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15871 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15872 p = "fimul";
15873 else
15874 p = "fmul";
15875 ssep = "vmul";
15876 break;
15877
15878 case DIV:
15879 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
15880 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
15881 p = "fidiv";
15882 else
15883 p = "fdiv";
15884 ssep = "vdiv";
15885 break;
15886
15887 default:
15888 gcc_unreachable ();
15889 }
15890
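/* ssep holds the AVX mnemonic stem ("vadd", "vsub", ...); the non-AVX
   SSE form below skips the leading 'v' by using ssep + 1.  */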
15891 if (is_sse)
15892 {
15893 if (TARGET_AVX)
15894 {
15895 strcpy (buf, ssep);
15896 if (GET_MODE (operands[0]) == SFmode)
15897 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
15898 else
15899 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
15900 }
15901 else
15902 {
15903 strcpy (buf, ssep + 1);
15904 if (GET_MODE (operands[0]) == SFmode)
15905 strcat (buf, "ss\t{%2, %0|%0, %2}");
15906 else
15907 strcat (buf, "sd\t{%2, %0|%0, %2}");
15908 }
15909 return buf;
15910 }
15911 strcpy (buf, p);
15912
15913 switch (GET_CODE (operands[3]))
15914 {
15915 case MULT:
15916 case PLUS:
15917 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
15918 {
15919 rtx temp = operands[2];
15920 operands[2] = operands[1];
15921 operands[1] = temp;
15922 }
15923
15924 /* We now know that operands[0] == operands[1]. */
15925
15926 if (MEM_P (operands[2]))
15927 {
15928 p = "%Z2\t%2";
15929 break;
15930 }
15931
15932 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15933 {
15934 if (STACK_TOP_P (operands[0]))
15935 /* How is it that we are storing to a dead operand[2]?
15936 Well, presumably operands[1] is dead too. We can't
15937 store the result to st(0) as st(0) gets popped on this
15938 instruction. Instead store to operands[2] (which I
15939 think has to be st(1)). st(1) will be popped later.
15940 gcc <= 2.8.1 didn't have this check and generated
15941 assembly code that the Unixware assembler rejected. */
15942 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15943 else
15944 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15945 break;
15946 }
15947
15948 if (STACK_TOP_P (operands[0]))
15949 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
15950 else
15951 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
15952 break;
15953
15954 case MINUS:
15955 case DIV:
15956 if (MEM_P (operands[1]))
15957 {
15958 p = "r%Z1\t%1";
15959 break;
15960 }
15961
15962 if (MEM_P (operands[2]))
15963 {
15964 p = "%Z2\t%2";
15965 break;
15966 }
15967
15968 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
15969 {
15970 #if SYSV386_COMPAT
15971 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
15972 derived assemblers, confusingly reverse the direction of
15973 the operation for fsub{r} and fdiv{r} when the
15974 destination register is not st(0). The Intel assembler
15975 doesn't have this brain damage. Read !SYSV386_COMPAT to
15976 figure out what the hardware really does. */
15977 if (STACK_TOP_P (operands[0]))
15978 p = "{p\t%0, %2|rp\t%2, %0}";
15979 else
15980 p = "{rp\t%2, %0|p\t%0, %2}";
15981 #else
15982 if (STACK_TOP_P (operands[0]))
15983 /* As above for fmul/fadd, we can't store to st(0). */
15984 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
15985 else
15986 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
15987 #endif
15988 break;
15989 }
15990
15991 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
15992 {
15993 #if SYSV386_COMPAT
15994 if (STACK_TOP_P (operands[0]))
15995 p = "{rp\t%0, %1|p\t%1, %0}";
15996 else
15997 p = "{p\t%1, %0|rp\t%0, %1}";
15998 #else
15999 if (STACK_TOP_P (operands[0]))
16000 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
16001 else
16002 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
16003 #endif
16004 break;
16005 }
16006
16007 if (STACK_TOP_P (operands[0]))
16008 {
16009 if (STACK_TOP_P (operands[1]))
16010 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
16011 else
16012 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
16013 break;
16014 }
16015 else if (STACK_TOP_P (operands[1]))
16016 {
16017 #if SYSV386_COMPAT
16018 p = "{\t%1, %0|r\t%0, %1}";
16019 #else
16020 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
16021 #endif
16022 }
16023 else
16024 {
16025 #if SYSV386_COMPAT
16026 p = "{r\t%2, %0|\t%0, %2}";
16027 #else
16028 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
16029 #endif
16030 }
16031 break;
16032
16033 default:
16034 gcc_unreachable ();
16035 }
16036
16037 strcat (buf, p);
16038 return buf;
16039 }
16040
16041 /* Check if a 256bit AVX register is referenced inside of EXP. */
16042
16043 static int
16044 ix86_check_avx256_register (rtx *pexp, void *data ATTRIBUTE_UNUSED)
16045 {
16046 rtx exp = *pexp;
16047
16048 if (GET_CODE (exp) == SUBREG)
16049 exp = SUBREG_REG (exp);
16050
16051 if (REG_P (exp)
16052 && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)))
16053 return 1;
16054
16055 return 0;
16056 }
16057
16058 /* Return needed mode for entity in optimize_mode_switching pass. */
16059
16060 static int
16061 ix86_avx_u128_mode_needed (rtx insn)
16062 {
16063 if (CALL_P (insn))
16064 {
16065 rtx link;
16066
16067 /* Needed mode is set to AVX_U128_CLEAN if there are
16068 no 256bit modes used in function arguments. */
16069 for (link = CALL_INSN_FUNCTION_USAGE (insn);
16070 link;
16071 link = XEXP (link, 1))
16072 {
16073 if (GET_CODE (XEXP (link, 0)) == USE)
16074 {
16075 rtx arg = XEXP (XEXP (link, 0), 0);
16076
16077 if (ix86_check_avx256_register (&arg, NULL))
16078 return AVX_U128_DIRTY;
16079 }
16080 }
16081
16082 return AVX_U128_CLEAN;
16083 }
16084
16085 /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
16086 changes state only when a 256bit register is written to, but we need
16087 to prevent the compiler from moving the optimal insertion point above
16088 an eventual read from a 256bit register. */
16089 if (for_each_rtx (&PATTERN (insn), ix86_check_avx256_register, NULL))
16090 return AVX_U128_DIRTY;
16091
16092 return AVX_U128_ANY;
16093 }
16094
16095 /* Return mode that i387 must be switched into
16096 prior to the execution of insn. */
16097
16098 static int
16099 ix86_i387_mode_needed (int entity, rtx insn)
16100 {
16101 enum attr_i387_cw mode;
16102
16103 /* The mode UNINITIALIZED is used to store the control word after a
16104 function call or ASM pattern. The mode ANY specifies that the insn
16105 has no requirements on the control word and makes no changes in the
16106 bits we are interested in. */
16107
16108 if (CALL_P (insn)
16109 || (NONJUMP_INSN_P (insn)
16110 && (asm_noperands (PATTERN (insn)) >= 0
16111 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16112 return I387_CW_UNINITIALIZED;
16113
16114 if (recog_memoized (insn) < 0)
16115 return I387_CW_ANY;
16116
16117 mode = get_attr_i387_cw (insn);
16118
16119 switch (entity)
16120 {
16121 case I387_TRUNC:
16122 if (mode == I387_CW_TRUNC)
16123 return mode;
16124 break;
16125
16126 case I387_FLOOR:
16127 if (mode == I387_CW_FLOOR)
16128 return mode;
16129 break;
16130
16131 case I387_CEIL:
16132 if (mode == I387_CW_CEIL)
16133 return mode;
16134 break;
16135
16136 case I387_MASK_PM:
16137 if (mode == I387_CW_MASK_PM)
16138 return mode;
16139 break;
16140
16141 default:
16142 gcc_unreachable ();
16143 }
16144
16145 return I387_CW_ANY;
16146 }
16147
16148 /* Return mode that entity must be switched into
16149 prior to the execution of insn. */
16150
16151 static int
16152 ix86_mode_needed (int entity, rtx insn)
16153 {
16154 switch (entity)
16155 {
16156 case AVX_U128:
16157 return ix86_avx_u128_mode_needed (insn);
16158 case I387_TRUNC:
16159 case I387_FLOOR:
16160 case I387_CEIL:
16161 case I387_MASK_PM:
16162 return ix86_i387_mode_needed (entity, insn);
16163 default:
16164 gcc_unreachable ();
16165 }
16166 return 0;
16167 }
16168
16169 /* Check if a 256bit AVX register is referenced in stores. */
16170
16171 static void
16172 ix86_check_avx256_stores (rtx dest, const_rtx set ATTRIBUTE_UNUSED, void *data)
16173 {
16174 if (ix86_check_avx256_register (&dest, NULL))
16175 {
16176 bool *used = (bool *) data;
16177 *used = true;
16178 }
16179 }
16180
16181 /* Calculate mode of upper 128bit AVX registers after the insn. */
16182
16183 static int
16184 ix86_avx_u128_mode_after (int mode, rtx insn)
16185 {
16186 rtx pat = PATTERN (insn);
16187
16188 if (vzeroupper_operation (pat, VOIDmode)
16189 || vzeroall_operation (pat, VOIDmode))
16190 return AVX_U128_CLEAN;
16191
16192 /* We know that the state is clean after a CALL insn if no 256bit
16193 register is used for the function return value. */
16194 if (CALL_P (insn))
16195 {
16196 bool avx_reg256_found = false;
16197 note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16198
16199 return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16200 }
16201
16202 /* Otherwise, return current mode. Remember that if insn
16203 references AVX 256bit registers, the mode was already changed
16204 to DIRTY from MODE_NEEDED. */
16205 return mode;
16206 }
16207
16208 /* Return the mode that an insn results in. */
16209
16210 int
16211 ix86_mode_after (int entity, int mode, rtx insn)
16212 {
16213 switch (entity)
16214 {
16215 case AVX_U128:
16216 return ix86_avx_u128_mode_after (mode, insn);
16217 case I387_TRUNC:
16218 case I387_FLOOR:
16219 case I387_CEIL:
16220 case I387_MASK_PM:
16221 return mode;
16222 default:
16223 gcc_unreachable ();
16224 }
16225 }
16226
16227 static int
16228 ix86_avx_u128_mode_entry (void)
16229 {
16230 tree arg;
16231
16232 /* Entry mode is set to AVX_U128_DIRTY if there are
16233 256bit modes used in function arguments. */
16234 for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16235 arg = TREE_CHAIN (arg))
16236 {
16237 rtx incoming = DECL_INCOMING_RTL (arg);
16238
16239 if (incoming && ix86_check_avx256_register (&incoming, NULL))
16240 return AVX_U128_DIRTY;
16241 }
16242
16243 return AVX_U128_CLEAN;
16244 }
16245
16246 /* Return a mode that ENTITY is assumed to be
16247 switched to at function entry. */
16248
16249 static int
16250 ix86_mode_entry (int entity)
16251 {
16252 switch (entity)
16253 {
16254 case AVX_U128:
16255 return ix86_avx_u128_mode_entry ();
16256 case I387_TRUNC:
16257 case I387_FLOOR:
16258 case I387_CEIL:
16259 case I387_MASK_PM:
16260 return I387_CW_ANY;
16261 default:
16262 gcc_unreachable ();
16263 }
16264 }
16265
16266 static int
16267 ix86_avx_u128_mode_exit (void)
16268 {
16269 rtx reg = crtl->return_rtx;
16270
16271 /* Exit mode is set to AVX_U128_DIRTY if there are
16272 256bit modes used in the function return register. */
16273 if (reg && ix86_check_avx256_register (&reg, NULL))
16274 return AVX_U128_DIRTY;
16275
16276 return AVX_U128_CLEAN;
16277 }
16278
16279 /* Return a mode that ENTITY is assumed to be
16280 switched to at function exit. */
16281
16282 static int
16283 ix86_mode_exit (int entity)
16284 {
16285 switch (entity)
16286 {
16287 case AVX_U128:
16288 return ix86_avx_u128_mode_exit ();
16289 case I387_TRUNC:
16290 case I387_FLOOR:
16291 case I387_CEIL:
16292 case I387_MASK_PM:
16293 return I387_CW_ANY;
16294 default:
16295 gcc_unreachable ();
16296 }
16297 }
16298
16299 static int
16300 ix86_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
16301 {
16302 return n;
16303 }
16304
16305 /* Output code to initialize control word copies used by trunc?f?i and
16306 rounding patterns. The current control word is read into a scratch
16307 register, adjusted according to MODE, and saved to the slot for MODE. */
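/* The x87 control word selects rounding via bits 10-11 (the RC field):
   00 = to nearest, 01 = toward -inf, 10 = toward +inf, 11 = toward zero
   (truncate); bit 5 (0x0020) masks the precision exception.  Hence the
   0x0400, 0x0800, 0x0c00 and 0x0020 constants used below.  */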
16308
16309 static void
16310 emit_i387_cw_initialization (int mode)
16311 {
16312 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
16313 rtx new_mode;
16314
16315 enum ix86_stack_slot slot;
16316
16317 rtx reg = gen_reg_rtx (HImode);
16318
16319 emit_insn (gen_x86_fnstcw_1 (stored_mode));
16320 emit_move_insn (reg, copy_rtx (stored_mode));
16321
16322 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
16323 || optimize_insn_for_size_p ())
16324 {
16325 switch (mode)
16326 {
16327 case I387_CW_TRUNC:
16328 /* round toward zero (truncate) */
16329 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
16330 slot = SLOT_CW_TRUNC;
16331 break;
16332
16333 case I387_CW_FLOOR:
16334 /* round down toward -oo */
16335 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16336 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
16337 slot = SLOT_CW_FLOOR;
16338 break;
16339
16340 case I387_CW_CEIL:
16341 /* round up toward +oo */
16342 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
16343 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
16344 slot = SLOT_CW_CEIL;
16345 break;
16346
16347 case I387_CW_MASK_PM:
16348 /* mask precision exception for nearbyint() */
16349 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16350 slot = SLOT_CW_MASK_PM;
16351 break;
16352
16353 default:
16354 gcc_unreachable ();
16355 }
16356 }
16357 else
16358 {
16359 switch (mode)
16360 {
16361 case I387_CW_TRUNC:
16362 /* round toward zero (truncate) */
16363 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
16364 slot = SLOT_CW_TRUNC;
16365 break;
16366
16367 case I387_CW_FLOOR:
16368 /* round down toward -oo */
16369 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
16370 slot = SLOT_CW_FLOOR;
16371 break;
16372
16373 case I387_CW_CEIL:
16374 /* round up toward +oo */
16375 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
16376 slot = SLOT_CW_CEIL;
16377 break;
16378
16379 case I387_CW_MASK_PM:
16380 /* mask precision exception for nearbyint() */
16381 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
16382 slot = SLOT_CW_MASK_PM;
16383 break;
16384
16385 default:
16386 gcc_unreachable ();
16387 }
16388 }
16389
16390 gcc_assert (slot < MAX_386_STACK_LOCALS);
16391
16392 new_mode = assign_386_stack_local (HImode, slot);
16393 emit_move_insn (new_mode, reg);
16394 }
16395
16396 /* Emit vzeroupper. */
16397
16398 void
16399 ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
16400 {
16401 int i;
16402
16403 /* Cancel automatic vzeroupper insertion if there are
16404 live call-saved SSE registers at the insertion point. */
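/* vzeroupper zeroes the upper 128 bits of every %ymm register, so it
   would corrupt any 256bit value still live in a call-saved register.  */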
16405
16406 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16407 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16408 return;
16409
16410 if (TARGET_64BIT)
16411 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16412 if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
16413 return;
16414
16415 emit_insn (gen_avx_vzeroupper ());
16416 }
16417
16420 /* Generate one or more insns to set ENTITY to MODE. HARD_REG_LIVE
16421 is the set of hard registers live at the point where the insn(s)
16422 are to be inserted. */
16423
16424 static void
16425 ix86_emit_mode_set (int entity, int mode, HARD_REG_SET regs_live)
16426 {
16427 switch (entity)
16428 {
16429 case AVX_U128:
16430 if (mode == AVX_U128_CLEAN)
16431 ix86_avx_emit_vzeroupper (regs_live);
16432 break;
16433 case I387_TRUNC:
16434 case I387_FLOOR:
16435 case I387_CEIL:
16436 case I387_MASK_PM:
16437 if (mode != I387_CW_ANY
16438 && mode != I387_CW_UNINITIALIZED)
16439 emit_i387_cw_initialization (mode);
16440 break;
16441 default:
16442 gcc_unreachable ();
16443 }
16444 }
16445
16446 /* Output code for INSN to convert a float to a signed int. OPERANDS
16447 are the insn operands. The output may be [HSD]Imode and the input
16448 operand may be [SDX]Fmode. */
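/* operands[2] is expected to hold the saved control word and operands[3]
   the rounding-mode control word; the latter is loaded before the fistp
   and the former restored afterwards, as emitted below.  */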
16449
16450 const char *
16451 output_fix_trunc (rtx insn, rtx *operands, bool fisttp)
16452 {
16453 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16454 int dimode_p = GET_MODE (operands[0]) == DImode;
16455 int round_mode = get_attr_i387_cw (insn);
16456
16457 /* Jump through a hoop or two for DImode, since the hardware has no
16458 non-popping instruction. We used to do this a different way, but
16459 that was somewhat fragile and broke with post-reload splitters. */
16460 if ((dimode_p || fisttp) && !stack_top_dies)
16461 output_asm_insn ("fld\t%y1", operands);
16462
16463 gcc_assert (STACK_TOP_P (operands[1]));
16464 gcc_assert (MEM_P (operands[0]));
16465 gcc_assert (GET_MODE (operands[1]) != TFmode);
16466
16467 if (fisttp)
16468 output_asm_insn ("fisttp%Z0\t%0", operands);
16469 else
16470 {
16471 if (round_mode != I387_CW_ANY)
16472 output_asm_insn ("fldcw\t%3", operands);
16473 if (stack_top_dies || dimode_p)
16474 output_asm_insn ("fistp%Z0\t%0", operands);
16475 else
16476 output_asm_insn ("fist%Z0\t%0", operands);
16477 if (round_mode != I387_CW_ANY)
16478 output_asm_insn ("fldcw\t%2", operands);
16479 }
16480
16481 return "";
16482 }
16483
16484 /* Output code for x87 ffreep insn. The OPNO argument, which may only
16485 have the values zero or one, indicates the ffreep insn's operand
16486 from the OPERANDS array. */
16487
16488 static const char *
16489 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
16490 {
16491 if (TARGET_USE_FFREEP)
16492 #ifdef HAVE_AS_IX86_FFREEP
16493 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
16494 #else
16495 {
16496 static char retval[32];
16497 int regno = REGNO (operands[opno]);
16498
16499 gcc_assert (STACK_REGNO_P (regno));
16500
16501 regno -= FIRST_STACK_REG;
16502
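/* Emit the raw encoding of "ffreep %st(regno)": opcode bytes 0xdf,
   0xc0 + regno, written here as one little-endian 16-bit word.  */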
16503 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
16504 return retval;
16505 }
16506 #endif
16507
16508 return opno ? "fstp\t%y1" : "fstp\t%y0";
16509 }
16510
16511
16512 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
16513 should be used. UNORDERED_P is true when fucom should be used. */
16514
16515 const char *
16516 output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
16517 {
16518 int stack_top_dies;
16519 rtx cmp_op0, cmp_op1;
16520 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
16521
16522 if (eflags_p)
16523 {
16524 cmp_op0 = operands[0];
16525 cmp_op1 = operands[1];
16526 }
16527 else
16528 {
16529 cmp_op0 = operands[1];
16530 cmp_op1 = operands[2];
16531 }
16532
16533 if (is_sse)
16534 {
16535 if (GET_MODE (operands[0]) == SFmode)
16536 if (unordered_p)
16537 return "%vucomiss\t{%1, %0|%0, %1}";
16538 else
16539 return "%vcomiss\t{%1, %0|%0, %1}";
16540 else
16541 if (unordered_p)
16542 return "%vucomisd\t{%1, %0|%0, %1}";
16543 else
16544 return "%vcomisd\t{%1, %0|%0, %1}";
16545 }
16546
16547 gcc_assert (STACK_TOP_P (cmp_op0));
16548
16549 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
16550
16551 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
16552 {
16553 if (stack_top_dies)
16554 {
16555 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
16556 return output_387_ffreep (operands, 1);
16557 }
16558 else
16559 return "ftst\n\tfnstsw\t%0";
16560 }
16561
16562 if (STACK_REG_P (cmp_op1)
16563 && stack_top_dies
16564 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
16565 && REGNO (cmp_op1) != FIRST_STACK_REG)
16566 {
16567 /* If the top of the 387 stack dies, and the other operand
16568 is also a stack register that dies, then this must be a
16569 `fcompp' float compare */
16570
16571 if (eflags_p)
16572 {
16573 /* There is no double popping fcomi variant. Fortunately,
16574 eflags is immune from the fstp's cc clobbering. */
16575 if (unordered_p)
16576 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
16577 else
16578 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
16579 return output_387_ffreep (operands, 0);
16580 }
16581 else
16582 {
16583 if (unordered_p)
16584 return "fucompp\n\tfnstsw\t%0";
16585 else
16586 return "fcompp\n\tfnstsw\t%0";
16587 }
16588 }
16589 else
16590 {
16591 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
16592
16593 static const char * const alt[16] =
16594 {
16595 "fcom%Z2\t%y2\n\tfnstsw\t%0",
16596 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
16597 "fucom%Z2\t%y2\n\tfnstsw\t%0",
16598 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
16599
16600 "ficom%Z2\t%y2\n\tfnstsw\t%0",
16601 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
16602 NULL,
16603 NULL,
16604
16605 "fcomi\t{%y1, %0|%0, %y1}",
16606 "fcomip\t{%y1, %0|%0, %y1}",
16607 "fucomi\t{%y1, %0|%0, %y1}",
16608 "fucomip\t{%y1, %0|%0, %y1}",
16609
16610 NULL,
16611 NULL,
16612 NULL,
16613 NULL
16614 };
16615
16616 int mask;
16617 const char *ret;
16618
16619 mask = eflags_p << 3;
16620 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
16621 mask |= unordered_p << 1;
16622 mask |= stack_top_dies;
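/* For example, eflags_p == 1 and unordered_p == 1 with a dying stack
   top and a non-integer operand give mask == 11, selecting the
   "fucomip" entry in the table above.  */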
16623
16624 gcc_assert (mask < 16);
16625 ret = alt[mask];
16626 gcc_assert (ret);
16627
16628 return ret;
16629 }
16630 }
16631
16632 void
16633 ix86_output_addr_vec_elt (FILE *file, int value)
16634 {
16635 const char *directive = ASM_LONG;
16636
16637 #ifdef ASM_QUAD
16638 if (TARGET_LP64)
16639 directive = ASM_QUAD;
16640 #else
16641 gcc_assert (!TARGET_64BIT);
16642 #endif
16643
16644 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
16645 }
16646
16647 void
16648 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
16649 {
16650 const char *directive = ASM_LONG;
16651
16652 #ifdef ASM_QUAD
16653 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
16654 directive = ASM_QUAD;
16655 #else
16656 gcc_assert (!TARGET_64BIT);
16657 #endif
16658 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
16659 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
16660 fprintf (file, "%s%s%d-%s%d\n",
16661 directive, LPREFIX, value, LPREFIX, rel);
16662 else if (HAVE_AS_GOTOFF_IN_DATA)
16663 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
16664 #if TARGET_MACHO
16665 else if (TARGET_MACHO)
16666 {
16667 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
16668 machopic_output_function_base_name (file);
16669 putc ('\n', file);
16670 }
16671 #endif
16672 else
16673 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
16674 GOT_SYMBOL_NAME, LPREFIX, value);
16675 }
16676 \f
16677 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
16678 for the target. */
16679
16680 void
16681 ix86_expand_clear (rtx dest)
16682 {
16683 rtx tmp;
16684
16685 /* We play register width games, which are only valid after reload. */
16686 gcc_assert (reload_completed);
16687
16688 /* Avoid HImode and its attendant prefix byte. */
16689 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
16690 dest = gen_rtx_REG (SImode, REGNO (dest));
16691 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
16692
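/* "xor reg, reg" is smaller but clobbers the flags, so it needs the
   explicit CLOBBER below; "mov $0, reg" leaves the flags untouched.  */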
16693 if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
16694 {
16695 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
16696 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
16697 }
16698
16699 emit_insn (tmp);
16700 }
16701
16702 /* X is an unchanging MEM. If it is a constant pool reference, return
16703 the constant pool rtx, else NULL. */
16704
16705 rtx
16706 maybe_get_pool_constant (rtx x)
16707 {
16708 x = ix86_delegitimize_address (XEXP (x, 0));
16709
16710 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
16711 return get_pool_constant (x);
16712
16713 return NULL_RTX;
16714 }
16715
16716 void
16717 ix86_expand_move (enum machine_mode mode, rtx operands[])
16718 {
16719 rtx op0, op1;
16720 enum tls_model model;
16721
16722 op0 = operands[0];
16723 op1 = operands[1];
16724
16725 if (GET_CODE (op1) == SYMBOL_REF)
16726 {
16727 rtx tmp;
16728
16729 model = SYMBOL_REF_TLS_MODEL (op1);
16730 if (model)
16731 {
16732 op1 = legitimize_tls_address (op1, model, true);
16733 op1 = force_operand (op1, op0);
16734 if (op1 == op0)
16735 return;
16736 op1 = convert_to_mode (mode, op1, 1);
16737 }
16738 else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
16739 op1 = tmp;
16740 }
16741 else if (GET_CODE (op1) == CONST
16742 && GET_CODE (XEXP (op1, 0)) == PLUS
16743 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
16744 {
16745 rtx addend = XEXP (XEXP (op1, 0), 1);
16746 rtx symbol = XEXP (XEXP (op1, 0), 0);
16747 rtx tmp;
16748
16749 model = SYMBOL_REF_TLS_MODEL (symbol);
16750 if (model)
16751 tmp = legitimize_tls_address (symbol, model, true);
16752 else
16753 tmp = legitimize_pe_coff_symbol (symbol, true);
16754
16755 if (tmp)
16756 {
16757 tmp = force_operand (tmp, NULL);
16758 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
16759 op0, 1, OPTAB_DIRECT);
16760 if (tmp == op0)
16761 return;
16762 op1 = convert_to_mode (mode, tmp, 1);
16763 }
16764 }
16765
16766 if ((flag_pic || MACHOPIC_INDIRECT)
16767 && symbolic_operand (op1, mode))
16768 {
16769 if (TARGET_MACHO && !TARGET_64BIT)
16770 {
16771 #if TARGET_MACHO
16772 /* dynamic-no-pic */
16773 if (MACHOPIC_INDIRECT)
16774 {
16775 rtx temp = ((reload_in_progress
16776 || ((op0 && REG_P (op0))
16777 && mode == Pmode))
16778 ? op0 : gen_reg_rtx (Pmode));
16779 op1 = machopic_indirect_data_reference (op1, temp);
16780 if (MACHOPIC_PURE)
16781 op1 = machopic_legitimize_pic_address (op1, mode,
16782 temp == op1 ? 0 : temp);
16783 }
16784 if (op0 != op1 && GET_CODE (op0) != MEM)
16785 {
16786 rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
16787 emit_insn (insn);
16788 return;
16789 }
16790 if (GET_CODE (op0) == MEM)
16791 op1 = force_reg (Pmode, op1);
16792 else
16793 {
16794 rtx temp = op0;
16795 if (GET_CODE (temp) != REG)
16796 temp = gen_reg_rtx (Pmode);
16797 temp = legitimize_pic_address (op1, temp);
16798 if (temp == op0)
16799 return;
16800 op1 = temp;
16801 }
16802 /* dynamic-no-pic */
16803 #endif
16804 }
16805 else
16806 {
16807 if (MEM_P (op0))
16808 op1 = force_reg (mode, op1);
16809 else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
16810 {
16811 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
16812 op1 = legitimize_pic_address (op1, reg);
16813 if (op0 == op1)
16814 return;
16815 op1 = convert_to_mode (mode, op1, 1);
16816 }
16817 }
16818 }
16819 else
16820 {
16821 if (MEM_P (op0)
16822 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
16823 || !push_operand (op0, mode))
16824 && MEM_P (op1))
16825 op1 = force_reg (mode, op1);
16826
16827 if (push_operand (op0, mode)
16828 && ! general_no_elim_operand (op1, mode))
16829 op1 = copy_to_mode_reg (mode, op1);
16830
16831 /* Force large constants in 64bit compilation into register
16832 to get them CSEed. */
16833 if (can_create_pseudo_p ()
16834 && (mode == DImode) && TARGET_64BIT
16835 && immediate_operand (op1, mode)
16836 && !x86_64_zext_immediate_operand (op1, VOIDmode)
16837 && !register_operand (op0, mode)
16838 && optimize)
16839 op1 = copy_to_mode_reg (mode, op1);
16840
16841 if (can_create_pseudo_p ()
16842 && FLOAT_MODE_P (mode)
16843 && GET_CODE (op1) == CONST_DOUBLE)
16844 {
16845 /* If we are loading a floating point constant to a register,
16846 force the value to memory now, since we'll get better code
16847 out the back end. */
16848
16849 op1 = validize_mem (force_const_mem (mode, op1));
16850 if (!register_operand (op0, mode))
16851 {
16852 rtx temp = gen_reg_rtx (mode);
16853 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
16854 emit_move_insn (op0, temp);
16855 return;
16856 }
16857 }
16858 }
16859
16860 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16861 }
16862
16863 void
16864 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
16865 {
16866 rtx op0 = operands[0], op1 = operands[1];
16867 unsigned int align = GET_MODE_ALIGNMENT (mode);
16868
16869 if (push_operand (op0, VOIDmode))
16870 op0 = emit_move_resolve_push (mode, op0);
16871
16872 /* Force constants other than zero into memory. We do not know how
16873 the instructions used to build constants modify the upper 64 bits
16874 of the register; once we have that information we may be able
16875 to handle some of them more efficiently. */
16876 if (can_create_pseudo_p ()
16877 && register_operand (op0, mode)
16878 && (CONSTANT_P (op1)
16879 || (GET_CODE (op1) == SUBREG
16880 && CONSTANT_P (SUBREG_REG (op1))))
16881 && !standard_sse_constant_p (op1))
16882 op1 = validize_mem (force_const_mem (mode, op1));
16883
16884 /* We need to check memory alignment for SSE mode since attribute
16885 can make operands unaligned. */
16886 if (can_create_pseudo_p ()
16887 && SSE_REG_MODE_P (mode)
16888 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
16889 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
16890 {
16891 rtx tmp[2];
16892
16893 /* ix86_expand_vector_move_misalign() does not like constants ... */
16894 if (CONSTANT_P (op1)
16895 || (GET_CODE (op1) == SUBREG
16896 && CONSTANT_P (SUBREG_REG (op1))))
16897 op1 = validize_mem (force_const_mem (mode, op1));
16898
16899 /* ... nor both arguments in memory. */
16900 if (!register_operand (op0, mode)
16901 && !register_operand (op1, mode))
16902 op1 = force_reg (mode, op1);
16903
16904 tmp[0] = op0; tmp[1] = op1;
16905 ix86_expand_vector_move_misalign (mode, tmp);
16906 return;
16907 }
16908
16909 /* Make operand1 a register if it isn't already. */
16910 if (can_create_pseudo_p ()
16911 && !register_operand (op0, mode)
16912 && !register_operand (op1, mode))
16913 {
16914 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
16915 return;
16916 }
16917
16918 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16919 }
16920
16921 /* Split 32-byte AVX unaligned load and store if needed. */
16922
16923 static void
16924 ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
16925 {
16926 rtx m;
16927 rtx (*extract) (rtx, rtx, rtx);
16928 rtx (*load_unaligned) (rtx, rtx);
16929 rtx (*store_unaligned) (rtx, rtx);
16930 enum machine_mode mode;
16931
16932 switch (GET_MODE (op0))
16933 {
16934 default:
16935 gcc_unreachable ();
16936 case V32QImode:
16937 extract = gen_avx_vextractf128v32qi;
16938 load_unaligned = gen_avx_loaddquv32qi;
16939 store_unaligned = gen_avx_storedquv32qi;
16940 mode = V16QImode;
16941 break;
16942 case V8SFmode:
16943 extract = gen_avx_vextractf128v8sf;
16944 load_unaligned = gen_avx_loadups256;
16945 store_unaligned = gen_avx_storeups256;
16946 mode = V4SFmode;
16947 break;
16948 case V4DFmode:
16949 extract = gen_avx_vextractf128v4df;
16950 load_unaligned = gen_avx_loadupd256;
16951 store_unaligned = gen_avx_storeupd256;
16952 mode = V2DFmode;
16953 break;
16954 }
16955
16956 if (MEM_P (op1))
16957 {
16958 if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD)
16959 {
16960 rtx r = gen_reg_rtx (mode);
16961 m = adjust_address (op1, mode, 0);
16962 emit_move_insn (r, m);
16963 m = adjust_address (op1, mode, 16);
16964 r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
16965 emit_move_insn (op0, r);
16966 }
16967 /* Normal *mov<mode>_internal pattern will handle
16968 unaligned loads just fine if misaligned_operand
16969 is true, and without the UNSPEC it can be combined
16970 with arithmetic instructions. */
16971 else if (misaligned_operand (op1, GET_MODE (op1)))
16972 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
16973 else
16974 emit_insn (load_unaligned (op0, op1));
16975 }
16976 else if (MEM_P (op0))
16977 {
16978 if (TARGET_AVX256_SPLIT_UNALIGNED_STORE)
16979 {
16980 m = adjust_address (op0, mode, 0);
16981 emit_insn (extract (m, op1, const0_rtx));
16982 m = adjust_address (op0, mode, 16);
16983 emit_insn (extract (m, op1, const1_rtx));
16984 }
16985 else
16986 emit_insn (store_unaligned (op0, op1));
16987 }
16988 else
16989 gcc_unreachable ();
16990 }
16991
16992 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
16993 straight to ix86_expand_vector_move. */
16994 /* Code generation for scalar reg-reg moves of single and double precision data:
16995 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
16996 movaps reg, reg
16997 else
16998 movss reg, reg
16999 if (x86_sse_partial_reg_dependency == true)
17000 movapd reg, reg
17001 else
17002 movsd reg, reg
17003
17004 Code generation for scalar loads of double precision data:
17005 if (x86_sse_split_regs == true)
17006 movlpd mem, reg (gas syntax)
17007 else
17008 movsd mem, reg
17009
17010 Code generation for unaligned packed loads of single precision data
17011 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17012 if (x86_sse_unaligned_move_optimal)
17013 movups mem, reg
17014
17015 if (x86_sse_partial_reg_dependency == true)
17016 {
17017 xorps reg, reg
17018 movlps mem, reg
17019 movhps mem+8, reg
17020 }
17021 else
17022 {
17023 movlps mem, reg
17024 movhps mem+8, reg
17025 }
17026
17027 Code generation for unaligned packed loads of double precision data
17028 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17029 if (x86_sse_unaligned_move_optimal)
17030 movupd mem, reg
17031
17032 if (x86_sse_split_regs == true)
17033 {
17034 movlpd mem, reg
17035 movhpd mem+8, reg
17036 }
17037 else
17038 {
17039 movsd mem, reg
17040 movhpd mem+8, reg
17041 }
17042 */
17043
17044 void
17045 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
17046 {
17047 rtx op0, op1, orig_op0 = NULL_RTX, m;
17048 rtx (*load_unaligned) (rtx, rtx);
17049 rtx (*store_unaligned) (rtx, rtx);
17050
17051 op0 = operands[0];
17052 op1 = operands[1];
17053
17054 if (GET_MODE_SIZE (mode) == 64)
17055 {
17056 switch (GET_MODE_CLASS (mode))
17057 {
17058 case MODE_VECTOR_INT:
17059 case MODE_INT:
17060 if (GET_MODE (op0) != V16SImode)
17061 {
17062 if (!MEM_P (op0))
17063 {
17064 orig_op0 = op0;
17065 op0 = gen_reg_rtx (V16SImode);
17066 }
17067 else
17068 op0 = gen_lowpart (V16SImode, op0);
17069 }
17070 op1 = gen_lowpart (V16SImode, op1);
17071 /* FALLTHRU */
17072
17073 case MODE_VECTOR_FLOAT:
17074 switch (GET_MODE (op0))
17075 {
17076 default:
17077 gcc_unreachable ();
17078 case V16SImode:
17079 load_unaligned = gen_avx512f_loaddquv16si;
17080 store_unaligned = gen_avx512f_storedquv16si;
17081 break;
17082 case V16SFmode:
17083 load_unaligned = gen_avx512f_loadups512;
17084 store_unaligned = gen_avx512f_storeups512;
17085 break;
17086 case V8DFmode:
17087 load_unaligned = gen_avx512f_loadupd512;
17088 store_unaligned = gen_avx512f_storeupd512;
17089 break;
17090 }
17091
17092 if (MEM_P (op1))
17093 emit_insn (load_unaligned (op0, op1));
17094 else if (MEM_P (op0))
17095 emit_insn (store_unaligned (op0, op1));
17096 else
17097 gcc_unreachable ();
17098 if (orig_op0)
17099 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17100 break;
17101
17102 default:
17103 gcc_unreachable ();
17104 }
17105
17106 return;
17107 }
17108
17109 if (TARGET_AVX
17110 && GET_MODE_SIZE (mode) == 32)
17111 {
17112 switch (GET_MODE_CLASS (mode))
17113 {
17114 case MODE_VECTOR_INT:
17115 case MODE_INT:
17116 if (GET_MODE (op0) != V32QImode)
17117 {
17118 if (!MEM_P (op0))
17119 {
17120 orig_op0 = op0;
17121 op0 = gen_reg_rtx (V32QImode);
17122 }
17123 else
17124 op0 = gen_lowpart (V32QImode, op0);
17125 }
17126 op1 = gen_lowpart (V32QImode, op1);
17127 /* FALLTHRU */
17128
17129 case MODE_VECTOR_FLOAT:
17130 ix86_avx256_split_vector_move_misalign (op0, op1);
17131 if (orig_op0)
17132 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17133 break;
17134
17135 default:
17136 gcc_unreachable ();
17137 }
17138
17139 return;
17140 }
17141
17142 if (MEM_P (op1))
17143 {
17144 /* Normal *mov<mode>_internal pattern will handle
17145 unaligned loads just fine if misaligned_operand
17146 is true, and without the UNSPEC it can be combined
17147 with arithmetic instructions. */
17148 if (TARGET_AVX
17149 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17150 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17151 && misaligned_operand (op1, GET_MODE (op1)))
17152 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17153 /* ??? If we have typed data, then it would appear that using
17154 movdqu is the only way to get unaligned data loaded with
17155 integer type. */
17156 else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17157 {
17158 if (GET_MODE (op0) != V16QImode)
17159 {
17160 orig_op0 = op0;
17161 op0 = gen_reg_rtx (V16QImode);
17162 }
17163 op1 = gen_lowpart (V16QImode, op1);
17164 /* We will eventually emit movups based on insn attributes. */
17165 emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17166 if (orig_op0)
17167 emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17168 }
17169 else if (TARGET_SSE2 && mode == V2DFmode)
17170 {
17171 rtx zero;
17172
17173 if (TARGET_AVX
17174 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17175 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17176 || optimize_insn_for_size_p ())
17177 {
17178 /* We will eventually emit movups based on insn attributes. */
17179 emit_insn (gen_sse2_loadupd (op0, op1));
17180 return;
17181 }
17182
17183 /* When SSE registers are split into halves, we can avoid
17184 writing to the top half twice. */
17185 if (TARGET_SSE_SPLIT_REGS)
17186 {
17187 emit_clobber (op0);
17188 zero = op0;
17189 }
17190 else
17191 {
17192 /* ??? Not sure about the best option for the Intel chips.
17193 The following would seem to satisfy; the register is
17194 entirely cleared, breaking the dependency chain. We
17195 then store to the upper half, with a dependency depth
17196 of one. A rumor has it that Intel recommends two movsd
17197 followed by an unpacklpd, but this is unconfirmed. And
17198 given that the dependency depth of the unpacklpd would
17199 still be one, I'm not sure why this would be better. */
17200 zero = CONST0_RTX (V2DFmode);
17201 }
17202
17203 m = adjust_address (op1, DFmode, 0);
17204 emit_insn (gen_sse2_loadlpd (op0, zero, m));
17205 m = adjust_address (op1, DFmode, 8);
17206 emit_insn (gen_sse2_loadhpd (op0, op0, m));
17207 }
17208 else
17209 {
17210 rtx t;
17211
17212 if (TARGET_AVX
17213 || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17214 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17215 || optimize_insn_for_size_p ())
17216 {
17217 if (GET_MODE (op0) != V4SFmode)
17218 {
17219 orig_op0 = op0;
17220 op0 = gen_reg_rtx (V4SFmode);
17221 }
17222 op1 = gen_lowpart (V4SFmode, op1);
17223 emit_insn (gen_sse_loadups (op0, op1));
17224 if (orig_op0)
17225 emit_move_insn (orig_op0,
17226 gen_lowpart (GET_MODE (orig_op0), op0));
17227 return;
17228 }
17229
17230 if (mode != V4SFmode)
17231 t = gen_reg_rtx (V4SFmode);
17232 else
17233 t = op0;
17234
17235 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17236 emit_move_insn (t, CONST0_RTX (V4SFmode));
17237 else
17238 emit_clobber (t);
17239
17240 m = adjust_address (op1, V2SFmode, 0);
17241 emit_insn (gen_sse_loadlps (t, t, m));
17242 m = adjust_address (op1, V2SFmode, 8);
17243 emit_insn (gen_sse_loadhps (t, t, m));
17244 if (mode != V4SFmode)
17245 emit_move_insn (op0, gen_lowpart (mode, t));
17246 }
17247 }
17248 else if (MEM_P (op0))
17249 {
17250 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17251 {
17252 op0 = gen_lowpart (V16QImode, op0);
17253 op1 = gen_lowpart (V16QImode, op1);
17254 /* We will eventually emit movups based on insn attributes. */
17255 emit_insn (gen_sse2_storedquv16qi (op0, op1));
17256 }
17257 else if (TARGET_SSE2 && mode == V2DFmode)
17258 {
17259 if (TARGET_AVX
17260 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17261 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17262 || optimize_insn_for_size_p ())
17263 /* We will eventually emit movups based on insn attributes. */
17264 emit_insn (gen_sse2_storeupd (op0, op1));
17265 else
17266 {
17267 m = adjust_address (op0, DFmode, 0);
17268 emit_insn (gen_sse2_storelpd (m, op1));
17269 m = adjust_address (op0, DFmode, 8);
17270 emit_insn (gen_sse2_storehpd (m, op1));
17271 }
17272 }
17273 else
17274 {
17275 if (mode != V4SFmode)
17276 op1 = gen_lowpart (V4SFmode, op1);
17277
17278 if (TARGET_AVX
17279 || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17280 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17281 || optimize_insn_for_size_p ())
17282 {
17283 op0 = gen_lowpart (V4SFmode, op0);
17284 emit_insn (gen_sse_storeups (op0, op1));
17285 }
17286 else
17287 {
17288 m = adjust_address (op0, V2SFmode, 0);
17289 emit_insn (gen_sse_storelps (m, op1));
17290 m = adjust_address (op0, V2SFmode, 8);
17291 emit_insn (gen_sse_storehps (m, op1));
17292 }
17293 }
17294 }
17295 else
17296 gcc_unreachable ();
17297 }
17298
17299 /* Helper function of ix86_fixup_binary_operands to canonicalize
17300 operand order. Returns true if the operands should be swapped. */
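/* E.g. for a commutative (plus (reg B) (reg A)) with dst equal to A,
   swapping makes src1 match dst, so the two-operand x86 form
   (dst = dst OP src) can be used.  */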
17301
17302 static bool
17303 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
17304 rtx operands[])
17305 {
17306 rtx dst = operands[0];
17307 rtx src1 = operands[1];
17308 rtx src2 = operands[2];
17309
17310 /* If the operation is not commutative, we can't do anything. */
17311 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
17312 return false;
17313
17314 /* Highest priority is that src1 should match dst. */
17315 if (rtx_equal_p (dst, src1))
17316 return false;
17317 if (rtx_equal_p (dst, src2))
17318 return true;
17319
17320 /* Next highest priority is that immediate constants come second. */
17321 if (immediate_operand (src2, mode))
17322 return false;
17323 if (immediate_operand (src1, mode))
17324 return true;
17325
17326 /* Lowest priority is that memory references should come second. */
17327 if (MEM_P (src2))
17328 return false;
17329 if (MEM_P (src1))
17330 return true;
17331
17332 return false;
17333 }
17334
17335
17336 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
17337 destination to use for the operation. If different from the true
17338 destination in operands[0], a copy operation will be required. */
17339
17340 rtx
17341 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
17342 rtx operands[])
17343 {
17344 rtx dst = operands[0];
17345 rtx src1 = operands[1];
17346 rtx src2 = operands[2];
17347
17348 /* Canonicalize operand order. */
17349 if (ix86_swap_binary_operands_p (code, mode, operands))
17350 {
17351 rtx temp;
17352
17353 /* It is invalid to swap operands of different modes. */
17354 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
17355
17356 temp = src1;
17357 src1 = src2;
17358 src2 = temp;
17359 }
17360
17361 /* Both source operands cannot be in memory. */
17362 if (MEM_P (src1) && MEM_P (src2))
17363 {
17364 /* Optimization: Only read from memory once. */
17365 if (rtx_equal_p (src1, src2))
17366 {
17367 src2 = force_reg (mode, src2);
17368 src1 = src2;
17369 }
17370 else if (rtx_equal_p (dst, src1))
17371 src2 = force_reg (mode, src2);
17372 else
17373 src1 = force_reg (mode, src1);
17374 }
17375
17376 /* If the destination is memory, and we do not have matching source
17377 operands, do things in registers. */
17378 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17379 dst = gen_reg_rtx (mode);
17380
17381 /* Source 1 cannot be a constant. */
17382 if (CONSTANT_P (src1))
17383 src1 = force_reg (mode, src1);
17384
17385 /* Source 1 cannot be a non-matching memory. */
17386 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17387 src1 = force_reg (mode, src1);
17388
17389 /* Improve address combine. */
17390 if (code == PLUS
17391 && GET_MODE_CLASS (mode) == MODE_INT
17392 && MEM_P (src2))
17393 src2 = force_reg (mode, src2);
17394
17395 operands[1] = src1;
17396 operands[2] = src2;
17397 return dst;
17398 }
17399
17400 /* Similarly, but assume that the destination has already been
17401 set up properly. */
17402
17403 void
17404 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
17405 enum machine_mode mode, rtx operands[])
17406 {
17407 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
17408 gcc_assert (dst == operands[0]);
17409 }
17410
17411 /* Attempt to expand a binary operator. Make the expansion closer to the
17412 actual machine, than just general_operand, which would allow 3 separate
17413 memory references (one output, two input) in a single insn. */
17414
17415 void
17416 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
17417 rtx operands[])
17418 {
17419 rtx src1, src2, dst, op, clob;
17420
17421 dst = ix86_fixup_binary_operands (code, mode, operands);
17422 src1 = operands[1];
17423 src2 = operands[2];
17424
17425 /* Emit the instruction. */
17426
17427 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
17428 if (reload_in_progress)
17429 {
17430 /* Reload doesn't know about the flags register, and doesn't know that
17431 it doesn't want to clobber it. We can only do this with PLUS. */
17432 gcc_assert (code == PLUS);
17433 emit_insn (op);
17434 }
17435 else if (reload_completed
17436 && code == PLUS
17437 && !rtx_equal_p (dst, src1))
17438 {
17439 /* This is going to be an LEA; avoid splitting it later. */
17440 emit_insn (op);
17441 }
17442 else
17443 {
17444 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17445 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17446 }
17447
17448 /* Fix up the destination if needed. */
17449 if (dst != operands[0])
17450 emit_move_insn (operands[0], dst);
17451 }
17452
17453 /* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
17454 the given OPERANDS. */
17455
17456 void
17457 ix86_expand_vector_logical_operator (enum rtx_code code, enum machine_mode mode,
17458 rtx operands[])
17459 {
17460 rtx op1 = NULL_RTX, op2 = NULL_RTX;
17461 if (GET_CODE (operands[1]) == SUBREG)
17462 {
17463 op1 = operands[1];
17464 op2 = operands[2];
17465 }
17466 else if (GET_CODE (operands[2]) == SUBREG)
17467 {
17468 op1 = operands[2];
17469 op2 = operands[1];
17470 }
17471 /* Optimize (__m128i) d | (__m128i) e and similar code
17472 when d and e are float vectors into a float vector logical
17473 insn. In C/C++ without using intrinsics there is no other way
17474 to express a vector logical operation on float vectors than
17475 to cast them temporarily to integer vectors. */
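/* Doing the operation in the float vector mode lets the backend emit
   andps/orps/xorps (or their pd forms), which can avoid a domain-crossing
   (bypass) penalty on targets where that matters.  */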
17476 if (op1
17477 && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17478 && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
17479 && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
17480 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
17481 && SUBREG_BYTE (op1) == 0
17482 && (GET_CODE (op2) == CONST_VECTOR
17483 || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
17484 && SUBREG_BYTE (op2) == 0))
17485 && can_create_pseudo_p ())
17486 {
17487 rtx dst;
17488 switch (GET_MODE (SUBREG_REG (op1)))
17489 {
17490 case V4SFmode:
17491 case V8SFmode:
17492 case V2DFmode:
17493 case V4DFmode:
17494 dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
17495 if (GET_CODE (op2) == CONST_VECTOR)
17496 {
17497 op2 = gen_lowpart (GET_MODE (dst), op2);
17498 op2 = force_reg (GET_MODE (dst), op2);
17499 }
17500 else
17501 {
17502 op1 = operands[1];
17503 op2 = SUBREG_REG (operands[2]);
17504 if (!nonimmediate_operand (op2, GET_MODE (dst)))
17505 op2 = force_reg (GET_MODE (dst), op2);
17506 }
17507 op1 = SUBREG_REG (op1);
17508 if (!nonimmediate_operand (op1, GET_MODE (dst)))
17509 op1 = force_reg (GET_MODE (dst), op1);
17510 emit_insn (gen_rtx_SET (VOIDmode, dst,
17511 gen_rtx_fmt_ee (code, GET_MODE (dst),
17512 op1, op2)));
17513 emit_move_insn (operands[0], gen_lowpart (mode, dst));
17514 return;
17515 default:
17516 break;
17517 }
17518 }
17519 if (!nonimmediate_operand (operands[1], mode))
17520 operands[1] = force_reg (mode, operands[1]);
17521 if (!nonimmediate_operand (operands[2], mode))
17522 operands[2] = force_reg (mode, operands[2]);
17523 ix86_fixup_binary_operands_no_copy (code, mode, operands);
17524 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17525 gen_rtx_fmt_ee (code, mode, operands[1],
17526 operands[2])));
17527 }
17528
17529 /* Return TRUE or FALSE depending on whether the binary operator meets the
17530 appropriate constraints. */
17531
17532 bool
17533 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
17534 rtx operands[3])
17535 {
17536 rtx dst = operands[0];
17537 rtx src1 = operands[1];
17538 rtx src2 = operands[2];
17539
17540 /* Both source operands cannot be in memory. */
17541 if (MEM_P (src1) && MEM_P (src2))
17542 return false;
17543
17544 /* Canonicalize operand order for commutative operators. */
17545 if (ix86_swap_binary_operands_p (code, mode, operands))
17546 {
17547 rtx temp = src1;
17548 src1 = src2;
17549 src2 = temp;
17550 }
17551
17552 /* If the destination is memory, we must have a matching source operand. */
17553 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
17554 return false;
17555
17556 /* Source 1 cannot be a constant. */
17557 if (CONSTANT_P (src1))
17558 return false;
17559
17560 /* Source 1 cannot be a non-matching memory. */
17561 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
17562 /* Support "andhi/andsi/anddi" as a zero-extending move. */
17563 return (code == AND
17564 && (mode == HImode
17565 || mode == SImode
17566 || (TARGET_64BIT && mode == DImode))
17567 && satisfies_constraint_L (src2));
17568
17569 return true;
17570 }
17571
17572 /* Attempt to expand a unary operator. Make the expansion closer to the
17573 actual machine, than just general_operand, which would allow 2 separate
17574 memory references (one output, one input) in a single insn. */
17575
17576 void
17577 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
17578 rtx operands[])
17579 {
17580 int matching_memory;
17581 rtx src, dst, op, clob;
17582
17583 dst = operands[0];
17584 src = operands[1];
17585
17586 /* If the destination is memory, and we do not have matching source
17587 operands, do things in registers. */
17588 matching_memory = 0;
17589 if (MEM_P (dst))
17590 {
17591 if (rtx_equal_p (dst, src))
17592 matching_memory = 1;
17593 else
17594 dst = gen_reg_rtx (mode);
17595 }
17596
17597 /* When source operand is memory, destination must match. */
17598 if (MEM_P (src) && !matching_memory)
17599 src = force_reg (mode, src);
17600
17601 /* Emit the instruction. */
17602
17603 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
17604 if (reload_in_progress || code == NOT)
17605 {
17606 /* Reload doesn't know about the flags register, and doesn't know that
17607 it doesn't want to clobber it. */
17608 gcc_assert (code == NOT);
17609 emit_insn (op);
17610 }
17611 else
17612 {
17613 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17614 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
17615 }
17616
17617 /* Fix up the destination if needed. */
17618 if (dst != operands[0])
17619 emit_move_insn (operands[0], dst);
17620 }
17621
17622 /* Split 32bit/64bit divmod with 8bit unsigned divmod if dividend and
17623 divisor are within the range [0-255]. */
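/* On the 8bit path a single HImode divide is used: the x86 byte divide
   leaves the quotient in %al and the remainder in %ah, which are
   extracted below with a lowpart subreg and a ZERO_EXTRACT.  */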
17624
17625 void
17626 ix86_split_idivmod (enum machine_mode mode, rtx operands[],
17627 bool signed_p)
17628 {
17629 rtx end_label, qimode_label;
17630 rtx insn, div, mod;
17631 rtx scratch, tmp0, tmp1, tmp2;
17632 rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
17633 rtx (*gen_zero_extend) (rtx, rtx);
17634 rtx (*gen_test_ccno_1) (rtx, rtx);
17635
17636 switch (mode)
17637 {
17638 case SImode:
17639 gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
17640 gen_test_ccno_1 = gen_testsi_ccno_1;
17641 gen_zero_extend = gen_zero_extendqisi2;
17642 break;
17643 case DImode:
17644 gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
17645 gen_test_ccno_1 = gen_testdi_ccno_1;
17646 gen_zero_extend = gen_zero_extendqidi2;
17647 break;
17648 default:
17649 gcc_unreachable ();
17650 }
17651
17652 end_label = gen_label_rtx ();
17653 qimode_label = gen_label_rtx ();
17654
17655 scratch = gen_reg_rtx (mode);
17656
17657 /* Use 8bit unsigned divmod if dividend and divisor are within
17658 the range [0-255]. */
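/* Testing (dividend | divisor) against ~0xff sets ZF iff neither value
   has a bit set above bit 7, i.e. both fit in 8 bits.  */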
17659 emit_move_insn (scratch, operands[2]);
17660 scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
17661 scratch, 1, OPTAB_DIRECT);
17662 emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
17663 tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
17664 tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
17665 tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
17666 gen_rtx_LABEL_REF (VOIDmode, qimode_label),
17667 pc_rtx);
17668 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
17669 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17670 JUMP_LABEL (insn) = qimode_label;
17671
17672 /* Generate original signed/unsigned divmod. */
17673 div = gen_divmod4_1 (operands[0], operands[1],
17674 operands[2], operands[3]);
17675 emit_insn (div);
17676
17677 /* Branch to the end. */
17678 emit_jump_insn (gen_jump (end_label));
17679 emit_barrier ();
17680
17681 /* Generate 8bit unsigned divide. */
17682 emit_label (qimode_label);
17683 /* Don't use operands[0] for result of 8bit divide since not all
17684 registers support QImode ZERO_EXTRACT. */
17685 tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
17686 tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
17687 tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
17688 emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
17689
17690 if (signed_p)
17691 {
17692 div = gen_rtx_DIV (SImode, operands[2], operands[3]);
17693 mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
17694 }
17695 else
17696 {
17697 div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
17698 mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
17699 }
17700
17701 /* Extract remainder from AH. */
17702 tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
17703 if (REG_P (operands[1]))
17704 insn = emit_move_insn (operands[1], tmp1);
17705 else
17706 {
17707 /* Need a new scratch register since the old one has result
17708 of 8bit divide. */
17709 scratch = gen_reg_rtx (mode);
17710 emit_move_insn (scratch, tmp1);
17711 insn = emit_move_insn (operands[1], scratch);
17712 }
17713 set_unique_reg_note (insn, REG_EQUAL, mod);
17714
17715 /* Zero extend quotient from AL. */
17716 tmp1 = gen_lowpart (QImode, tmp0);
17717 insn = emit_insn (gen_zero_extend (operands[0], tmp1));
17718 set_unique_reg_note (insn, REG_EQUAL, div);
17719
17720 emit_label (end_label);
17721 }
17722
17723 /* Whether it is OK to emit CFI directives when emitting asm code. */
17724
17725 bool
17726 ix86_emit_cfi ()
17727 {
17728 return dwarf2out_do_cfi_asm ();
17729 }
17730
17731 #define LEA_MAX_STALL (3)
17732 #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
17733
17734 /* Increase given DISTANCE in half-cycles according to
17735 dependencies between PREV and NEXT instructions.
17736 Add 1 half-cycle if there is no dependency and
17737 go to the next cycle if there is some dependency. */
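/* For example (illustrative): with DISTANCE == 3 half-cycles and a true
   dependency between PREV and NEXT, the result is 3 + 1 + 2 == 6 (round up
   to the next cycle, then add one more cycle); without a dependency it is
   simply 3 + 1 == 4.  */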
17738
17739 static unsigned int
17740 increase_distance (rtx prev, rtx next, unsigned int distance)
17741 {
17742 df_ref *use_rec;
17743 df_ref *def_rec;
17744
17745 if (!prev || !next)
17746 return distance + (distance & 1) + 2;
17747
17748 if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
17749 return distance + 1;
17750
17751 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
17752 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
17753 if (!DF_REF_IS_ARTIFICIAL (*def_rec)
17754 && DF_REF_REGNO (*use_rec) == DF_REF_REGNO (*def_rec))
17755 return distance + (distance & 1) + 2;
17756
17757 return distance + 1;
17758 }
17759
17760 /* Function checks if instruction INSN defines register number
17761 REGNO1 or REGNO2. */
17762
17763 static bool
17764 insn_defines_reg (unsigned int regno1, unsigned int regno2,
17765 rtx insn)
17766 {
17767 df_ref *def_rec;
17768
17769 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
17770 if (DF_REF_REG_DEF_P (*def_rec)
17771 && !DF_REF_IS_ARTIFICIAL (*def_rec)
17772 && (regno1 == DF_REF_REGNO (*def_rec)
17773 || regno2 == DF_REF_REGNO (*def_rec)))
17774 {
17775 return true;
17776 }
17777
17778 return false;
17779 }
17780
17781 /* Function checks if instruction INSN uses register number
17782 REGNO as part of an address expression. */
17783
17784 static bool
17785 insn_uses_reg_mem (unsigned int regno, rtx insn)
17786 {
17787 df_ref *use_rec;
17788
17789 for (use_rec = DF_INSN_USES (insn); *use_rec; use_rec++)
17790 if (DF_REF_REG_MEM_P (*use_rec) && regno == DF_REF_REGNO (*use_rec))
17791 return true;
17792
17793 return false;
17794 }
17795
17796 /* Search backward for non-agu definition of register number REGNO1
17797 or register number REGNO2 in basic block starting from instruction
17798 START up to head of basic block or instruction INSN.
17799
17800 Set *FOUND to true if a definition was found
17801 and to false otherwise.
17802
17803 Distance in half-cycles between START and found instruction or head
17804 of BB is added to DISTANCE and returned. */
17805
17806 static int
17807 distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
17808 rtx insn, int distance,
17809 rtx start, bool *found)
17810 {
17811 basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
17812 rtx prev = start;
17813 rtx next = NULL;
17814
17815 *found = false;
17816
17817 while (prev
17818 && prev != insn
17819 && distance < LEA_SEARCH_THRESHOLD)
17820 {
17821 if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
17822 {
17823 distance = increase_distance (prev, next, distance);
17824 if (insn_defines_reg (regno1, regno2, prev))
17825 {
17826 if (recog_memoized (prev) < 0
17827 || get_attr_type (prev) != TYPE_LEA)
17828 {
17829 *found = true;
17830 return distance;
17831 }
17832 }
17833
17834 next = prev;
17835 }
17836 if (prev == BB_HEAD (bb))
17837 break;
17838
17839 prev = PREV_INSN (prev);
17840 }
17841
17842 return distance;
17843 }
17844
17845 /* Search backward for non-agu definition of register number REGNO1
17846 or register number REGNO2 in INSN's basic block until
17847 1. Pass LEA_SEARCH_THRESHOLD instructions, or
17848 2. Reach a neighbouring BB's boundary, or
17849 3. Reach agu definition.
17850 Returns the distance between the non-agu definition point and INSN.
17851 If no definition point, returns -1. */
17852
17853 static int
17854 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
17855 rtx insn)
17856 {
17857 basic_block bb = BLOCK_FOR_INSN (insn);
17858 int distance = 0;
17859 bool found = false;
17860
17861 if (insn != BB_HEAD (bb))
17862 distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
17863 distance, PREV_INSN (insn),
17864 &found);
17865
17866 if (!found && distance < LEA_SEARCH_THRESHOLD)
17867 {
17868 edge e;
17869 edge_iterator ei;
17870 bool simple_loop = false;
17871
17872 FOR_EACH_EDGE (e, ei, bb->preds)
17873 if (e->src == bb)
17874 {
17875 simple_loop = true;
17876 break;
17877 }
17878
17879 if (simple_loop)
17880 distance = distance_non_agu_define_in_bb (regno1, regno2,
17881 insn, distance,
17882 BB_END (bb), &found);
17883 else
17884 {
17885 int shortest_dist = -1;
17886 bool found_in_bb = false;
17887
17888 FOR_EACH_EDGE (e, ei, bb->preds)
17889 {
17890 int bb_dist
17891 = distance_non_agu_define_in_bb (regno1, regno2,
17892 insn, distance,
17893 BB_END (e->src),
17894 &found_in_bb);
17895 if (found_in_bb)
17896 {
17897 if (shortest_dist < 0)
17898 shortest_dist = bb_dist;
17899 else if (bb_dist > 0)
17900 shortest_dist = MIN (bb_dist, shortest_dist);
17901
17902 found = true;
17903 }
17904 }
17905
17906 distance = shortest_dist;
17907 }
17908 }
17909
17910 /* get_attr_type may modify recog data. We want to make sure
17911 that recog data is valid for instruction INSN, on which
17912 distance_non_agu_define is called. INSN is unchanged here. */
17913 extract_insn_cached (insn);
17914
17915 if (!found)
17916 return -1;
17917
17918 return distance >> 1;
17919 }
17920
17921 /* Return the distance in half-cycles, added to DISTANCE, between INSN
17922 and the next insn that uses register number REGNO in a memory
17923 address. Return -1 if REGNO is set before such a use is found.
17924
17925 Set *FOUND to true if a register usage was found and to false
17926 otherwise.
17927 Set *REDEFINED to true if a register redefinition was found and to
17928 false otherwise. */
17929
17930 static int
17931 distance_agu_use_in_bb (unsigned int regno,
17932 rtx insn, int distance, rtx start,
17933 bool *found, bool *redefined)
17934 {
17935 basic_block bb = NULL;
17936 rtx next = start;
17937 rtx prev = NULL;
17938
17939 *found = false;
17940 *redefined = false;
17941
17942 if (start != NULL_RTX)
17943 {
17944 bb = BLOCK_FOR_INSN (start);
17945 if (start != BB_HEAD (bb))
17946 /* If insn and start belong to the same bb, set prev to insn,
17947 so the call to increase_distance will increase the distance
17948 between insns by 1. */
17949 prev = insn;
17950 }
17951
17952 while (next
17953 && next != insn
17954 && distance < LEA_SEARCH_THRESHOLD)
17955 {
17956 if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
17957 {
17958 distance = increase_distance (prev, next, distance);
17959 if (insn_uses_reg_mem (regno, next))
17960 {
17961 /* Return DISTANCE if OP0 is used in memory
17962 address in NEXT. */
17963 *found = true;
17964 return distance;
17965 }
17966
17967 if (insn_defines_reg (regno, INVALID_REGNUM, next))
17968 {
17969 /* Return -1 if OP0 is set in NEXT. */
17970 *redefined = true;
17971 return -1;
17972 }
17973
17974 prev = next;
17975 }
17976
17977 if (next == BB_END (bb))
17978 break;
17979
17980 next = NEXT_INSN (next);
17981 }
17982
17983 return distance;
17984 }
17985
17986 /* Return the distance between INSN and the next insn that uses
17987 register number REGNO0 in a memory address. Return -1 if no such
17988 use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
17989
17990 static int
17991 distance_agu_use (unsigned int regno0, rtx insn)
17992 {
17993 basic_block bb = BLOCK_FOR_INSN (insn);
17994 int distance = 0;
17995 bool found = false;
17996 bool redefined = false;
17997
17998 if (insn != BB_END (bb))
17999 distance = distance_agu_use_in_bb (regno0, insn, distance,
18000 NEXT_INSN (insn),
18001 &found, &redefined);
18002
18003 if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18004 {
18005 edge e;
18006 edge_iterator ei;
18007 bool simple_loop = false;
18008
18009 FOR_EACH_EDGE (e, ei, bb->succs)
18010 if (e->dest == bb)
18011 {
18012 simple_loop = true;
18013 break;
18014 }
18015
18016 if (simple_loop)
18017 distance = distance_agu_use_in_bb (regno0, insn,
18018 distance, BB_HEAD (bb),
18019 &found, &redefined);
18020 else
18021 {
18022 int shortest_dist = -1;
18023 bool found_in_bb = false;
18024 bool redefined_in_bb = false;
18025
18026 FOR_EACH_EDGE (e, ei, bb->succs)
18027 {
18028 int bb_dist
18029 = distance_agu_use_in_bb (regno0, insn,
18030 distance, BB_HEAD (e->dest),
18031 &found_in_bb, &redefined_in_bb);
18032 if (found_in_bb)
18033 {
18034 if (shortest_dist < 0)
18035 shortest_dist = bb_dist;
18036 else if (bb_dist > 0)
18037 shortest_dist = MIN (bb_dist, shortest_dist);
18038
18039 found = true;
18040 }
18041 }
18042
18043 distance = shortest_dist;
18044 }
18045 }
18046
18047 if (!found || redefined)
18048 return -1;
18049
18050 return distance >> 1;
18051 }
18052
18053 /* Define this macro to tune LEA priority vs ADD; it takes effect when
18054 there is a choice between LEA and ADD.
18055 Negative value: ADD is preferred over LEA
18056 Zero: Neutral
18057 Positive value: LEA is preferred over ADD. */
18058 #define IX86_LEA_PRIORITY 0
18059
18060 /* Return true if using lea INSN has a performance advantage
18061 over a sequence of instructions. The instruction sequence has
18062 SPLIT_COST cycles higher latency than the lea latency. */
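/* A worked example (illustrative, assuming IX86_LEA_PRIORITY == 0): if the
   nearest non-AGU definition of an input register is 1 cycle before the lea
   (dist_define == 1), SPLIT_COST is 1 and the next AGU use of the result is
   3 cycles after it (dist_use == 3), then dist_define becomes 1 + 1 == 2,
   which is less than dist_use, so the function returns false and the split
   sequence is preferred.  */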
18063
18064 static bool
18065 ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
18066 unsigned int regno2, int split_cost, bool has_scale)
18067 {
18068 int dist_define, dist_use;
18069
18070 /* For Silvermont, if a 2-source or 3-source LEA is used for a
18071 non-destructive destination, or for the ability to use SCALE,
18072 the use of LEA is justified. */
18073 if (TARGET_SILVERMONT || TARGET_INTEL)
18074 {
18075 if (has_scale)
18076 return true;
18077 if (split_cost < 1)
18078 return false;
18079 if (regno0 == regno1 || regno0 == regno2)
18080 return false;
18081 return true;
18082 }
18083
18084 dist_define = distance_non_agu_define (regno1, regno2, insn);
18085 dist_use = distance_agu_use (regno0, insn);
18086
18087 if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18088 {
18089 /* If there is no non-AGU operand definition, no AGU
18090 operand usage and the split cost is 0 then both the lea
18091 and non-lea variants have the same priority. Currently
18092 we prefer lea for 64-bit code and non-lea for 32-bit
18093 code. */
18094 if (dist_use < 0 && split_cost == 0)
18095 return TARGET_64BIT || IX86_LEA_PRIORITY;
18096 else
18097 return true;
18098 }
18099
18100 /* With a longer definition distance, lea is preferable.
18101 Here we adjust it to take the splitting cost and
18102 lea priority into account. */
18103 dist_define += split_cost + IX86_LEA_PRIORITY;
18104
18105 /* If there is no use in a memory address then we just check
18106 that the split cost exceeds the AGU stall. */
18107 if (dist_use < 0)
18108 return dist_define > LEA_MAX_STALL;
18109
18110 /* If this insn has both backward non-agu dependence and forward
18111 agu dependence, the one with short distance takes effect. */
18112 return dist_define >= dist_use;
18113 }
18114
18115 /* Return true if it is legal for INSN to clobber the flags register,
18116 and false otherwise. */
18117
18118 static bool
18119 ix86_ok_to_clobber_flags (rtx insn)
18120 {
18121 basic_block bb = BLOCK_FOR_INSN (insn);
18122 df_ref *use;
18123 bitmap live;
18124
18125 while (insn)
18126 {
18127 if (NONDEBUG_INSN_P (insn))
18128 {
18129 for (use = DF_INSN_USES (insn); *use; use++)
18130 if (DF_REF_REG_USE_P (*use) && DF_REF_REGNO (*use) == FLAGS_REG)
18131 return false;
18132
18133 if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18134 return true;
18135 }
18136
18137 if (insn == BB_END (bb))
18138 break;
18139
18140 insn = NEXT_INSN (insn);
18141 }
18142
18143 live = df_get_live_out (bb);
18144 return !REGNO_REG_SET_P (live, FLAGS_REG);
18145 }
18146
18147 /* Return true if we need to split op0 = op1 + op2 into a sequence of
18148 move and add to avoid AGU stalls. */
18149
18150 bool
18151 ix86_avoid_lea_for_add (rtx insn, rtx operands[])
18152 {
18153 unsigned int regno0, regno1, regno2;
18154
18155 /* Check if we need to optimize. */
18156 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18157 return false;
18158
18159 /* Check it is correct to split here. */
18160 if (!ix86_ok_to_clobber_flags (insn))
18161 return false;
18162
18163 regno0 = true_regnum (operands[0]);
18164 regno1 = true_regnum (operands[1]);
18165 regno2 = true_regnum (operands[2]);
18166
18167 /* We only need to split adds with a non-destructive
18168 destination operand. */
18169 if (regno0 == regno1 || regno0 == regno2)
18170 return false;
18171 else
18172 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18173 }
18174
18175 /* Return true if we should emit lea instruction instead of mov
18176 instruction. */
18177
18178 bool
18179 ix86_use_lea_for_mov (rtx insn, rtx operands[])
18180 {
18181 unsigned int regno0, regno1;
18182
18183 /* Check if we need to optimize. */
18184 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18185 return false;
18186
18187 /* Use lea for reg to reg moves only. */
18188 if (!REG_P (operands[0]) || !REG_P (operands[1]))
18189 return false;
18190
18191 regno0 = true_regnum (operands[0]);
18192 regno1 = true_regnum (operands[1]);
18193
18194 return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18195 }
18196
18197 /* Return true if we need to split lea into a sequence of
18198 instructions to avoid AGU stalls. */
18199
18200 bool
18201 ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
18202 {
18203 unsigned int regno0, regno1, regno2;
18204 int split_cost;
18205 struct ix86_address parts;
18206 int ok;
18207
18208 /* Check we need to optimize. */
18209 if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18210 return false;
18211
18212 /* The "at least two components" test below might not catch simple
18213 move or zero extension insns if parts.base is non-NULL and parts.disp
18214 is const0_rtx as the only components in the address, e.g. if the
18215 register is %rbp or %r13. As this test is much cheaper and moves or
18216 zero extensions are the common case, do this check first. */
18217 if (REG_P (operands[1])
18218 || (SImode_address_operand (operands[1], VOIDmode)
18219 && REG_P (XEXP (operands[1], 0))))
18220 return false;
18221
18222 /* Check if it is OK to split here. */
18223 if (!ix86_ok_to_clobber_flags (insn))
18224 return false;
18225
18226 ok = ix86_decompose_address (operands[1], &parts);
18227 gcc_assert (ok);
18228
18229 /* There should be at least two components in the address. */
18230 if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18231 + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18232 return false;
18233
18234 /* We should not split into add if a non-legitimate PIC
18235 operand is used as the displacement. */
18236 if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18237 return false;
18238
18239 regno0 = true_regnum (operands[0]);
18240 regno1 = INVALID_REGNUM;
18241 regno2 = INVALID_REGNUM;
18242
18243 if (parts.base)
18244 regno1 = true_regnum (parts.base);
18245 if (parts.index)
18246 regno2 = true_regnum (parts.index);
18247
18248 split_cost = 0;
18249
18250 /* Compute how many cycles we will add to the execution time
18251 if we split the lea into a sequence of instructions. */
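/* Worked example (illustrative): for "lea 0x4(%rbx,%rcx,2), %rax" we count
   one mov (the destination matches neither source), one add of the index to
   the base, one shift for the scale, and one add of the displacement, minus
   the lea itself: split_cost == 3.  */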
18252 if (parts.base || parts.index)
18253 {
18254 /* Have to use a mov instruction if the non-destructive
18255 destination form is used. */
18256 if (regno1 != regno0 && regno2 != regno0)
18257 split_cost += 1;
18258
18259 /* Have to add index to base if both exist. */
18260 if (parts.base && parts.index)
18261 split_cost += 1;
18262
18263 /* Have to use shift and adds if scale is 2 or greater. */
18264 if (parts.scale > 1)
18265 {
18266 if (regno0 != regno1)
18267 split_cost += 1;
18268 else if (regno2 == regno0)
18269 split_cost += 4;
18270 else
18271 split_cost += parts.scale;
18272 }
18273
18274 /* Have to use an add instruction with an immediate if
18275 disp is non-zero. */
18276 if (parts.disp && parts.disp != const0_rtx)
18277 split_cost += 1;
18278
18279 /* Subtract the price of lea. */
18280 split_cost -= 1;
18281 }
18282
18283 return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18284 parts.scale > 1);
18285 }
18286
18287 /* Emit x86 binary operator CODE in mode MODE, where the first operand
18288 matches the destination. The RTX includes a clobber of FLAGS_REG. */
18289
18290 static void
18291 ix86_emit_binop (enum rtx_code code, enum machine_mode mode,
18292 rtx dst, rtx src)
18293 {
18294 rtx op, clob;
18295
18296 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
18297 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18298
18299 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18300 }
18301
18302 /* Return true if the definition of REGNO1 is nearest to INSN. */
18303
18304 static bool
18305 find_nearest_reg_def (rtx insn, int regno1, int regno2)
18306 {
18307 rtx prev = insn;
18308 rtx start = BB_HEAD (BLOCK_FOR_INSN (insn));
18309
18310 if (insn == start)
18311 return false;
18312 while (prev && prev != start)
18313 {
18314 if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
18315 {
18316 prev = PREV_INSN (prev);
18317 continue;
18318 }
18319 if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
18320 return true;
18321 else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
18322 return false;
18323 prev = PREV_INSN (prev);
18324 }
18325
18326 /* None of the regs is defined in the bb. */
18327 return false;
18328 }
18329
18330 /* Split lea instructions into a sequence of instructions
18331 which are executed on the ALU to avoid AGU stalls.
18332 It is assumed that it is allowed to clobber the flags register
18333 at the lea position. */
18334
18335 void
18336 ix86_split_lea_for_addr (rtx insn, rtx operands[], enum machine_mode mode)
18337 {
18338 unsigned int regno0, regno1, regno2;
18339 struct ix86_address parts;
18340 rtx target, tmp;
18341 int ok, adds;
18342
18343 ok = ix86_decompose_address (operands[1], &parts);
18344 gcc_assert (ok);
18345
18346 target = gen_lowpart (mode, operands[0]);
18347
18348 regno0 = true_regnum (target);
18349 regno1 = INVALID_REGNUM;
18350 regno2 = INVALID_REGNUM;
18351
18352 if (parts.base)
18353 {
18354 parts.base = gen_lowpart (mode, parts.base);
18355 regno1 = true_regnum (parts.base);
18356 }
18357
18358 if (parts.index)
18359 {
18360 parts.index = gen_lowpart (mode, parts.index);
18361 regno2 = true_regnum (parts.index);
18362 }
18363
18364 if (parts.disp)
18365 parts.disp = gen_lowpart (mode, parts.disp);
18366
18367 if (parts.scale > 1)
18368 {
18369 /* Case r1 = r1 + ... */
18370 if (regno1 == regno0)
18371 {
18372 /* If we have the case r1 = r1 + C * r2 then we
18373 would have to use multiplication, which is very
18374 expensive. Assume the cost model is wrong if we
18375 get such a case here. */
18376 gcc_assert (regno2 != regno0);
18377
18378 for (adds = parts.scale; adds > 0; adds--)
18379 ix86_emit_binop (PLUS, mode, target, parts.index);
18380 }
18381 else
18382 {
18383 /* r1 = r2 + r3 * C case. Need to move r3 into r1. */
18384 if (regno0 != regno2)
18385 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18386
18387 /* Use shift for scaling. */
18388 ix86_emit_binop (ASHIFT, mode, target,
18389 GEN_INT (exact_log2 (parts.scale)));
18390
18391 if (parts.base)
18392 ix86_emit_binop (PLUS, mode, target, parts.base);
18393
18394 if (parts.disp && parts.disp != const0_rtx)
18395 ix86_emit_binop (PLUS, mode, target, parts.disp);
18396 }
18397 }
18398 else if (!parts.base && !parts.index)
18399 {
18400 gcc_assert (parts.disp);
18401 emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
18402 }
18403 else
18404 {
18405 if (!parts.base)
18406 {
18407 if (regno0 != regno2)
18408 emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
18409 }
18410 else if (!parts.index)
18411 {
18412 if (regno0 != regno1)
18413 emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
18414 }
18415 else
18416 {
18417 if (regno0 == regno1)
18418 tmp = parts.index;
18419 else if (regno0 == regno2)
18420 tmp = parts.base;
18421 else
18422 {
18423 rtx tmp1;
18424
18425 /* Find better operand for SET instruction, depending
18426 on which definition is farther from the insn. */
18427 if (find_nearest_reg_def (insn, regno1, regno2))
18428 tmp = parts.index, tmp1 = parts.base;
18429 else
18430 tmp = parts.base, tmp1 = parts.index;
18431
18432 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18433
18434 if (parts.disp && parts.disp != const0_rtx)
18435 ix86_emit_binop (PLUS, mode, target, parts.disp);
18436
18437 ix86_emit_binop (PLUS, mode, target, tmp1);
18438 return;
18439 }
18440
18441 ix86_emit_binop (PLUS, mode, target, tmp);
18442 }
18443
18444 if (parts.disp && parts.disp != const0_rtx)
18445 ix86_emit_binop (PLUS, mode, target, parts.disp);
18446 }
18447 }
18448
18449 /* Return true if it is ok to optimize an ADD operation to an LEA
18450 operation to avoid flag register consumption. For most processors,
18451 ADD is faster than LEA. For processors like BONNELL, if the
18452 destination register of the LEA holds an actual address which will be
18453 used soon, LEA is better; otherwise ADD is better. */
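/* E.g. (illustrative): "a = a + b" can stay a single "addl %ebx, %eax",
   while "a = b + c" with a distinct destination needs either
   "leal (%rbx,%rcx), %eax" or a mov followed by an add.  */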
18454
18455 bool
18456 ix86_lea_for_add_ok (rtx insn, rtx operands[])
18457 {
18458 unsigned int regno0 = true_regnum (operands[0]);
18459 unsigned int regno1 = true_regnum (operands[1]);
18460 unsigned int regno2 = true_regnum (operands[2]);
18461
18462 /* If a = b + c, (a!=b && a!=c), must use lea form. */
18463 if (regno0 != regno1 && regno0 != regno2)
18464 return true;
18465
18466 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18467 return false;
18468
18469 return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
18470 }
18471
18472 /* Return true if destination reg of SET_BODY is shift count of
18473 USE_BODY. */
18474
18475 static bool
18476 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
18477 {
18478 rtx set_dest;
18479 rtx shift_rtx;
18480 int i;
18481
18482 /* Retrieve destination of SET_BODY. */
18483 switch (GET_CODE (set_body))
18484 {
18485 case SET:
18486 set_dest = SET_DEST (set_body);
18487 if (!set_dest || !REG_P (set_dest))
18488 return false;
18489 break;
18490 case PARALLEL:
18491 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
18492 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
18493 use_body))
18494 return true;
18495 default:
18496 return false;
18497 break;
18498 }
18499
18500 /* Retrieve shift count of USE_BODY. */
18501 switch (GET_CODE (use_body))
18502 {
18503 case SET:
18504 shift_rtx = XEXP (use_body, 1);
18505 break;
18506 case PARALLEL:
18507 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
18508 if (ix86_dep_by_shift_count_body (set_body,
18509 XVECEXP (use_body, 0, i)))
18510 return true;
18511 default:
18512 return false;
18513 break;
18514 }
18515
18516 if (shift_rtx
18517 && (GET_CODE (shift_rtx) == ASHIFT
18518 || GET_CODE (shift_rtx) == LSHIFTRT
18519 || GET_CODE (shift_rtx) == ASHIFTRT
18520 || GET_CODE (shift_rtx) == ROTATE
18521 || GET_CODE (shift_rtx) == ROTATERT))
18522 {
18523 rtx shift_count = XEXP (shift_rtx, 1);
18524
18525 /* Return true if shift count is dest of SET_BODY. */
18526 if (REG_P (shift_count))
18527 {
18528 /* Add a check since this can be invoked before register
18529 allocation in the pre-reload scheduler. */
18530 if (reload_completed
18531 && true_regnum (set_dest) == true_regnum (shift_count))
18532 return true;
18533 else if (REGNO (set_dest) == REGNO (shift_count))
18534 return true;
18535 }
18536 }
18537
18538 return false;
18539 }
18540
18541 /* Return true if destination reg of SET_INSN is shift count of
18542 USE_INSN. */
18543
18544 bool
18545 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
18546 {
18547 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
18548 PATTERN (use_insn));
18549 }
18550
18551 /* Return TRUE or FALSE depending on whether the unary operator meets the
18552 appropriate constraints. */
18553
18554 bool
18555 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
18556 enum machine_mode mode ATTRIBUTE_UNUSED,
18557 rtx operands[2])
18558 {
18559 /* If one of operands is memory, source and destination must match. */
18560 if ((MEM_P (operands[0])
18561 || MEM_P (operands[1]))
18562 && ! rtx_equal_p (operands[0], operands[1]))
18563 return false;
18564 return true;
18565 }
18566
18567 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
18568 are ok, keeping in mind the possible movddup alternative. */
18569
18570 bool
18571 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
18572 {
18573 if (MEM_P (operands[0]))
18574 return rtx_equal_p (operands[0], operands[1 + high]);
18575 if (MEM_P (operands[1]) && MEM_P (operands[2]))
18576 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
18577 return true;
18578 }
18579
18580 /* Post-reload splitter for converting an SF or DFmode value in an
18581 SSE register into an unsigned SImode. */
18582
18583 void
18584 ix86_split_convert_uns_si_sse (rtx operands[])
18585 {
18586 enum machine_mode vecmode;
18587 rtx value, large, zero_or_two31, input, two31, x;
18588
18589 large = operands[1];
18590 zero_or_two31 = operands[2];
18591 input = operands[3];
18592 two31 = operands[4];
18593 vecmode = GET_MODE (large);
18594 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
18595
18596 /* Load up the value into the low element. We must ensure that the other
18597 elements are valid floats -- zero is the easiest such value. */
18598 if (MEM_P (input))
18599 {
18600 if (vecmode == V4SFmode)
18601 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
18602 else
18603 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
18604 }
18605 else
18606 {
18607 input = gen_rtx_REG (vecmode, REGNO (input));
18608 emit_move_insn (value, CONST0_RTX (vecmode));
18609 if (vecmode == V4SFmode)
18610 emit_insn (gen_sse_movss (value, value, input));
18611 else
18612 emit_insn (gen_sse2_movsd (value, value, input));
18613 }
18614
18615 emit_move_insn (large, two31);
18616 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
18617
18618 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
18619 emit_insn (gen_rtx_SET (VOIDmode, large, x));
18620
18621 x = gen_rtx_AND (vecmode, zero_or_two31, large);
18622 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
18623
18624 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
18625 emit_insn (gen_rtx_SET (VOIDmode, value, x));
18626
18627 large = gen_rtx_REG (V4SImode, REGNO (large));
18628 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
18629
18630 x = gen_rtx_REG (V4SImode, REGNO (value));
18631 if (vecmode == V4SFmode)
18632 emit_insn (gen_fix_truncv4sfv4si2 (x, value));
18633 else
18634 emit_insn (gen_sse2_cvttpd2dq (x, value));
18635 value = x;
18636
18637 emit_insn (gen_xorv4si3 (value, value, large));
18638 }
18639
18640 /* Convert an unsigned DImode value into a DFmode, using only SSE.
18641 Expects the 64-bit DImode to be supplied in a pair of integral
18642 registers. Requires SSE2; will use SSE3 if available. For x86_32,
18643 -mfpmath=sse, !optimize_size only. */
18644
18645 void
18646 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
18647 {
18648 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
18649 rtx int_xmm, fp_xmm;
18650 rtx biases, exponents;
18651 rtx x;
18652
18653 int_xmm = gen_reg_rtx (V4SImode);
18654 if (TARGET_INTER_UNIT_MOVES_TO_VEC)
18655 emit_insn (gen_movdi_to_sse (int_xmm, input));
18656 else if (TARGET_SSE_SPLIT_REGS)
18657 {
18658 emit_clobber (int_xmm);
18659 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
18660 }
18661 else
18662 {
18663 x = gen_reg_rtx (V2DImode);
18664 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
18665 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
18666 }
18667
18668 x = gen_rtx_CONST_VECTOR (V4SImode,
18669 gen_rtvec (4, GEN_INT (0x43300000UL),
18670 GEN_INT (0x45300000UL),
18671 const0_rtx, const0_rtx));
18672 exponents = validize_mem (force_const_mem (V4SImode, x));
18673
18674 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
18675 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
18676
18677 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
18678 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
18679 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
18680 (0x1.0p84 + double(fp_value_hi_xmm)).
18681 Note these exponents differ by 32. */
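  /* A small numeric example (illustrative): for the input 0x0000000100000002
     the low word is 2 and the high word is 1.  After the interleave the two
     doubles are (0x1.0p52 + 2) and (0x1.0p84 + 1 * 0x1.0p32); subtracting
     the biases leaves 2.0 and 4294967296.0, and their sum 4294967298.0
     equals the original unsigned value.  */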
18682
18683 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
18684
18685 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
18686 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
18687 real_ldexp (&bias_lo_rvt, &dconst1, 52);
18688 real_ldexp (&bias_hi_rvt, &dconst1, 84);
18689 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
18690 x = const_double_from_real_value (bias_hi_rvt, DFmode);
18691 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
18692 biases = validize_mem (force_const_mem (V2DFmode, biases));
18693 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
18694
18695 /* Add the upper and lower DFmode values together. */
18696 if (TARGET_SSE3)
18697 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
18698 else
18699 {
18700 x = copy_to_mode_reg (V2DFmode, fp_xmm);
18701 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
18702 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
18703 }
18704
18705 ix86_expand_vector_extract (false, target, fp_xmm, 0);
18706 }
18707
18708 /* Not used, but eases macroization of patterns. */
18709 void
18710 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
18711 rtx input ATTRIBUTE_UNUSED)
18712 {
18713 gcc_unreachable ();
18714 }
18715
18716 /* Convert an unsigned SImode value into a DFmode value. Currently only
18717 used for SSE, but applicable anywhere. */
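/* The trick, briefly (illustrative): adding 0x80000000 flips the sign bit,
   so the converted signed number equals the unsigned value minus 0x1.0p31;
   adding 0x1.0p31 back afterwards restores the unsigned value.  E.g. the
   input 0x80000003 becomes the signed value 3, which converts to 3.0, and
   3.0 + 2147483648.0 == 2147483651.0, the expected result.  */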
18718
18719 void
18720 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
18721 {
18722 REAL_VALUE_TYPE TWO31r;
18723 rtx x, fp;
18724
18725 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
18726 NULL, 1, OPTAB_DIRECT);
18727
18728 fp = gen_reg_rtx (DFmode);
18729 emit_insn (gen_floatsidf2 (fp, x));
18730
18731 real_ldexp (&TWO31r, &dconst1, 31);
18732 x = const_double_from_real_value (TWO31r, DFmode);
18733
18734 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
18735 if (x != target)
18736 emit_move_insn (target, x);
18737 }
18738
18739 /* Convert a signed DImode value into a DFmode. Only used for SSE in
18740 32-bit mode; otherwise we have a direct convert instruction. */
18741
18742 void
18743 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
18744 {
18745 REAL_VALUE_TYPE TWO32r;
18746 rtx fp_lo, fp_hi, x;
18747
18748 fp_lo = gen_reg_rtx (DFmode);
18749 fp_hi = gen_reg_rtx (DFmode);
18750
18751 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
18752
18753 real_ldexp (&TWO32r, &dconst1, 32);
18754 x = const_double_from_real_value (TWO32r, DFmode);
18755 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
18756
18757 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
18758
18759 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
18760 0, OPTAB_DIRECT);
18761 if (x != target)
18762 emit_move_insn (target, x);
18763 }
18764
18765 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
18766 For x86_32, -mfpmath=sse, !optimize_size only. */
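/* A sketch of the computation (illustrative):
     int_lo = input & 0xffff;
     int_hi = input >> 16;
     result = (float) int_hi * 0x1p16f + (float) int_lo;
   Both 16-bit halves convert to SFmode exactly, avoiding the signed
   overflow a direct 32-bit conversion would hit for values >= 2^31.  */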
18767 void
18768 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
18769 {
18770 REAL_VALUE_TYPE ONE16r;
18771 rtx fp_hi, fp_lo, int_hi, int_lo, x;
18772
18773 real_ldexp (&ONE16r, &dconst1, 16);
18774 x = const_double_from_real_value (ONE16r, SFmode);
18775 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT (0xffff),
18776 NULL, 0, OPTAB_DIRECT);
18777 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT (16),
18778 NULL, 0, OPTAB_DIRECT);
18779 fp_hi = gen_reg_rtx (SFmode);
18780 fp_lo = gen_reg_rtx (SFmode);
18781 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
18782 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
18783 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
18784 0, OPTAB_DIRECT);
18785 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
18786 0, OPTAB_DIRECT);
18787 if (!rtx_equal_p (target, fp_hi))
18788 emit_move_insn (target, fp_hi);
18789 }
18790
18791 /* floatunsv{4,8}siv{4,8}sf2 expander. Expand code to convert
18792 a vector of unsigned ints VAL to vector of floats TARGET. */
18793
18794 void
18795 ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
18796 {
18797 rtx tmp[8];
18798 REAL_VALUE_TYPE TWO16r;
18799 enum machine_mode intmode = GET_MODE (val);
18800 enum machine_mode fltmode = GET_MODE (target);
18801 rtx (*cvt) (rtx, rtx);
18802
18803 if (intmode == V4SImode)
18804 cvt = gen_floatv4siv4sf2;
18805 else
18806 cvt = gen_floatv8siv8sf2;
18807 tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
18808 tmp[0] = force_reg (intmode, tmp[0]);
18809 tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
18810 OPTAB_DIRECT);
18811 tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
18812 NULL_RTX, 1, OPTAB_DIRECT);
18813 tmp[3] = gen_reg_rtx (fltmode);
18814 emit_insn (cvt (tmp[3], tmp[1]));
18815 tmp[4] = gen_reg_rtx (fltmode);
18816 emit_insn (cvt (tmp[4], tmp[2]));
18817 real_ldexp (&TWO16r, &dconst1, 16);
18818 tmp[5] = const_double_from_real_value (TWO16r, SFmode);
18819 tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
18820 tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
18821 OPTAB_DIRECT);
18822 tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
18823 OPTAB_DIRECT);
18824 if (tmp[7] != target)
18825 emit_move_insn (target, tmp[7]);
18826 }
18827
18828 /* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
18829 pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
18830 This is done by doing just signed conversion if < 0x1p31, and otherwise by
18831 subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards. */
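/* A per-element example (illustrative): for the value 3e9 (>= 0x1.0p31) the
   compare mask selects 0x1.0p31, the subtraction leaves 852516352.0, the
   signed truncation then yields 852516352, and xoring in 0x80000000 from
   *XORP afterwards gives back 3000000000.  Values below 0x1.0p31 subtract
   nothing and get a zero xor mask.  */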
18832
18833 rtx
18834 ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
18835 {
18836 REAL_VALUE_TYPE TWO31r;
18837 rtx two31r, tmp[4];
18838 enum machine_mode mode = GET_MODE (val);
18839 enum machine_mode scalarmode = GET_MODE_INNER (mode);
18840 enum machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
18841 rtx (*cmp) (rtx, rtx, rtx, rtx);
18842 int i;
18843
18844 for (i = 0; i < 3; i++)
18845 tmp[i] = gen_reg_rtx (mode);
18846 real_ldexp (&TWO31r, &dconst1, 31);
18847 two31r = const_double_from_real_value (TWO31r, scalarmode);
18848 two31r = ix86_build_const_vector (mode, 1, two31r);
18849 two31r = force_reg (mode, two31r);
18850 switch (mode)
18851 {
18852 case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
18853 case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
18854 case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
18855 case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
18856 default: gcc_unreachable ();
18857 }
18858 tmp[3] = gen_rtx_LE (mode, two31r, val);
18859 emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
18860 tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
18861 0, OPTAB_DIRECT);
18862 if (intmode == V4SImode || TARGET_AVX2)
18863 *xorp = expand_simple_binop (intmode, ASHIFT,
18864 gen_lowpart (intmode, tmp[0]),
18865 GEN_INT (31), NULL_RTX, 0,
18866 OPTAB_DIRECT);
18867 else
18868 {
18869 rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
18870 two31 = ix86_build_const_vector (intmode, 1, two31);
18871 *xorp = expand_simple_binop (intmode, AND,
18872 gen_lowpart (intmode, tmp[0]),
18873 two31, NULL_RTX, 0,
18874 OPTAB_DIRECT);
18875 }
18876 return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
18877 0, OPTAB_DIRECT);
18878 }
18879
18880 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
18881 then replicate the value for all elements of the vector
18882 register. */
18883
18884 rtx
18885 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
18886 {
18887 int i, n_elt;
18888 rtvec v;
18889 enum machine_mode scalar_mode;
18890
18891 switch (mode)
18892 {
18893 case V64QImode:
18894 case V32QImode:
18895 case V16QImode:
18896 case V32HImode:
18897 case V16HImode:
18898 case V8HImode:
18899 case V16SImode:
18900 case V8SImode:
18901 case V4SImode:
18902 case V8DImode:
18903 case V4DImode:
18904 case V2DImode:
18905 gcc_assert (vect);
18906 case V16SFmode:
18907 case V8SFmode:
18908 case V4SFmode:
18909 case V8DFmode:
18910 case V4DFmode:
18911 case V2DFmode:
18912 n_elt = GET_MODE_NUNITS (mode);
18913 v = rtvec_alloc (n_elt);
18914 scalar_mode = GET_MODE_INNER (mode);
18915
18916 RTVEC_ELT (v, 0) = value;
18917
18918 for (i = 1; i < n_elt; ++i)
18919 RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
18920
18921 return gen_rtx_CONST_VECTOR (mode, v);
18922
18923 default:
18924 gcc_unreachable ();
18925 }
18926 }
18927
18928 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
18929 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
18930 for an SSE register. If VECT is true, then replicate the mask for
18931 all elements of the vector register. If INVERT is true, then create
18932 a mask excluding the sign bit. */
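/* For example (illustrative): for V4SFmode with VECT true and INVERT false
   this produces the vector constant { -0.0f, -0.0f, -0.0f, -0.0f }, i.e.
   0x80000000 in every element; with INVERT true every element is
   0x7fffffff instead, masking everything but the sign bit.  */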
18933
18934 rtx
18935 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
18936 {
18937 enum machine_mode vec_mode, imode;
18938 HOST_WIDE_INT hi, lo;
18939 int shift = 63;
18940 rtx v;
18941 rtx mask;
18942
18943 /* Find the sign bit, sign extended to 2*HWI. */
18944 switch (mode)
18945 {
18946 case V16SImode:
18947 case V16SFmode:
18948 case V8SImode:
18949 case V4SImode:
18950 case V8SFmode:
18951 case V4SFmode:
18952 vec_mode = mode;
18953 mode = GET_MODE_INNER (mode);
18954 imode = SImode;
18955 lo = 0x80000000, hi = lo < 0;
18956 break;
18957
18958 case V8DImode:
18959 case V4DImode:
18960 case V2DImode:
18961 case V8DFmode:
18962 case V4DFmode:
18963 case V2DFmode:
18964 vec_mode = mode;
18965 mode = GET_MODE_INNER (mode);
18966 imode = DImode;
18967 if (HOST_BITS_PER_WIDE_INT >= 64)
18968 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
18969 else
18970 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18971 break;
18972
18973 case TImode:
18974 case TFmode:
18975 vec_mode = VOIDmode;
18976 if (HOST_BITS_PER_WIDE_INT >= 64)
18977 {
18978 imode = TImode;
18979 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
18980 }
18981 else
18982 {
18983 rtvec vec;
18984
18985 imode = DImode;
18986 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
18987
18988 if (invert)
18989 {
18990 lo = ~lo, hi = ~hi;
18991 v = constm1_rtx;
18992 }
18993 else
18994 v = const0_rtx;
18995
18996 mask = immed_double_const (lo, hi, imode);
18997
18998 vec = gen_rtvec (2, v, mask);
18999 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19000 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19001
19002 return v;
19003 }
19004 break;
19005
19006 default:
19007 gcc_unreachable ();
19008 }
19009
19010 if (invert)
19011 lo = ~lo, hi = ~hi;
19012
19013 /* Force this value into the low part of a fp vector constant. */
19014 mask = immed_double_const (lo, hi, imode);
19015 mask = gen_lowpart (mode, mask);
19016
19017 if (vec_mode == VOIDmode)
19018 return force_reg (mode, mask);
19019
19020 v = ix86_build_const_vector (vec_mode, vect, mask);
19021 return force_reg (vec_mode, v);
19022 }
19023
19024 /* Generate code for floating point ABS or NEG. */
19025
19026 void
19027 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
19028 rtx operands[])
19029 {
19030 rtx mask, set, dst, src;
19031 bool use_sse = false;
19032 bool vector_mode = VECTOR_MODE_P (mode);
19033 enum machine_mode vmode = mode;
19034
19035 if (vector_mode)
19036 use_sse = true;
19037 else if (mode == TFmode)
19038 use_sse = true;
19039 else if (TARGET_SSE_MATH)
19040 {
19041 use_sse = SSE_FLOAT_MODE_P (mode);
19042 if (mode == SFmode)
19043 vmode = V4SFmode;
19044 else if (mode == DFmode)
19045 vmode = V2DFmode;
19046 }
19047
19048 /* NEG and ABS performed with SSE use bitwise mask operations.
19049 Create the appropriate mask now. */
19050 if (use_sse)
19051 mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19052 else
19053 mask = NULL_RTX;
19054
19055 dst = operands[0];
19056 src = operands[1];
19057
19058 set = gen_rtx_fmt_e (code, mode, src);
19059 set = gen_rtx_SET (VOIDmode, dst, set);
19060
19061 if (mask)
19062 {
19063 rtx use, clob;
19064 rtvec par;
19065
19066 use = gen_rtx_USE (VOIDmode, mask);
19067 if (vector_mode)
19068 par = gen_rtvec (2, set, use);
19069 else
19070 {
19071 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19072 par = gen_rtvec (3, set, use, clob);
19073 }
19074 emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19075 }
19076 else
19077 emit_insn (set);
19078 }
19079
19080 /* Expand a copysign operation. Special case operand 0 being a constant. */
19081
19082 void
19083 ix86_expand_copysign (rtx operands[])
19084 {
19085 enum machine_mode mode, vmode;
19086 rtx dest, op0, op1, mask, nmask;
19087
19088 dest = operands[0];
19089 op0 = operands[1];
19090 op1 = operands[2];
19091
19092 mode = GET_MODE (dest);
19093
19094 if (mode == SFmode)
19095 vmode = V4SFmode;
19096 else if (mode == DFmode)
19097 vmode = V2DFmode;
19098 else
19099 vmode = mode;
19100
19101 if (GET_CODE (op0) == CONST_DOUBLE)
19102 {
19103 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19104
19105 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19106 op0 = simplify_unary_operation (ABS, mode, op0, mode);
19107
19108 if (mode == SFmode || mode == DFmode)
19109 {
19110 if (op0 == CONST0_RTX (mode))
19111 op0 = CONST0_RTX (vmode);
19112 else
19113 {
19114 rtx v = ix86_build_const_vector (vmode, false, op0);
19115
19116 op0 = force_reg (vmode, v);
19117 }
19118 }
19119 else if (op0 != CONST0_RTX (mode))
19120 op0 = force_reg (mode, op0);
19121
19122 mask = ix86_build_signbit_mask (vmode, 0, 0);
19123
19124 if (mode == SFmode)
19125 copysign_insn = gen_copysignsf3_const;
19126 else if (mode == DFmode)
19127 copysign_insn = gen_copysigndf3_const;
19128 else
19129 copysign_insn = gen_copysigntf3_const;
19130
19131 emit_insn (copysign_insn (dest, op0, op1, mask));
19132 }
19133 else
19134 {
19135 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19136
19137 nmask = ix86_build_signbit_mask (vmode, 0, 1);
19138 mask = ix86_build_signbit_mask (vmode, 0, 0);
19139
19140 if (mode == SFmode)
19141 copysign_insn = gen_copysignsf3_var;
19142 else if (mode == DFmode)
19143 copysign_insn = gen_copysigndf3_var;
19144 else
19145 copysign_insn = gen_copysigntf3_var;
19146
19147 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19148 }
19149 }
19150
19151 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
19152 be a constant, and so has already been expanded into a vector constant. */
19153
19154 void
19155 ix86_split_copysign_const (rtx operands[])
19156 {
19157 enum machine_mode mode, vmode;
19158 rtx dest, op0, mask, x;
19159
19160 dest = operands[0];
19161 op0 = operands[1];
19162 mask = operands[3];
19163
19164 mode = GET_MODE (dest);
19165 vmode = GET_MODE (mask);
19166
19167 dest = simplify_gen_subreg (vmode, dest, mode, 0);
19168 x = gen_rtx_AND (vmode, dest, mask);
19169 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19170
19171 if (op0 != CONST0_RTX (vmode))
19172 {
19173 x = gen_rtx_IOR (vmode, dest, op0);
19174 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19175 }
19176 }
19177
19178 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
19179 so we have to do two masks. */
19180
19181 void
19182 ix86_split_copysign_var (rtx operands[])
19183 {
19184 enum machine_mode mode, vmode;
19185 rtx dest, scratch, op0, op1, mask, nmask, x;
19186
19187 dest = operands[0];
19188 scratch = operands[1];
19189 op0 = operands[2];
19190 op1 = operands[3];
19191 nmask = operands[4];
19192 mask = operands[5];
19193
19194 mode = GET_MODE (dest);
19195 vmode = GET_MODE (mask);
19196
19197 if (rtx_equal_p (op0, op1))
19198 {
19199 /* Shouldn't happen often (it's useless, obviously), but when it does
19200 we'd generate incorrect code if we continue below. */
19201 emit_move_insn (dest, op0);
19202 return;
19203 }
19204
19205 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
19206 {
19207 gcc_assert (REGNO (op1) == REGNO (scratch));
19208
19209 x = gen_rtx_AND (vmode, scratch, mask);
19210 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19211
19212 dest = mask;
19213 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19214 x = gen_rtx_NOT (vmode, dest);
19215 x = gen_rtx_AND (vmode, x, op0);
19216 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19217 }
19218 else
19219 {
19220 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
19221 {
19222 x = gen_rtx_AND (vmode, scratch, mask);
19223 }
19224 else /* alternative 2,4 */
19225 {
19226 gcc_assert (REGNO (mask) == REGNO (scratch));
19227 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19228 x = gen_rtx_AND (vmode, scratch, op1);
19229 }
19230 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19231
19232 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
19233 {
19234 dest = simplify_gen_subreg (vmode, op0, mode, 0);
19235 x = gen_rtx_AND (vmode, dest, nmask);
19236 }
19237 else /* alternative 3,4 */
19238 {
19239 gcc_assert (REGNO (nmask) == REGNO (dest));
19240 dest = nmask;
19241 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19242 x = gen_rtx_AND (vmode, dest, op0);
19243 }
19244 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19245 }
19246
19247 x = gen_rtx_IOR (vmode, dest, scratch);
19248 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19249 }
19250
19251 /* Return TRUE or FALSE depending on whether the first SET in INSN
19252 has source and destination with matching CC modes, and that the
19253 CC mode is at least as constrained as REQ_MODE. */
19254
19255 bool
19256 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
19257 {
19258 rtx set;
19259 enum machine_mode set_mode;
19260
19261 set = PATTERN (insn);
19262 if (GET_CODE (set) == PARALLEL)
19263 set = XVECEXP (set, 0, 0);
19264 gcc_assert (GET_CODE (set) == SET);
19265 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19266
19267 set_mode = GET_MODE (SET_DEST (set));
19268 switch (set_mode)
19269 {
19270 case CCNOmode:
19271 if (req_mode != CCNOmode
19272 && (req_mode != CCmode
19273 || XEXP (SET_SRC (set), 1) != const0_rtx))
19274 return false;
19275 break;
19276 case CCmode:
19277 if (req_mode == CCGCmode)
19278 return false;
19279 /* FALLTHRU */
19280 case CCGCmode:
19281 if (req_mode == CCGOCmode || req_mode == CCNOmode)
19282 return false;
19283 /* FALLTHRU */
19284 case CCGOCmode:
19285 if (req_mode == CCZmode)
19286 return false;
19287 /* FALLTHRU */
19288 case CCZmode:
19289 break;
19290
19291 case CCAmode:
19292 case CCCmode:
19293 case CCOmode:
19294 case CCSmode:
19295 if (set_mode != req_mode)
19296 return false;
19297 break;
19298
19299 default:
19300 gcc_unreachable ();
19301 }
19302
19303 return GET_MODE (SET_SRC (set)) == set_mode;
19304 }
19305
19306 /* Generate insn patterns to do an integer compare of OPERANDS. */
19307
19308 static rtx
19309 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
19310 {
19311 enum machine_mode cmpmode;
19312 rtx tmp, flags;
19313
19314 cmpmode = SELECT_CC_MODE (code, op0, op1);
19315 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
19316
19317 /* This is very simple, but making the interface the same as in the
19318 FP case makes the rest of the code easier. */
19319 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
19320 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
19321
19322 /* Return the test that should be put into the flags user, i.e.
19323 the bcc, scc, or cmov instruction. */
19324 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
19325 }
19326
19327 /* Figure out whether to use ordered or unordered fp comparisons.
19328 Return the appropriate mode to use. */
19329
19330 enum machine_mode
19331 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
19332 {
19333 /* ??? In order to make all comparisons reversible, we do all comparisons
19334 non-trapping when compiling for IEEE. Once gcc is able to distinguish
19335 all forms of trapping and nontrapping comparisons, we can make inequality
19336 comparisons trapping again, since it results in better code when using
19337 FCOM based compares. */
19338 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
19339 }
19340
19341 enum machine_mode
19342 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
19343 {
19344 enum machine_mode mode = GET_MODE (op0);
19345
19346 if (SCALAR_FLOAT_MODE_P (mode))
19347 {
19348 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
19349 return ix86_fp_compare_mode (code);
19350 }
19351
19352 switch (code)
19353 {
19354 /* Only zero flag is needed. */
19355 case EQ: /* ZF=0 */
19356 case NE: /* ZF!=0 */
19357 return CCZmode;
19358 /* Codes needing carry flag. */
19359 case GEU: /* CF=0 */
19360 case LTU: /* CF=1 */
19361 /* Detect overflow checks. They need just the carry flag. */
19362 if (GET_CODE (op0) == PLUS
19363 && rtx_equal_p (op1, XEXP (op0, 0)))
19364 return CCCmode;
19365 else
19366 return CCmode;
19367 case GTU: /* CF=0 & ZF=0 */
19368 case LEU: /* CF=1 | ZF=1 */
19369 return CCmode;
19370 /* Codes possibly doable only with sign flag when
19371 comparing against zero. */
19372 case GE: /* SF=OF or SF=0 */
19373 case LT: /* SF<>OF or SF=1 */
19374 if (op1 == const0_rtx)
19375 return CCGOCmode;
19376 else
19377 /* For other cases Carry flag is not required. */
19378 return CCGCmode;
19379 /* Codes doable only with sign flag when comparing
19380 against zero, but we miss jump instruction for it
19381 so we need to use relational tests against overflow
19382 that thus needs to be zero. */
19383 case GT: /* ZF=0 & SF=OF */
19384 case LE: /* ZF=1 | SF<>OF */
19385 if (op1 == const0_rtx)
19386 return CCNOmode;
19387 else
19388 return CCGCmode;
19389 /* The strcmp pattern does (use flags), and combine may ask us for the
19390 proper mode. */
19391 case USE:
19392 return CCmode;
19393 default:
19394 gcc_unreachable ();
19395 }
19396 }
19397
19398 /* Return the fixed registers used for condition codes. */
19399
19400 static bool
19401 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
19402 {
19403 *p1 = FLAGS_REG;
19404 *p2 = FPSR_REG;
19405 return true;
19406 }
19407
19408 /* If two condition code modes are compatible, return a condition code
19409 mode which is compatible with both. Otherwise, return
19410 VOIDmode. */
19411
19412 static enum machine_mode
19413 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
19414 {
19415 if (m1 == m2)
19416 return m1;
19417
19418 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
19419 return VOIDmode;
19420
19421 if ((m1 == CCGCmode && m2 == CCGOCmode)
19422 || (m1 == CCGOCmode && m2 == CCGCmode))
19423 return CCGCmode;
19424
19425 if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
19426 return m2;
19427 else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
19428 return m1;
19429
19430 switch (m1)
19431 {
19432 default:
19433 gcc_unreachable ();
19434
19435 case CCmode:
19436 case CCGCmode:
19437 case CCGOCmode:
19438 case CCNOmode:
19439 case CCAmode:
19440 case CCCmode:
19441 case CCOmode:
19442 case CCSmode:
19443 case CCZmode:
19444 switch (m2)
19445 {
19446 default:
19447 return VOIDmode;
19448
19449 case CCmode:
19450 case CCGCmode:
19451 case CCGOCmode:
19452 case CCNOmode:
19453 case CCAmode:
19454 case CCCmode:
19455 case CCOmode:
19456 case CCSmode:
19457 case CCZmode:
19458 return CCmode;
19459 }
19460
19461 case CCFPmode:
19462 case CCFPUmode:
19463 /* These are only compatible with themselves, which we already
19464 checked above. */
19465 return VOIDmode;
19466 }
19467 }
19468
19469
19470 /* Return a comparison we can do that is equivalent to
19471 swap_condition (code), apart possibly from orderedness.
19472 But never change orderedness if TARGET_IEEE_FP, returning
19473 UNKNOWN in that case if necessary. */
19474
19475 static enum rtx_code
19476 ix86_fp_swap_condition (enum rtx_code code)
19477 {
19478 switch (code)
19479 {
19480 case GT: /* GTU - CF=0 & ZF=0 */
19481 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
19482 case GE: /* GEU - CF=0 */
19483 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
19484 case UNLT: /* LTU - CF=1 */
19485 return TARGET_IEEE_FP ? UNKNOWN : GT;
19486 case UNLE: /* LEU - CF=1 | ZF=1 */
19487 return TARGET_IEEE_FP ? UNKNOWN : GE;
19488 default:
19489 return swap_condition (code);
19490 }
19491 }
19492
19493 /* Return the cost of comparison CODE using the best strategy for performance.
19494 All following functions use the number of instructions as the cost metric.
19495 In the future this should be tweaked to compute bytes for optimize_size and
19496 take into account the performance of various instructions on various CPUs. */
19497
19498 static int
19499 ix86_fp_comparison_cost (enum rtx_code code)
19500 {
19501 int arith_cost;
19502
19503 /* The cost of code using bit-twiddling on %ah. */
19504 switch (code)
19505 {
19506 case UNLE:
19507 case UNLT:
19508 case LTGT:
19509 case GT:
19510 case GE:
19511 case UNORDERED:
19512 case ORDERED:
19513 case UNEQ:
19514 arith_cost = 4;
19515 break;
19516 case LT:
19517 case NE:
19518 case EQ:
19519 case UNGE:
19520 arith_cost = TARGET_IEEE_FP ? 5 : 4;
19521 break;
19522 case LE:
19523 case UNGT:
19524 arith_cost = TARGET_IEEE_FP ? 6 : 4;
19525 break;
19526 default:
19527 gcc_unreachable ();
19528 }
19529
19530 switch (ix86_fp_comparison_strategy (code))
19531 {
19532 case IX86_FPCMP_COMI:
19533 return arith_cost > 4 ? 3 : 2;
19534 case IX86_FPCMP_SAHF:
19535 return arith_cost > 4 ? 4 : 3;
19536 default:
19537 return arith_cost;
19538 }
19539 }
19540
19541 /* Return the strategy to use for a floating-point comparison. We assume that
19542 fcomi is always preferable where available, since that is also true when
19543 looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
19544
19545 enum ix86_fpcmp_strategy
19546 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
19547 {
19548 /* Do fcomi/sahf based test when profitable. */
19549
19550 if (TARGET_CMOVE)
19551 return IX86_FPCMP_COMI;
19552
19553 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
19554 return IX86_FPCMP_SAHF;
19555
19556 return IX86_FPCMP_ARITH;
19557 }
19558
19559 /* Swap, force into registers, or otherwise massage the two operands
19560 to an fp comparison.  The operands are updated in place; the new
19561 comparison code is returned. */
19562
19563 static enum rtx_code
19564 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
19565 {
19566 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
19567 rtx op0 = *pop0, op1 = *pop1;
19568 enum machine_mode op_mode = GET_MODE (op0);
19569 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
19570
19571 /* All of the unordered compare instructions only work on registers.
19572 The same is true of the fcomi compare instructions. The XFmode
19573 compare instructions require registers except when comparing
19574 against zero or when converting operand 1 from fixed point to
19575 floating point. */
19576
19577 if (!is_sse
19578 && (fpcmp_mode == CCFPUmode
19579 || (op_mode == XFmode
19580 && ! (standard_80387_constant_p (op0) == 1
19581 || standard_80387_constant_p (op1) == 1)
19582 && GET_CODE (op1) != FLOAT)
19583 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
19584 {
19585 op0 = force_reg (op_mode, op0);
19586 op1 = force_reg (op_mode, op1);
19587 }
19588 else
19589 {
19590 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
19591 things around if they appear profitable, otherwise force op0
19592 into a register. */
19593
19594 if (standard_80387_constant_p (op0) == 0
19595 || (MEM_P (op0)
19596 && ! (standard_80387_constant_p (op1) == 0
19597 || MEM_P (op1))))
19598 {
19599 enum rtx_code new_code = ix86_fp_swap_condition (code);
19600 if (new_code != UNKNOWN)
19601 {
19602 rtx tmp;
19603 tmp = op0, op0 = op1, op1 = tmp;
19604 code = new_code;
19605 }
19606 }
19607
19608 if (!REG_P (op0))
19609 op0 = force_reg (op_mode, op0);
19610
19611 if (CONSTANT_P (op1))
19612 {
19613 int tmp = standard_80387_constant_p (op1);
19614 if (tmp == 0)
19615 op1 = validize_mem (force_const_mem (op_mode, op1));
19616 else if (tmp == 1)
19617 {
19618 if (TARGET_CMOVE)
19619 op1 = force_reg (op_mode, op1);
19620 }
19621 else
19622 op1 = force_reg (op_mode, op1);
19623 }
19624 }
19625
19626 /* Try to rearrange the comparison to make it cheaper. */
19627 if (ix86_fp_comparison_cost (code)
19628 > ix86_fp_comparison_cost (swap_condition (code))
19629 && (REG_P (op1) || can_create_pseudo_p ()))
19630 {
19631 rtx tmp;
19632 tmp = op0, op0 = op1, op1 = tmp;
19633 code = swap_condition (code);
19634 if (!REG_P (op0))
19635 op0 = force_reg (op_mode, op0);
19636 }
19637
19638 *pop0 = op0;
19639 *pop1 = op1;
19640 return code;
19641 }
19642
19643 /* Convert a comparison code we use to represent an FP comparison to the
19644 integer code that will result in a proper branch.  Return UNKNOWN if no
19645 such code is available.  */
19646
19647 enum rtx_code
19648 ix86_fp_compare_code_to_integer (enum rtx_code code)
19649 {
19650 switch (code)
19651 {
19652 case GT:
19653 return GTU;
19654 case GE:
19655 return GEU;
19656 case ORDERED:
19657 case UNORDERED:
19658 return code;
19659 break;
19660 case UNEQ:
19661 return EQ;
19662 break;
19663 case UNLT:
19664 return LTU;
19665 break;
19666 case UNLE:
19667 return LEU;
19668 break;
19669 case LTGT:
19670 return NE;
19671 break;
19672 default:
19673 return UNKNOWN;
19674 }
19675 }
19676
19677 /* Generate insn patterns to do a floating point compare of OPERANDS. */
19678
19679 static rtx
19680 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
19681 {
19682 enum machine_mode fpcmp_mode, intcmp_mode;
19683 rtx tmp, tmp2;
19684
19685 fpcmp_mode = ix86_fp_compare_mode (code);
19686 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
19687
19688 /* Do fcomi/sahf based test when profitable. */
19689 switch (ix86_fp_comparison_strategy (code))
19690 {
19691 case IX86_FPCMP_COMI:
19692 intcmp_mode = fpcmp_mode;
19693 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19694 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19695 tmp);
19696 emit_insn (tmp);
19697 break;
19698
19699 case IX86_FPCMP_SAHF:
19700 intcmp_mode = fpcmp_mode;
19701 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19702 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
19703 tmp);
19704
19705 if (!scratch)
19706 scratch = gen_reg_rtx (HImode);
19707 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
19708 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
19709 break;
19710
19711 case IX86_FPCMP_ARITH:
19712 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
19713 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
19714 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
19715 if (!scratch)
19716 scratch = gen_reg_rtx (HImode);
19717 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
19718
19719 /* In the unordered case, we have to check C2 for NaNs, which
19720 doesn't happen to work out to anything nice combination-wise.
19721 So do some bit twiddling on the value we've got in AH to come
19722 up with an appropriate set of condition codes.  */
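/* For reference: after fnstsw, AH holds the FPU condition bits with C0 at
   0x01, C2 at 0x04 and C3 at 0x40, so a mask of 0x45 below tests C3|C2|C0
   and 0x05 tests C2|C0.  */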
19723
19724 intcmp_mode = CCNOmode;
19725 switch (code)
19726 {
19727 case GT:
19728 case UNGT:
19729 if (code == GT || !TARGET_IEEE_FP)
19730 {
19731 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19732 code = EQ;
19733 }
19734 else
19735 {
19736 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19737 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19738 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
19739 intcmp_mode = CCmode;
19740 code = GEU;
19741 }
19742 break;
19743 case LT:
19744 case UNLT:
19745 if (code == LT && TARGET_IEEE_FP)
19746 {
19747 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19748 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
19749 intcmp_mode = CCmode;
19750 code = EQ;
19751 }
19752 else
19753 {
19754 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
19755 code = NE;
19756 }
19757 break;
19758 case GE:
19759 case UNGE:
19760 if (code == GE || !TARGET_IEEE_FP)
19761 {
19762 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
19763 code = EQ;
19764 }
19765 else
19766 {
19767 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19768 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
19769 code = NE;
19770 }
19771 break;
19772 case LE:
19773 case UNLE:
19774 if (code == LE && TARGET_IEEE_FP)
19775 {
19776 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19777 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
19778 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19779 intcmp_mode = CCmode;
19780 code = LTU;
19781 }
19782 else
19783 {
19784 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
19785 code = NE;
19786 }
19787 break;
19788 case EQ:
19789 case UNEQ:
19790 if (code == EQ && TARGET_IEEE_FP)
19791 {
19792 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19793 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
19794 intcmp_mode = CCmode;
19795 code = EQ;
19796 }
19797 else
19798 {
19799 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19800 code = NE;
19801 }
19802 break;
19803 case NE:
19804 case LTGT:
19805 if (code == NE && TARGET_IEEE_FP)
19806 {
19807 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
19808 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
19809 GEN_INT (0x40)));
19810 code = NE;
19811 }
19812 else
19813 {
19814 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
19815 code = EQ;
19816 }
19817 break;
19818
19819 case UNORDERED:
19820 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19821 code = NE;
19822 break;
19823 case ORDERED:
19824 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
19825 code = EQ;
19826 break;
19827
19828 default:
19829 gcc_unreachable ();
19830 }
19831 break;
19832
19833 default:
19834 gcc_unreachable();
19835 }
19836
19837 /* Return the test that should be put into the flags user, i.e.
19838 the bcc, scc, or cmov instruction. */
19839 return gen_rtx_fmt_ee (code, VOIDmode,
19840 gen_rtx_REG (intcmp_mode, FLAGS_REG),
19841 const0_rtx);
19842 }
19843
19844 static rtx
19845 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
19846 {
19847 rtx ret;
19848
19849 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
19850 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
19851
19852 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
19853 {
19854 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
19855 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
19856 }
19857 else
19858 ret = ix86_expand_int_compare (code, op0, op1);
19859
19860 return ret;
19861 }
19862
19863 void
19864 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
19865 {
19866 enum machine_mode mode = GET_MODE (op0);
19867 rtx tmp;
19868
19869 switch (mode)
19870 {
19871 case SFmode:
19872 case DFmode:
19873 case XFmode:
19874 case QImode:
19875 case HImode:
19876 case SImode:
19877 simple:
19878 tmp = ix86_expand_compare (code, op0, op1);
19879 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19880 gen_rtx_LABEL_REF (VOIDmode, label),
19881 pc_rtx);
19882 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19883 return;
19884
19885 case DImode:
19886 if (TARGET_64BIT)
19887 goto simple;
19888 case TImode:
19889 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch.  */
19890 {
19891 rtx lo[2], hi[2], label2;
19892 enum rtx_code code1, code2, code3;
19893 enum machine_mode submode;
19894
19895 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
19896 {
19897 tmp = op0, op0 = op1, op1 = tmp;
19898 code = swap_condition (code);
19899 }
19900
19901 split_double_mode (mode, &op0, 1, lo+0, hi+0);
19902 split_double_mode (mode, &op1, 1, lo+1, hi+1);
19903
19904 submode = mode == DImode ? SImode : DImode;
19905
19906 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
19907 avoid two branches. This costs one extra insn, so disable when
19908 optimizing for size. */
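/* Illustrative 32-bit sequence for a DImode "a == b" (actual operands are
   chosen by expand_binop):
     xorl  hi(b), hi(a)
     xorl  lo(b), lo(a)
     orl   lo(a), hi(a)
     je    label
   The OR of the two XORs is zero iff both halves match.  */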
19909
19910 if ((code == EQ || code == NE)
19911 && (!optimize_insn_for_size_p ()
19912 || hi[1] == const0_rtx || lo[1] == const0_rtx))
19913 {
19914 rtx xor0, xor1;
19915
19916 xor1 = hi[0];
19917 if (hi[1] != const0_rtx)
19918 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
19919 NULL_RTX, 0, OPTAB_WIDEN);
19920
19921 xor0 = lo[0];
19922 if (lo[1] != const0_rtx)
19923 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
19924 NULL_RTX, 0, OPTAB_WIDEN);
19925
19926 tmp = expand_binop (submode, ior_optab, xor1, xor0,
19927 NULL_RTX, 0, OPTAB_WIDEN);
19928
19929 ix86_expand_branch (code, tmp, const0_rtx, label);
19930 return;
19931 }
19932
19933 /* Otherwise, if we are doing a less-than or greater-than-or-equal
19934 comparison and op1 is a constant whose low word is zero, we can
19935 just examine the high word.  Similarly for a low word of -1 with
19936 less-than-or-equal or greater-than.  */
19937
19938 if (CONST_INT_P (hi[1]))
19939 switch (code)
19940 {
19941 case LT: case LTU: case GE: case GEU:
19942 if (lo[1] == const0_rtx)
19943 {
19944 ix86_expand_branch (code, hi[0], hi[1], label);
19945 return;
19946 }
19947 break;
19948 case LE: case LEU: case GT: case GTU:
19949 if (lo[1] == constm1_rtx)
19950 {
19951 ix86_expand_branch (code, hi[0], hi[1], label);
19952 return;
19953 }
19954 break;
19955 default:
19956 break;
19957 }
19958
19959 /* Otherwise, we need two or three jumps. */
19960
19961 label2 = gen_label_rtx ();
19962
19963 code1 = code;
19964 code2 = swap_condition (code);
19965 code3 = unsigned_condition (code);
19966
19967 switch (code)
19968 {
19969 case LT: case GT: case LTU: case GTU:
19970 break;
19971
19972 case LE: code1 = LT; code2 = GT; break;
19973 case GE: code1 = GT; code2 = LT; break;
19974 case LEU: code1 = LTU; code2 = GTU; break;
19975 case GEU: code1 = GTU; code2 = LTU; break;
19976
19977 case EQ: code1 = UNKNOWN; code2 = NE; break;
19978 case NE: code2 = UNKNOWN; break;
19979
19980 default:
19981 gcc_unreachable ();
19982 }
19983
19984 /*
19985 * a < b =>
19986 * if (hi(a) < hi(b)) goto true;
19987 * if (hi(a) > hi(b)) goto false;
19988 * if (lo(a) < lo(b)) goto true;
19989 * false:
19990 */
19991
19992 if (code1 != UNKNOWN)
19993 ix86_expand_branch (code1, hi[0], hi[1], label);
19994 if (code2 != UNKNOWN)
19995 ix86_expand_branch (code2, hi[0], hi[1], label2);
19996
19997 ix86_expand_branch (code3, lo[0], lo[1], label);
19998
19999 if (code2 != UNKNOWN)
20000 emit_label (label2);
20001 return;
20002 }
20003
20004 default:
20005 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20006 goto simple;
20007 }
20008 }
20009
20010 /* Split branch based on floating point condition. */
20011 void
20012 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20013 rtx target1, rtx target2, rtx tmp)
20014 {
20015 rtx condition;
20016 rtx i;
20017
20018 if (target2 != pc_rtx)
20019 {
20020 rtx tmp = target2;
20021 code = reverse_condition_maybe_unordered (code);
20022 target2 = target1;
20023 target1 = tmp;
20024 }
20025
20026 condition = ix86_expand_fp_compare (code, op1, op2,
20027 tmp);
20028
20029 i = emit_jump_insn (gen_rtx_SET
20030 (VOIDmode, pc_rtx,
20031 gen_rtx_IF_THEN_ELSE (VOIDmode,
20032 condition, target1, target2)));
20033 if (split_branch_probability >= 0)
20034 add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20035 }
20036
20037 void
20038 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20039 {
20040 rtx ret;
20041
20042 gcc_assert (GET_MODE (dest) == QImode);
20043
20044 ret = ix86_expand_compare (code, op0, op1);
20045 PUT_MODE (ret, QImode);
20046 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20047 }
20048
20049 /* Expand a comparison that sets or clears the carry flag.  Return true
20050 when successful and set *POP to the comparison operation.  */
20051 static bool
20052 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20053 {
20054 enum machine_mode mode =
20055 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20056
20057 /* Do not handle double-mode compares that go through the special path.  */
20058 if (mode == (TARGET_64BIT ? TImode : DImode))
20059 return false;
20060
20061 if (SCALAR_FLOAT_MODE_P (mode))
20062 {
20063 rtx compare_op, compare_seq;
20064
20065 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20066
20067 /* Shortcut: the following common codes never translate
20068 into carry flag compares.  */
20069 if (code == EQ || code == NE || code == UNEQ || code == LTGT
20070 || code == ORDERED || code == UNORDERED)
20071 return false;
20072
20073 /* These comparisons require the zero flag; swap operands so they no longer do.  */
20074 if ((code == GT || code == UNLE || code == LE || code == UNGT)
20075 && !TARGET_IEEE_FP)
20076 {
20077 rtx tmp = op0;
20078 op0 = op1;
20079 op1 = tmp;
20080 code = swap_condition (code);
20081 }
20082
20083 /* Try to expand the comparison and verify that we end up with
20084 a carry flag based comparison.  This fails only when we decide
20085 to expand the comparison using arithmetic, which is not a
20086 common scenario.  */
20087 start_sequence ();
20088 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20089 compare_seq = get_insns ();
20090 end_sequence ();
20091
20092 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20093 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20094 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20095 else
20096 code = GET_CODE (compare_op);
20097
20098 if (code != LTU && code != GEU)
20099 return false;
20100
20101 emit_insn (compare_seq);
20102 *pop = compare_op;
20103 return true;
20104 }
20105
20106 if (!INTEGRAL_MODE_P (mode))
20107 return false;
20108
20109 switch (code)
20110 {
20111 case LTU:
20112 case GEU:
20113 break;
20114
20115 /* Convert a==0 into (unsigned)a<1. */
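/* For unsigned A, A < 1 holds exactly when A == 0, and the compare
   against 1 leaves that result directly in the carry flag (A != 0
   corresponds to carry clear, i.e. GEU).  */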
20116 case EQ:
20117 case NE:
20118 if (op1 != const0_rtx)
20119 return false;
20120 op1 = const1_rtx;
20121 code = (code == EQ ? LTU : GEU);
20122 break;
20123
20124 /* Convert a>b into b<a or a>=b+1.  */
20125 case GTU:
20126 case LEU:
20127 if (CONST_INT_P (op1))
20128 {
20129 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20130 /* Bail out on overflow. We still can swap operands but that
20131 would force loading of the constant into register. */
20132 if (op1 == const0_rtx
20133 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20134 return false;
20135 code = (code == GTU ? GEU : LTU);
20136 }
20137 else
20138 {
20139 rtx tmp = op1;
20140 op1 = op0;
20141 op0 = tmp;
20142 code = (code == GTU ? LTU : GEU);
20143 }
20144 break;
20145
20146 /* Convert a>=0 into (unsigned)a<0x80000000. */
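/* For signed A, A >= 0 holds exactly when the sign bit is clear, i.e.
   when (unsigned) A < 0x80000000, so the result again lands in the
   carry flag.  */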
20147 case LT:
20148 case GE:
20149 if (mode == DImode || op1 != const0_rtx)
20150 return false;
20151 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20152 code = (code == LT ? GEU : LTU);
20153 break;
20154 case LE:
20155 case GT:
20156 if (mode == DImode || op1 != constm1_rtx)
20157 return false;
20158 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20159 code = (code == LE ? GEU : LTU);
20160 break;
20161
20162 default:
20163 return false;
20164 }
20165 /* Swapping operands may cause a constant to appear as the first operand.  */
20166 if (!nonimmediate_operand (op0, VOIDmode))
20167 {
20168 if (!can_create_pseudo_p ())
20169 return false;
20170 op0 = force_reg (mode, op0);
20171 }
20172 *pop = ix86_expand_compare (code, op0, op1);
20173 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20174 return true;
20175 }
20176
20177 bool
20178 ix86_expand_int_movcc (rtx operands[])
20179 {
20180 enum rtx_code code = GET_CODE (operands[1]), compare_code;
20181 rtx compare_seq, compare_op;
20182 enum machine_mode mode = GET_MODE (operands[0]);
20183 bool sign_bit_compare_p = false;
20184 rtx op0 = XEXP (operands[1], 0);
20185 rtx op1 = XEXP (operands[1], 1);
20186
20187 if (GET_MODE (op0) == TImode
20188 || (GET_MODE (op0) == DImode
20189 && !TARGET_64BIT))
20190 return false;
20191
20192 start_sequence ();
20193 compare_op = ix86_expand_compare (code, op0, op1);
20194 compare_seq = get_insns ();
20195 end_sequence ();
20196
20197 compare_code = GET_CODE (compare_op);
20198
20199 if ((op1 == const0_rtx && (code == GE || code == LT))
20200 || (op1 == constm1_rtx && (code == GT || code == LE)))
20201 sign_bit_compare_p = true;
20202
20203 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20204 HImode insns, we'd be swallowed in word prefix ops. */
20205
20206 if ((mode != HImode || TARGET_FAST_PREFIX)
20207 && (mode != (TARGET_64BIT ? TImode : DImode))
20208 && CONST_INT_P (operands[2])
20209 && CONST_INT_P (operands[3]))
20210 {
20211 rtx out = operands[0];
20212 HOST_WIDE_INT ct = INTVAL (operands[2]);
20213 HOST_WIDE_INT cf = INTVAL (operands[3]);
20214 HOST_WIDE_INT diff;
20215
20216 diff = ct - cf;
20217 /* Sign bit compares are better done using shifts than by using
20218 sbb.  */
20219 if (sign_bit_compare_p
20220 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20221 {
20222 /* Detect overlap between destination and compare sources. */
20223 rtx tmp = out;
20224
20225 if (!sign_bit_compare_p)
20226 {
20227 rtx flags;
20228 bool fpcmp = false;
20229
20230 compare_code = GET_CODE (compare_op);
20231
20232 flags = XEXP (compare_op, 0);
20233
20234 if (GET_MODE (flags) == CCFPmode
20235 || GET_MODE (flags) == CCFPUmode)
20236 {
20237 fpcmp = true;
20238 compare_code
20239 = ix86_fp_compare_code_to_integer (compare_code);
20240 }
20241
20242 /* To simplify the rest of the code, restrict to the GEU case.  */
20243 if (compare_code == LTU)
20244 {
20245 HOST_WIDE_INT tmp = ct;
20246 ct = cf;
20247 cf = tmp;
20248 compare_code = reverse_condition (compare_code);
20249 code = reverse_condition (code);
20250 }
20251 else
20252 {
20253 if (fpcmp)
20254 PUT_CODE (compare_op,
20255 reverse_condition_maybe_unordered
20256 (GET_CODE (compare_op)));
20257 else
20258 PUT_CODE (compare_op,
20259 reverse_condition (GET_CODE (compare_op)));
20260 }
20261 diff = ct - cf;
20262
20263 if (reg_overlap_mentioned_p (out, op0)
20264 || reg_overlap_mentioned_p (out, op1))
20265 tmp = gen_reg_rtx (mode);
20266
20267 if (mode == DImode)
20268 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20269 else
20270 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
20271 flags, compare_op));
20272 }
20273 else
20274 {
20275 if (code == GT || code == GE)
20276 code = reverse_condition (code);
20277 else
20278 {
20279 HOST_WIDE_INT tmp = ct;
20280 ct = cf;
20281 cf = tmp;
20282 diff = ct - cf;
20283 }
20284 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20285 }
20286
20287 if (diff == 1)
20288 {
20289 /*
20290 * cmpl op0,op1
20291 * sbbl dest,dest
20292 * [addl dest, ct]
20293 *
20294 * Size 5 - 8.
20295 */
20296 if (ct)
20297 tmp = expand_simple_binop (mode, PLUS,
20298 tmp, GEN_INT (ct),
20299 copy_rtx (tmp), 1, OPTAB_DIRECT);
20300 }
20301 else if (cf == -1)
20302 {
20303 /*
20304 * cmpl op0,op1
20305 * sbbl dest,dest
20306 * orl $ct, dest
20307 *
20308 * Size 8.
20309 */
20310 tmp = expand_simple_binop (mode, IOR,
20311 tmp, GEN_INT (ct),
20312 copy_rtx (tmp), 1, OPTAB_DIRECT);
20313 }
20314 else if (diff == -1 && ct)
20315 {
20316 /*
20317 * cmpl op0,op1
20318 * sbbl dest,dest
20319 * notl dest
20320 * [addl dest, cf]
20321 *
20322 * Size 8 - 11.
20323 */
20324 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20325 if (cf)
20326 tmp = expand_simple_binop (mode, PLUS,
20327 copy_rtx (tmp), GEN_INT (cf),
20328 copy_rtx (tmp), 1, OPTAB_DIRECT);
20329 }
20330 else
20331 {
20332 /*
20333 * cmpl op0,op1
20334 * sbbl dest,dest
20335 * [notl dest]
20336 * andl cf - ct, dest
20337 * [addl dest, ct]
20338 *
20339 * Size 8 - 11.
20340 */
20341
20342 if (cf == 0)
20343 {
20344 cf = ct;
20345 ct = 0;
20346 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
20347 }
20348
20349 tmp = expand_simple_binop (mode, AND,
20350 copy_rtx (tmp),
20351 gen_int_mode (cf - ct, mode),
20352 copy_rtx (tmp), 1, OPTAB_DIRECT);
20353 if (ct)
20354 tmp = expand_simple_binop (mode, PLUS,
20355 copy_rtx (tmp), GEN_INT (ct),
20356 copy_rtx (tmp), 1, OPTAB_DIRECT);
20357 }
20358
20359 if (!rtx_equal_p (tmp, out))
20360 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
20361
20362 return true;
20363 }
20364
20365 if (diff < 0)
20366 {
20367 enum machine_mode cmp_mode = GET_MODE (op0);
20368
20369 HOST_WIDE_INT tmp;
20370 tmp = ct, ct = cf, cf = tmp;
20371 diff = -diff;
20372
20373 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20374 {
20375 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20376
20377 /* We may be reversing unordered compare to normal compare, that
20378 is not valid in general (we may convert non-trapping condition
20379 to trapping one), however on i386 we currently emit all
20380 comparisons unordered. */
20381 compare_code = reverse_condition_maybe_unordered (compare_code);
20382 code = reverse_condition_maybe_unordered (code);
20383 }
20384 else
20385 {
20386 compare_code = reverse_condition (compare_code);
20387 code = reverse_condition (code);
20388 }
20389 }
20390
20391 compare_code = UNKNOWN;
20392 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
20393 && CONST_INT_P (op1))
20394 {
20395 if (op1 == const0_rtx
20396 && (code == LT || code == GE))
20397 compare_code = code;
20398 else if (op1 == constm1_rtx)
20399 {
20400 if (code == LE)
20401 compare_code = LT;
20402 else if (code == GT)
20403 compare_code = GE;
20404 }
20405 }
20406
20407 /* Optimize dest = (op0 < 0) ? -1 : cf. */
20408 if (compare_code != UNKNOWN
20409 && GET_MODE (op0) == GET_MODE (out)
20410 && (cf == -1 || ct == -1))
20411 {
20412 /* If the lea code below could be used, only optimize
20413 if it results in a 2 insn sequence.  */
20414
20415 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
20416 || diff == 3 || diff == 5 || diff == 9)
20417 || (compare_code == LT && ct == -1)
20418 || (compare_code == GE && cf == -1))
20419 {
20420 /*
20421 * notl op1 (if necessary)
20422 * sarl $31, op1
20423 * orl cf, op1
20424 */
20425 if (ct != -1)
20426 {
20427 cf = ct;
20428 ct = -1;
20429 code = reverse_condition (code);
20430 }
20431
20432 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20433
20434 out = expand_simple_binop (mode, IOR,
20435 out, GEN_INT (cf),
20436 out, 1, OPTAB_DIRECT);
20437 if (out != operands[0])
20438 emit_move_insn (operands[0], out);
20439
20440 return true;
20441 }
20442 }
20443
20444
20445 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
20446 || diff == 3 || diff == 5 || diff == 9)
20447 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
20448 && (mode != DImode
20449 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
20450 {
20451 /*
20452 * xorl dest,dest
20453 * cmpl op1,op2
20454 * setcc dest
20455 * lea cf(dest*(ct-cf)),dest
20456 *
20457 * Size 14.
20458 *
20459 * This also catches the degenerate setcc-only case.
20460 */
20461
20462 rtx tmp;
20463 int nops;
20464
20465 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20466
20467 nops = 0;
20468 /* On x86_64 the lea instruction operates on Pmode, so we need
20469 to get the arithmetic done in the proper mode to match.  */
20470 if (diff == 1)
20471 tmp = copy_rtx (out);
20472 else
20473 {
20474 rtx out1;
20475 out1 = copy_rtx (out);
20476 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
20477 nops++;
20478 if (diff & 1)
20479 {
20480 tmp = gen_rtx_PLUS (mode, tmp, out1);
20481 nops++;
20482 }
20483 }
20484 if (cf != 0)
20485 {
20486 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
20487 nops++;
20488 }
20489 if (!rtx_equal_p (tmp, out))
20490 {
20491 if (nops == 1)
20492 out = force_operand (tmp, copy_rtx (out));
20493 else
20494 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
20495 }
20496 if (!rtx_equal_p (out, operands[0]))
20497 emit_move_insn (operands[0], copy_rtx (out));
20498
20499 return true;
20500 }
20501
20502 /*
20503 * General case: Jumpful:
20504 * xorl dest,dest cmpl op1, op2
20505 * cmpl op1, op2 movl ct, dest
20506 * setcc dest jcc 1f
20507 * decl dest movl cf, dest
20508 * andl (cf-ct),dest 1:
20509 * addl ct,dest
20510 *
20511 * Size 20. Size 14.
20512 *
20513 * This is reasonably steep, but branch mispredict costs are
20514 * high on modern cpus, so consider failing only if optimizing
20515 * for space.
20516 */
20517
20518 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20519 && BRANCH_COST (optimize_insn_for_speed_p (),
20520 false) >= 2)
20521 {
20522 if (cf == 0)
20523 {
20524 enum machine_mode cmp_mode = GET_MODE (op0);
20525
20526 cf = ct;
20527 ct = 0;
20528
20529 if (SCALAR_FLOAT_MODE_P (cmp_mode))
20530 {
20531 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
20532
20533 /* We may be reversing unordered compare to normal compare,
20534 that is not valid in general (we may convert non-trapping
20535 condition to trapping one), however on i386 we currently
20536 emit all comparisons unordered. */
20537 code = reverse_condition_maybe_unordered (code);
20538 }
20539 else
20540 {
20541 code = reverse_condition (code);
20542 if (compare_code != UNKNOWN)
20543 compare_code = reverse_condition (compare_code);
20544 }
20545 }
20546
20547 if (compare_code != UNKNOWN)
20548 {
20549 /* notl op1 (if needed)
20550 sarl $31, op1
20551 andl (cf-ct), op1
20552 addl ct, op1
20553
20554 For x < 0 (resp. x <= -1) there will be no notl,
20555 so if possible swap the constants to get rid of the
20556 complement.
20557 True/false will be -1/0 while code below (store flag
20558 followed by decrement) is 0/-1, so the constants need
20559 to be exchanged once more. */
20560
20561 if (compare_code == GE || !cf)
20562 {
20563 code = reverse_condition (code);
20564 compare_code = LT;
20565 }
20566 else
20567 {
20568 HOST_WIDE_INT tmp = cf;
20569 cf = ct;
20570 ct = tmp;
20571 }
20572
20573 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
20574 }
20575 else
20576 {
20577 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
20578
20579 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
20580 constm1_rtx,
20581 copy_rtx (out), 1, OPTAB_DIRECT);
20582 }
20583
20584 out = expand_simple_binop (mode, AND, copy_rtx (out),
20585 gen_int_mode (cf - ct, mode),
20586 copy_rtx (out), 1, OPTAB_DIRECT);
20587 if (ct)
20588 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
20589 copy_rtx (out), 1, OPTAB_DIRECT);
20590 if (!rtx_equal_p (out, operands[0]))
20591 emit_move_insn (operands[0], copy_rtx (out));
20592
20593 return true;
20594 }
20595 }
20596
20597 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
20598 {
20599 /* Try a few things more with specific constants and a variable. */
20600
20601 optab op;
20602 rtx var, orig_out, out, tmp;
20603
20604 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
20605 return false;
20606
20607 /* If one of the two operands is an interesting constant, load a 0/-1
20608 mask via the code above and mask the variable in with AND or IOR.  */
20609
20610 if (CONST_INT_P (operands[2]))
20611 {
20612 var = operands[3];
20613 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
20614 operands[3] = constm1_rtx, op = and_optab;
20615 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
20616 operands[3] = const0_rtx, op = ior_optab;
20617 else
20618 return false;
20619 }
20620 else if (CONST_INT_P (operands[3]))
20621 {
20622 var = operands[2];
20623 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
20624 operands[2] = constm1_rtx, op = and_optab;
20625 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
20626 operands[2] = const0_rtx, op = ior_optab;
20627 else
20628 return false;
20629 }
20630 else
20631 return false;
20632
20633 orig_out = operands[0];
20634 tmp = gen_reg_rtx (mode);
20635 operands[0] = tmp;
20636
20637 /* Recurse to get the constant loaded. */
20638 if (ix86_expand_int_movcc (operands) == 0)
20639 return false;
20640
20641 /* Mask in the interesting variable. */
20642 out = expand_binop (mode, op, var, tmp, orig_out, 0,
20643 OPTAB_WIDEN);
20644 if (!rtx_equal_p (out, orig_out))
20645 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
20646
20647 return true;
20648 }
20649
20650 /*
20651 * For comparison with above,
20652 *
20653 * movl cf,dest
20654 * movl ct,tmp
20655 * cmpl op1,op2
20656 * cmovcc tmp,dest
20657 *
20658 * Size 15.
20659 */
20660
20661 if (! nonimmediate_operand (operands[2], mode))
20662 operands[2] = force_reg (mode, operands[2]);
20663 if (! nonimmediate_operand (operands[3], mode))
20664 operands[3] = force_reg (mode, operands[3]);
20665
20666 if (! register_operand (operands[2], VOIDmode)
20667 && (mode == QImode
20668 || ! register_operand (operands[3], VOIDmode)))
20669 operands[2] = force_reg (mode, operands[2]);
20670
20671 if (mode == QImode
20672 && ! register_operand (operands[3], VOIDmode))
20673 operands[3] = force_reg (mode, operands[3]);
20674
20675 emit_insn (compare_seq);
20676 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
20677 gen_rtx_IF_THEN_ELSE (mode,
20678 compare_op, operands[2],
20679 operands[3])));
20680 return true;
20681 }
20682
20683 /* Swap, force into registers, or otherwise massage the two operands
20684 to an sse comparison with a mask result. Thus we differ a bit from
20685 ix86_prepare_fp_compare_args which expects to produce a flags result.
20686
20687 The DEST operand exists to help determine whether to commute commutative
20688 operators. The POP0/POP1 operands are updated in place. The new
20689 comparison code is returned, or UNKNOWN if not implementable. */
20690
20691 static enum rtx_code
20692 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
20693 rtx *pop0, rtx *pop1)
20694 {
20695 rtx tmp;
20696
20697 switch (code)
20698 {
20699 case LTGT:
20700 case UNEQ:
20701 /* AVX supports all the needed comparisons. */
20702 if (TARGET_AVX)
20703 break;
20704 /* We have no LTGT as an operator. We could implement it with
20705 NE & ORDERED, but this requires an extra temporary. It's
20706 not clear that it's worth it. */
20707 return UNKNOWN;
20708
20709 case LT:
20710 case LE:
20711 case UNGT:
20712 case UNGE:
20713 /* These are supported directly. */
20714 break;
20715
20716 case EQ:
20717 case NE:
20718 case UNORDERED:
20719 case ORDERED:
20720 /* AVX has 3 operand comparisons, no need to swap anything. */
20721 if (TARGET_AVX)
20722 break;
20723 /* For commutative operators, try to canonicalize the destination
20724 operand to be first in the comparison - this helps reload to
20725 avoid extra moves. */
20726 if (!dest || !rtx_equal_p (dest, *pop1))
20727 break;
20728 /* FALLTHRU */
20729
20730 case GE:
20731 case GT:
20732 case UNLE:
20733 case UNLT:
20734 /* These are not supported directly before AVX, and furthermore
20735 ix86_expand_sse_fp_minmax only optimizes LT/UNGE. Swap the
20736 comparison operands to transform into something that is
20737 supported. */
20738 tmp = *pop0;
20739 *pop0 = *pop1;
20740 *pop1 = tmp;
20741 code = swap_condition (code);
20742 break;
20743
20744 default:
20745 gcc_unreachable ();
20746 }
20747
20748 return code;
20749 }
20750
20751 /* Detect conditional moves that exactly match min/max operational
20752 semantics. Note that this is IEEE safe, as long as we don't
20753 interchange the operands.
20754
20755 Returns FALSE if this conditional move doesn't match a MIN/MAX,
20756 and TRUE if the operation is successful and instructions are emitted. */
20757
20758 static bool
20759 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
20760 rtx cmp_op1, rtx if_true, rtx if_false)
20761 {
20762 enum machine_mode mode;
20763 bool is_min;
20764 rtx tmp;
20765
20766 if (code == LT)
20767 ;
20768 else if (code == UNGE)
20769 {
20770 tmp = if_true;
20771 if_true = if_false;
20772 if_false = tmp;
20773 }
20774 else
20775 return false;
20776
20777 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
20778 is_min = true;
20779 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
20780 is_min = false;
20781 else
20782 return false;
20783
20784 mode = GET_MODE (dest);
20785
20786 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
20787 but MODE may be a vector mode and thus not appropriate. */
20788 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
20789 {
20790 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
20791 rtvec v;
20792
20793 if_true = force_reg (mode, if_true);
20794 v = gen_rtvec (2, if_true, if_false);
20795 tmp = gen_rtx_UNSPEC (mode, v, u);
20796 }
20797 else
20798 {
20799 code = is_min ? SMIN : SMAX;
20800 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
20801 }
20802
20803 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
20804 return true;
20805 }
20806
20807 /* Expand an sse vector comparison. Return the register with the result. */
20808
20809 static rtx
20810 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
20811 rtx op_true, rtx op_false)
20812 {
20813 enum machine_mode mode = GET_MODE (dest);
20814 enum machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
20815
20816 /* In the general case the comparison result can differ from the operands' type.  */
20817 enum machine_mode cmp_mode;
20818
20819 /* In AVX512F the result of comparison is an integer mask. */
20820 bool maskcmp = false;
20821 rtx x;
20822
20823 if (GET_MODE_SIZE (cmp_ops_mode) == 64)
20824 {
20825 cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
20826 gcc_assert (cmp_mode != BLKmode);
20827
20828 maskcmp = true;
20829 }
20830 else
20831 cmp_mode = cmp_ops_mode;
20832
20833
20834 cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
20835 if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
20836 cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
20837
20838 if (optimize
20839 || reg_overlap_mentioned_p (dest, op_true)
20840 || reg_overlap_mentioned_p (dest, op_false))
20841 dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
20842
20843 /* Compare patterns for int modes are unspec in AVX512F only. */
20844 if (maskcmp && (code == GT || code == EQ))
20845 {
20846 rtx (*gen)(rtx, rtx, rtx);
20847
20848 switch (cmp_ops_mode)
20849 {
20850 case V16SImode:
20851 gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
20852 break;
20853 case V8DImode:
20854 gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
20855 break;
20856 default:
20857 gen = NULL;
20858 }
20859
20860 if (gen)
20861 {
20862 emit_insn (gen (dest, cmp_op0, cmp_op1));
20863 return dest;
20864 }
20865 }
20866 x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
20867
20868 if (cmp_mode != mode && !maskcmp)
20869 {
20870 x = force_reg (cmp_ops_mode, x);
20871 convert_move (dest, x, false);
20872 }
20873 else
20874 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20875
20876 return dest;
20877 }
20878
20879 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
20880 operations. This is used for both scalar and vector conditional moves. */
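/* When none of the blend instructions below applies, the fallback at the
   end of this function computes DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE)
   using plain AND, NOT/AND and IOR operations on the mask.  */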
20881
20882 static void
20883 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
20884 {
20885 enum machine_mode mode = GET_MODE (dest);
20886 enum machine_mode cmpmode = GET_MODE (cmp);
20887
20888 /* In AVX512F the result of comparison is an integer mask. */
20889 bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
20890
20891 rtx t2, t3, x;
20892
20893 if (vector_all_ones_operand (op_true, mode)
20894 && rtx_equal_p (op_false, CONST0_RTX (mode))
20895 && !maskcmp)
20896 {
20897 emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
20898 }
20899 else if (op_false == CONST0_RTX (mode)
20900 && !maskcmp)
20901 {
20902 op_true = force_reg (mode, op_true);
20903 x = gen_rtx_AND (mode, cmp, op_true);
20904 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20905 }
20906 else if (op_true == CONST0_RTX (mode)
20907 && !maskcmp)
20908 {
20909 op_false = force_reg (mode, op_false);
20910 x = gen_rtx_NOT (mode, cmp);
20911 x = gen_rtx_AND (mode, x, op_false);
20912 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20913 }
20914 else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
20915 && !maskcmp)
20916 {
20917 op_false = force_reg (mode, op_false);
20918 x = gen_rtx_IOR (mode, cmp, op_false);
20919 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
20920 }
20921 else if (TARGET_XOP
20922 && !maskcmp)
20923 {
20924 op_true = force_reg (mode, op_true);
20925
20926 if (!nonimmediate_operand (op_false, mode))
20927 op_false = force_reg (mode, op_false);
20928
20929 emit_insn (gen_rtx_SET (mode, dest,
20930 gen_rtx_IF_THEN_ELSE (mode, cmp,
20931 op_true,
20932 op_false)));
20933 }
20934 else
20935 {
20936 rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
20937 rtx d = dest;
20938
20939 if (!nonimmediate_operand (op_true, mode))
20940 op_true = force_reg (mode, op_true);
20941
20942 op_false = force_reg (mode, op_false);
20943
20944 switch (mode)
20945 {
20946 case V4SFmode:
20947 if (TARGET_SSE4_1)
20948 gen = gen_sse4_1_blendvps;
20949 break;
20950 case V2DFmode:
20951 if (TARGET_SSE4_1)
20952 gen = gen_sse4_1_blendvpd;
20953 break;
20954 case V16QImode:
20955 case V8HImode:
20956 case V4SImode:
20957 case V2DImode:
20958 if (TARGET_SSE4_1)
20959 {
20960 gen = gen_sse4_1_pblendvb;
20961 if (mode != V16QImode)
20962 d = gen_reg_rtx (V16QImode);
20963 op_false = gen_lowpart (V16QImode, op_false);
20964 op_true = gen_lowpart (V16QImode, op_true);
20965 cmp = gen_lowpart (V16QImode, cmp);
20966 }
20967 break;
20968 case V8SFmode:
20969 if (TARGET_AVX)
20970 gen = gen_avx_blendvps256;
20971 break;
20972 case V4DFmode:
20973 if (TARGET_AVX)
20974 gen = gen_avx_blendvpd256;
20975 break;
20976 case V32QImode:
20977 case V16HImode:
20978 case V8SImode:
20979 case V4DImode:
20980 if (TARGET_AVX2)
20981 {
20982 gen = gen_avx2_pblendvb;
20983 if (mode != V32QImode)
20984 d = gen_reg_rtx (V32QImode);
20985 op_false = gen_lowpart (V32QImode, op_false);
20986 op_true = gen_lowpart (V32QImode, op_true);
20987 cmp = gen_lowpart (V32QImode, cmp);
20988 }
20989 break;
20990
20991 case V16SImode:
20992 gen = gen_avx512f_blendmv16si;
20993 break;
20994 case V8DImode:
20995 gen = gen_avx512f_blendmv8di;
20996 break;
20997 case V8DFmode:
20998 gen = gen_avx512f_blendmv8df;
20999 break;
21000 case V16SFmode:
21001 gen = gen_avx512f_blendmv16sf;
21002 break;
21003
21004 default:
21005 break;
21006 }
21007
21008 if (gen != NULL)
21009 {
21010 emit_insn (gen (d, op_false, op_true, cmp));
21011 if (d != dest)
21012 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21013 }
21014 else
21015 {
21016 op_true = force_reg (mode, op_true);
21017
21018 t2 = gen_reg_rtx (mode);
21019 if (optimize)
21020 t3 = gen_reg_rtx (mode);
21021 else
21022 t3 = dest;
21023
21024 x = gen_rtx_AND (mode, op_true, cmp);
21025 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21026
21027 x = gen_rtx_NOT (mode, cmp);
21028 x = gen_rtx_AND (mode, x, op_false);
21029 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21030
21031 x = gen_rtx_IOR (mode, t3, t2);
21032 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21033 }
21034 }
21035 }
21036
21037 /* Expand a floating-point conditional move. Return true if successful. */
21038
21039 bool
21040 ix86_expand_fp_movcc (rtx operands[])
21041 {
21042 enum machine_mode mode = GET_MODE (operands[0]);
21043 enum rtx_code code = GET_CODE (operands[1]);
21044 rtx tmp, compare_op;
21045 rtx op0 = XEXP (operands[1], 0);
21046 rtx op1 = XEXP (operands[1], 1);
21047
21048 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21049 {
21050 enum machine_mode cmode;
21051
21052 /* Since we have no cmove for sse registers, don't force bad register
21053 allocation just to gain access to it. Deny movcc when the
21054 comparison mode doesn't match the move mode. */
21055 cmode = GET_MODE (op0);
21056 if (cmode == VOIDmode)
21057 cmode = GET_MODE (op1);
21058 if (cmode != mode)
21059 return false;
21060
21061 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21062 if (code == UNKNOWN)
21063 return false;
21064
21065 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21066 operands[2], operands[3]))
21067 return true;
21068
21069 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21070 operands[2], operands[3]);
21071 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21072 return true;
21073 }
21074
21075 if (GET_MODE (op0) == TImode
21076 || (GET_MODE (op0) == DImode
21077 && !TARGET_64BIT))
21078 return false;
21079
21080 /* The floating point conditional move instructions don't directly
21081 support conditions resulting from a signed integer comparison. */
21082
21083 compare_op = ix86_expand_compare (code, op0, op1);
21084 if (!fcmov_comparison_operator (compare_op, VOIDmode))
21085 {
21086 tmp = gen_reg_rtx (QImode);
21087 ix86_expand_setcc (tmp, code, op0, op1);
21088
21089 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21090 }
21091
21092 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21093 gen_rtx_IF_THEN_ELSE (mode, compare_op,
21094 operands[2], operands[3])));
21095
21096 return true;
21097 }
21098
21099 /* Expand a floating-point vector conditional move; a vcond operation
21100 rather than a movcc operation. */
21101
21102 bool
21103 ix86_expand_fp_vcond (rtx operands[])
21104 {
21105 enum rtx_code code = GET_CODE (operands[3]);
21106 rtx cmp;
21107
21108 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21109 &operands[4], &operands[5]);
21110 if (code == UNKNOWN)
21111 {
21112 rtx temp;
21113 switch (GET_CODE (operands[3]))
21114 {
21115 case LTGT:
21116 temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21117 operands[5], operands[0], operands[0]);
21118 cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21119 operands[5], operands[1], operands[2]);
21120 code = AND;
21121 break;
21122 case UNEQ:
21123 temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21124 operands[5], operands[0], operands[0]);
21125 cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21126 operands[5], operands[1], operands[2]);
21127 code = IOR;
21128 break;
21129 default:
21130 gcc_unreachable ();
21131 }
21132 cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21133 OPTAB_DIRECT);
21134 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21135 return true;
21136 }
21137
21138 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21139 operands[5], operands[1], operands[2]))
21140 return true;
21141
21142 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21143 operands[1], operands[2]);
21144 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21145 return true;
21146 }
21147
21148 /* Expand a signed/unsigned integral vector conditional move. */
21149
21150 bool
21151 ix86_expand_int_vcond (rtx operands[])
21152 {
21153 enum machine_mode data_mode = GET_MODE (operands[0]);
21154 enum machine_mode mode = GET_MODE (operands[4]);
21155 enum rtx_code code = GET_CODE (operands[3]);
21156 bool negate = false;
21157 rtx x, cop0, cop1;
21158
21159 cop0 = operands[4];
21160 cop1 = operands[5];
21161
21162 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21163 and x < 0 ? 1 : 0 into (unsigned) x >> 31. */
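/* An arithmetic right shift by the element width minus one replicates the
   sign bit across the element (giving all-ones or zero), while the logical
   shift leaves just 1 or 0, so either form of the conditional collapses to
   a single vector shift.  */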
21164 if ((code == LT || code == GE)
21165 && data_mode == mode
21166 && cop1 == CONST0_RTX (mode)
21167 && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21168 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21169 && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21170 && (GET_MODE_SIZE (data_mode) == 16
21171 || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21172 {
21173 rtx negop = operands[2 - (code == LT)];
21174 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21175 if (negop == CONST1_RTX (data_mode))
21176 {
21177 rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21178 operands[0], 1, OPTAB_DIRECT);
21179 if (res != operands[0])
21180 emit_move_insn (operands[0], res);
21181 return true;
21182 }
21183 else if (GET_MODE_INNER (data_mode) != DImode
21184 && vector_all_ones_operand (negop, data_mode))
21185 {
21186 rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21187 operands[0], 0, OPTAB_DIRECT);
21188 if (res != operands[0])
21189 emit_move_insn (operands[0], res);
21190 return true;
21191 }
21192 }
21193
21194 if (!nonimmediate_operand (cop1, mode))
21195 cop1 = force_reg (mode, cop1);
21196 if (!general_operand (operands[1], data_mode))
21197 operands[1] = force_reg (data_mode, operands[1]);
21198 if (!general_operand (operands[2], data_mode))
21199 operands[2] = force_reg (data_mode, operands[2]);
21200
21201 /* XOP supports all of the comparisons on all 128-bit vector int types. */
21202 if (TARGET_XOP
21203 && (mode == V16QImode || mode == V8HImode
21204 || mode == V4SImode || mode == V2DImode))
21205 ;
21206 else
21207 {
21208 /* Canonicalize the comparison to EQ, GT, GTU. */
21209 switch (code)
21210 {
21211 case EQ:
21212 case GT:
21213 case GTU:
21214 break;
21215
21216 case NE:
21217 case LE:
21218 case LEU:
21219 code = reverse_condition (code);
21220 negate = true;
21221 break;
21222
21223 case GE:
21224 case GEU:
21225 code = reverse_condition (code);
21226 negate = true;
21227 /* FALLTHRU */
21228
21229 case LT:
21230 case LTU:
21231 code = swap_condition (code);
21232 x = cop0, cop0 = cop1, cop1 = x;
21233 break;
21234
21235 default:
21236 gcc_unreachable ();
21237 }
21238
21239 /* Only SSE4.1/SSE4.2 supports V2DImode. */
21240 if (mode == V2DImode)
21241 {
21242 switch (code)
21243 {
21244 case EQ:
21245 /* SSE4.1 supports EQ. */
21246 if (!TARGET_SSE4_1)
21247 return false;
21248 break;
21249
21250 case GT:
21251 case GTU:
21252 /* SSE4.2 supports GT/GTU. */
21253 if (!TARGET_SSE4_2)
21254 return false;
21255 break;
21256
21257 default:
21258 gcc_unreachable ();
21259 }
21260 }
21261
21262 /* Unsigned parallel compare is not supported by the hardware.
21263 Play some tricks to turn this into a signed comparison
21264 against 0. */
21265 if (code == GTU)
21266 {
21267 cop0 = force_reg (mode, cop0);
21268
21269 switch (mode)
21270 {
21271 case V16SImode:
21272 case V8DImode:
21273 case V8SImode:
21274 case V4DImode:
21275 case V4SImode:
21276 case V2DImode:
21277 {
21278 rtx t1, t2, mask;
21279 rtx (*gen_sub3) (rtx, rtx, rtx);
21280
21281 switch (mode)
21282 {
21283 case V16SImode: gen_sub3 = gen_subv16si3; break;
21284 case V8DImode: gen_sub3 = gen_subv8di3; break;
21285 case V8SImode: gen_sub3 = gen_subv8si3; break;
21286 case V4DImode: gen_sub3 = gen_subv4di3; break;
21287 case V4SImode: gen_sub3 = gen_subv4si3; break;
21288 case V2DImode: gen_sub3 = gen_subv2di3; break;
21289 default:
21290 gcc_unreachable ();
21291 }
21292 /* Subtract (-(INT MAX) - 1) from both operands to make
21293 them signed. */
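/* Biasing both operands by the sign-bit value preserves their order:
   a <u b iff (a - 0x80..0) <s (b - 0x80..0), since subtracting the mask
   (equivalently, XORing in the top bit) maps the unsigned range onto the
   signed range monotonically.  */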
21294 mask = ix86_build_signbit_mask (mode, true, false);
21295 t1 = gen_reg_rtx (mode);
21296 emit_insn (gen_sub3 (t1, cop0, mask));
21297
21298 t2 = gen_reg_rtx (mode);
21299 emit_insn (gen_sub3 (t2, cop1, mask));
21300
21301 cop0 = t1;
21302 cop1 = t2;
21303 code = GT;
21304 }
21305 break;
21306
21307 case V32QImode:
21308 case V16HImode:
21309 case V16QImode:
21310 case V8HImode:
21311 /* Perform a parallel unsigned saturating subtraction. */
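/* With saturation, a - b is nonzero exactly when a >u b, so the unsigned
   greater-than reduces to an equality test against zero with the result
   inverted (hence the negate flip below).  */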
21312 x = gen_reg_rtx (mode);
21313 emit_insn (gen_rtx_SET (VOIDmode, x,
21314 gen_rtx_US_MINUS (mode, cop0, cop1)));
21315
21316 cop0 = x;
21317 cop1 = CONST0_RTX (mode);
21318 code = EQ;
21319 negate = !negate;
21320 break;
21321
21322 default:
21323 gcc_unreachable ();
21324 }
21325 }
21326 }
21327
21328 /* Allow the comparison to be done in one mode, but the movcc to
21329 happen in another mode. */
21330 if (data_mode == mode)
21331 {
21332 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
21333 operands[1+negate], operands[2-negate]);
21334 }
21335 else
21336 {
21337 gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
21338 x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
21339 operands[1+negate], operands[2-negate]);
21340 if (GET_MODE (x) == mode)
21341 x = gen_lowpart (data_mode, x);
21342 }
21343
21344 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
21345 operands[2-negate]);
21346 return true;
21347 }
21348
21349 static bool
21350 ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1)
21351 {
21352 enum machine_mode mode = GET_MODE (op0);
21353 switch (mode)
21354 {
21355 case V16SImode:
21356 emit_insn (gen_avx512f_vpermi2varv16si3 (target, op0,
21357 force_reg (V16SImode, mask),
21358 op1));
21359 return true;
21360 case V16SFmode:
21361 emit_insn (gen_avx512f_vpermi2varv16sf3 (target, op0,
21362 force_reg (V16SImode, mask),
21363 op1));
21364 return true;
21365 case V8DImode:
21366 emit_insn (gen_avx512f_vpermi2varv8di3 (target, op0,
21367 force_reg (V8DImode, mask), op1));
21368 return true;
21369 case V8DFmode:
21370 emit_insn (gen_avx512f_vpermi2varv8df3 (target, op0,
21371 force_reg (V8DImode, mask), op1));
21372 return true;
21373 default:
21374 return false;
21375 }
21376 }
21377
21378 /* Expand a variable vector permutation. */
21379
21380 void
21381 ix86_expand_vec_perm (rtx operands[])
21382 {
21383 rtx target = operands[0];
21384 rtx op0 = operands[1];
21385 rtx op1 = operands[2];
21386 rtx mask = operands[3];
21387 rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
21388 enum machine_mode mode = GET_MODE (op0);
21389 enum machine_mode maskmode = GET_MODE (mask);
21390 int w, e, i;
21391 bool one_operand_shuffle = rtx_equal_p (op0, op1);
21392
21393 /* Number of elements in the vector. */
21394 w = GET_MODE_NUNITS (mode);
21395 e = GET_MODE_UNIT_SIZE (mode);
21396 gcc_assert (w <= 64);
21397
21398 if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1))
21399 return;
21400
21401 if (TARGET_AVX2)
21402 {
21403 if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
21404 {
21405 /* Unfortunately, the VPERMQ and VPERMPD instructions only support
21406 a constant shuffle operand.  With a tiny bit of effort we can
21407 use VPERMD instead.  A re-interpretation stall for V4DFmode is
21408 unfortunate but there's no avoiding it.
21409 Similarly for V16HImode we don't have instructions for variable
21410 shuffling, while for V32QImode we can, after preparing suitable
21411 masks, use vpshufb; vpshufb; vpermq; vpor.  */
21412
21413 if (mode == V16HImode)
21414 {
21415 maskmode = mode = V32QImode;
21416 w = 32;
21417 e = 1;
21418 }
21419 else
21420 {
21421 maskmode = mode = V8SImode;
21422 w = 8;
21423 e = 4;
21424 }
21425 t1 = gen_reg_rtx (maskmode);
21426
21427 /* Replicate the low bits of the V4DImode mask into V8SImode:
21428 mask = { A B C D }
21429 t1 = { A A B B C C D D }. */
21430 for (i = 0; i < w / 2; ++i)
21431 vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
21432 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21433 vt = force_reg (maskmode, vt);
21434 mask = gen_lowpart (maskmode, mask);
21435 if (maskmode == V8SImode)
21436 emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
21437 else
21438 emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
21439
21440 /* Multiply the shuffle indices by two.  */
21441 t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
21442 OPTAB_DIRECT);
21443
21444 /* Add one to the odd shuffle indices:
21445 t1 = { A*2, A*2+1, B*2, B*2+1, ... }. */
21446 for (i = 0; i < w / 2; ++i)
21447 {
21448 vec[i * 2] = const0_rtx;
21449 vec[i * 2 + 1] = const1_rtx;
21450 }
21451 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21452 vt = validize_mem (force_const_mem (maskmode, vt));
21453 t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
21454 OPTAB_DIRECT);
21455
21456 /* Continue as if V8SImode (resp. V32QImode) was used initially. */
21457 operands[3] = mask = t1;
21458 target = gen_reg_rtx (mode);
21459 op0 = gen_lowpart (mode, op0);
21460 op1 = gen_lowpart (mode, op1);
21461 }
21462
21463 switch (mode)
21464 {
21465 case V8SImode:
21466 /* The VPERMD and VPERMPS instructions already properly ignore
21467 the high bits of the shuffle elements. No need for us to
21468 perform an AND ourselves. */
21469 if (one_operand_shuffle)
21470 {
21471 emit_insn (gen_avx2_permvarv8si (target, op0, mask));
21472 if (target != operands[0])
21473 emit_move_insn (operands[0],
21474 gen_lowpart (GET_MODE (operands[0]), target));
21475 }
21476 else
21477 {
21478 t1 = gen_reg_rtx (V8SImode);
21479 t2 = gen_reg_rtx (V8SImode);
21480 emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
21481 emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
21482 goto merge_two;
21483 }
21484 return;
21485
21486 case V8SFmode:
21487 mask = gen_lowpart (V8SImode, mask);
21488 if (one_operand_shuffle)
21489 emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
21490 else
21491 {
21492 t1 = gen_reg_rtx (V8SFmode);
21493 t2 = gen_reg_rtx (V8SFmode);
21494 emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
21495 emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
21496 goto merge_two;
21497 }
21498 return;
21499
21500 case V4SImode:
21501 /* By combining the two 128-bit input vectors into one 256-bit
21502 input vector, we can use VPERMD and VPERMPS for the full
21503 two-operand shuffle. */
21504 t1 = gen_reg_rtx (V8SImode);
21505 t2 = gen_reg_rtx (V8SImode);
21506 emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
21507 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21508 emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
21509 emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
21510 return;
21511
21512 case V4SFmode:
21513 t1 = gen_reg_rtx (V8SFmode);
21514 t2 = gen_reg_rtx (V8SImode);
21515 mask = gen_lowpart (V4SImode, mask);
21516 emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
21517 emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
21518 emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
21519 emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
21520 return;
21521
21522 case V32QImode:
21523 t1 = gen_reg_rtx (V32QImode);
21524 t2 = gen_reg_rtx (V32QImode);
21525 t3 = gen_reg_rtx (V32QImode);
21526 vt2 = GEN_INT (128);
21527 for (i = 0; i < 32; i++)
21528 vec[i] = vt2;
21529 vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21530 vt = force_reg (V32QImode, vt);
21531 for (i = 0; i < 32; i++)
21532 vec[i] = i < 16 ? vt2 : const0_rtx;
21533 vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
21534 vt2 = force_reg (V32QImode, vt2);
21535 /* From mask create two adjusted masks, which contain the same
21536 bits as mask in the low 7 bits of each vector element.
21537 The first mask will have the most significant bit clear
21538 if it requests an element from the same 128-bit lane
21539 and the MSB set if it requests an element from the other 128-bit lane.
21540 The second mask will have the opposite values of the MSB,
21541 and additionally will have its 128-bit lanes swapped.
21542 E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
21543 t1 { 07 92 9e 09 ... | 17 19 85 1f ... } and
21544 t3 { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
21545 stands for the other 12 bytes. */
21546 /* The bit that tells whether an element comes from the same lane or the
21547 other lane is bit 4, so shift it up by 3 to the MSB position. */
21548 t5 = gen_reg_rtx (V4DImode);
21549 emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
21550 GEN_INT (3)));
21551 /* Clear MSB bits from the mask just in case it had them set. */
21552 emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
21553 /* After this t1 will have MSB set for elements from other lane. */
21554 emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
21555 /* Clear bits other than MSB. */
21556 emit_insn (gen_andv32qi3 (t1, t1, vt));
21557 /* Or in the lower bits from mask into t3. */
21558 emit_insn (gen_iorv32qi3 (t3, t1, t2));
21559 /* And invert MSB bits in t1, so MSB is set for elements from the same
21560 lane. */
21561 emit_insn (gen_xorv32qi3 (t1, t1, vt));
21562 /* Swap 128-bit lanes in t3. */
21563 t6 = gen_reg_rtx (V4DImode);
21564 emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
21565 const2_rtx, GEN_INT (3),
21566 const0_rtx, const1_rtx));
21567 /* And or in the lower bits from mask into t1. */
21568 emit_insn (gen_iorv32qi3 (t1, t1, t2));
21569 if (one_operand_shuffle)
21570 {
21571 /* Each of these shuffles will put 0s in places where an
21572 element from the other 128-bit lane is needed, and otherwise
21573 will shuffle in the requested value. */
21574 emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
21575 gen_lowpart (V32QImode, t6)));
21576 emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
21577 /* For t3 the 128-bit lanes are swapped again. */
21578 t7 = gen_reg_rtx (V4DImode);
21579 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
21580 const2_rtx, GEN_INT (3),
21581 const0_rtx, const1_rtx));
21582 /* And oring both together leads to the result. */
21583 emit_insn (gen_iorv32qi3 (target, t1,
21584 gen_lowpart (V32QImode, t7)));
21585 if (target != operands[0])
21586 emit_move_insn (operands[0],
21587 gen_lowpart (GET_MODE (operands[0]), target));
21588 return;
21589 }
21590
21591 t4 = gen_reg_rtx (V32QImode);
21592 /* Similarly to the one_operand_shuffle code above, just repeated
21593 twice, once for each operand; the merge_two: code below will
21594 merge the two results together. */
21595 emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
21596 gen_lowpart (V32QImode, t6)));
21597 emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
21598 gen_lowpart (V32QImode, t6)));
21599 emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
21600 emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
21601 t7 = gen_reg_rtx (V4DImode);
21602 emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
21603 const2_rtx, GEN_INT (3),
21604 const0_rtx, const1_rtx));
21605 t8 = gen_reg_rtx (V4DImode);
21606 emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
21607 const2_rtx, GEN_INT (3),
21608 const0_rtx, const1_rtx));
21609 emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
21610 emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
21611 t1 = t4;
21612 t2 = t3;
21613 goto merge_two;
21614
21615 default:
21616 gcc_assert (GET_MODE_SIZE (mode) <= 16);
21617 break;
21618 }
21619 }
21620
21621 if (TARGET_XOP)
21622 {
21623 /* The XOP VPPERM insn supports three inputs. By ignoring the
21624 one_operand_shuffle special case, we avoid creating another
21625 set of constant vectors in memory. */
21626 one_operand_shuffle = false;
21627
21628 /* mask = mask & {2*w-1, ...} */
21629 vt = GEN_INT (2*w - 1);
21630 }
21631 else
21632 {
21633 /* mask = mask & {w-1, ...} */
21634 vt = GEN_INT (w - 1);
21635 }
21636
21637 for (i = 0; i < w; i++)
21638 vec[i] = vt;
21639 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21640 mask = expand_simple_binop (maskmode, AND, mask, vt,
21641 NULL_RTX, 0, OPTAB_DIRECT);
21642
21643 /* For non-QImode operations, convert the word permutation control
21644 into a byte permutation control. */
21645 if (mode != V16QImode)
21646 {
21647 mask = expand_simple_binop (maskmode, ASHIFT, mask,
21648 GEN_INT (exact_log2 (e)),
21649 NULL_RTX, 0, OPTAB_DIRECT);
21650
21651 /* Convert mask to vector of chars. */
21652 mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
21653
21654 /* Replicate each of the input bytes into byte positions:
21655 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
21656 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
21657 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}. */
21658 for (i = 0; i < 16; ++i)
21659 vec[i] = GEN_INT (i/e * e);
21660 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21661 vt = validize_mem (force_const_mem (V16QImode, vt));
21662 if (TARGET_XOP)
21663 emit_insn (gen_xop_pperm (mask, mask, mask, vt));
21664 else
21665 emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
21666
21667 /* Convert it into the byte positions by doing
21668 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...} */
21669 for (i = 0; i < 16; ++i)
21670 vec[i] = GEN_INT (i % e);
21671 vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
21672 vt = validize_mem (force_const_mem (V16QImode, vt));
21673 emit_insn (gen_addv16qi3 (mask, mask, vt));
21674 }
21675
21676 /* The actual shuffle operations all operate on V16QImode. */
21677 op0 = gen_lowpart (V16QImode, op0);
21678 op1 = gen_lowpart (V16QImode, op1);
21679
21680 if (TARGET_XOP)
21681 {
21682 if (GET_MODE (target) != V16QImode)
21683 target = gen_reg_rtx (V16QImode);
21684 emit_insn (gen_xop_pperm (target, op0, op1, mask));
21685 if (target != operands[0])
21686 emit_move_insn (operands[0],
21687 gen_lowpart (GET_MODE (operands[0]), target));
21688 }
21689 else if (one_operand_shuffle)
21690 {
21691 if (GET_MODE (target) != V16QImode)
21692 target = gen_reg_rtx (V16QImode);
21693 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
21694 if (target != operands[0])
21695 emit_move_insn (operands[0],
21696 gen_lowpart (GET_MODE (operands[0]), target));
21697 }
21698 else
21699 {
21700 rtx xops[6];
21701 bool ok;
21702
21703 /* Shuffle the two input vectors independently. */
21704 t1 = gen_reg_rtx (V16QImode);
21705 t2 = gen_reg_rtx (V16QImode);
21706 emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
21707 emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
21708
21709 merge_two:
21710 /* Then merge them together. The key is whether any given control
21711 element contained a bit set that indicates the second word. */
21712 mask = operands[3];
21713 vt = GEN_INT (w);
21714 if (maskmode == V2DImode && !TARGET_SSE4_1)
21715 {
21716 /* Without SSE4.1, we don't have V2DImode EQ. Perform one
21717 more shuffle to convert the V2DI input mask into a V4SI
21718 input mask. At that point the masking done by
21719 expand_int_vcond will work as desired. */
21720 rtx t3 = gen_reg_rtx (V4SImode);
21721 emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
21722 const0_rtx, const0_rtx,
21723 const2_rtx, const2_rtx));
21724 mask = t3;
21725 maskmode = V4SImode;
21726 e = w = 4;
21727 }
21728
21729 for (i = 0; i < w; i++)
21730 vec[i] = vt;
21731 vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
21732 vt = force_reg (maskmode, vt);
21733 mask = expand_simple_binop (maskmode, AND, mask, vt,
21734 NULL_RTX, 0, OPTAB_DIRECT);
21735
21736 if (GET_MODE (target) != mode)
21737 target = gen_reg_rtx (mode);
21738 xops[0] = target;
21739 xops[1] = gen_lowpart (mode, t2);
21740 xops[2] = gen_lowpart (mode, t1);
21741 xops[3] = gen_rtx_EQ (maskmode, mask, vt);
21742 xops[4] = mask;
21743 xops[5] = vt;
21744 ok = ix86_expand_int_vcond (xops);
21745 gcc_assert (ok);
21746 if (target != operands[0])
21747 emit_move_insn (operands[0],
21748 gen_lowpart (GET_MODE (operands[0]), target));
21749 }
21750 }
21751
21752 /* Unpack SRC into DEST, widening to the next wider integer vector type.
21753 UNSIGNED_P is true if we should do zero extension, else sign extension.
21754 HIGH_P is true if we want the N/2 high elements, else the low elements. */
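/* For example (illustrative): widening the low half of a V8HImode SRC to a
   V4SImode DEST uses the SSE4.1 pmovsxwd/pmovzxwd patterns selected below,
   while the pre-SSE4.1 path interleaves SRC either with zero (unsigned) or
   with the result of a "0 > SRC" comparison (signed) to supply the upper
   halves of the widened elements.  */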
21755
21756 void
21757 ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
21758 {
21759 enum machine_mode imode = GET_MODE (src);
21760 rtx tmp;
21761
21762 if (TARGET_SSE4_1)
21763 {
21764 rtx (*unpack)(rtx, rtx);
21765 rtx (*extract)(rtx, rtx) = NULL;
21766 enum machine_mode halfmode = BLKmode;
21767
21768 switch (imode)
21769 {
21770 case V32QImode:
21771 if (unsigned_p)
21772 unpack = gen_avx2_zero_extendv16qiv16hi2;
21773 else
21774 unpack = gen_avx2_sign_extendv16qiv16hi2;
21775 halfmode = V16QImode;
21776 extract
21777 = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
21778 break;
21779 case V32HImode:
21780 if (unsigned_p)
21781 unpack = gen_avx512f_zero_extendv16hiv16si2;
21782 else
21783 unpack = gen_avx512f_sign_extendv16hiv16si2;
21784 halfmode = V16HImode;
21785 extract
21786 = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
21787 break;
21788 case V16HImode:
21789 if (unsigned_p)
21790 unpack = gen_avx2_zero_extendv8hiv8si2;
21791 else
21792 unpack = gen_avx2_sign_extendv8hiv8si2;
21793 halfmode = V8HImode;
21794 extract
21795 = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
21796 break;
21797 case V16SImode:
21798 if (unsigned_p)
21799 unpack = gen_avx512f_zero_extendv8siv8di2;
21800 else
21801 unpack = gen_avx512f_sign_extendv8siv8di2;
21802 halfmode = V8SImode;
21803 extract
21804 = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
21805 break;
21806 case V8SImode:
21807 if (unsigned_p)
21808 unpack = gen_avx2_zero_extendv4siv4di2;
21809 else
21810 unpack = gen_avx2_sign_extendv4siv4di2;
21811 halfmode = V4SImode;
21812 extract
21813 = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
21814 break;
21815 case V16QImode:
21816 if (unsigned_p)
21817 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
21818 else
21819 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
21820 break;
21821 case V8HImode:
21822 if (unsigned_p)
21823 unpack = gen_sse4_1_zero_extendv4hiv4si2;
21824 else
21825 unpack = gen_sse4_1_sign_extendv4hiv4si2;
21826 break;
21827 case V4SImode:
21828 if (unsigned_p)
21829 unpack = gen_sse4_1_zero_extendv2siv2di2;
21830 else
21831 unpack = gen_sse4_1_sign_extendv2siv2di2;
21832 break;
21833 default:
21834 gcc_unreachable ();
21835 }
21836
21837 if (GET_MODE_SIZE (imode) >= 32)
21838 {
21839 tmp = gen_reg_rtx (halfmode);
21840 emit_insn (extract (tmp, src));
21841 }
21842 else if (high_p)
21843 {
21844 /* Shift higher 8 bytes to lower 8 bytes. */
21845 tmp = gen_reg_rtx (V1TImode);
21846 emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
21847 GEN_INT (64)));
21848 tmp = gen_lowpart (imode, tmp);
21849 }
21850 else
21851 tmp = src;
21852
21853 emit_insn (unpack (dest, tmp));
21854 }
21855 else
21856 {
21857 rtx (*unpack)(rtx, rtx, rtx);
21858
21859 switch (imode)
21860 {
21861 case V16QImode:
21862 if (high_p)
21863 unpack = gen_vec_interleave_highv16qi;
21864 else
21865 unpack = gen_vec_interleave_lowv16qi;
21866 break;
21867 case V8HImode:
21868 if (high_p)
21869 unpack = gen_vec_interleave_highv8hi;
21870 else
21871 unpack = gen_vec_interleave_lowv8hi;
21872 break;
21873 case V4SImode:
21874 if (high_p)
21875 unpack = gen_vec_interleave_highv4si;
21876 else
21877 unpack = gen_vec_interleave_lowv4si;
21878 break;
21879 default:
21880 gcc_unreachable ();
21881 }
21882
21883 if (unsigned_p)
21884 tmp = force_reg (imode, CONST0_RTX (imode));
21885 else
21886 tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
21887 src, pc_rtx, pc_rtx);
21888
21889 rtx tmp2 = gen_reg_rtx (imode);
21890 emit_insn (unpack (tmp2, src, tmp));
21891 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
21892 }
21893 }
21894
21895 /* Expand conditional increment or decrement using adc/sbb instructions.
21896 The default case using setcc followed by the conditional move can be
21897 done by generic code. */
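/* For instance (illustrative, AT&T syntax): with unsigned A in %eax, B in
   %ebx and X in %ecx, the conditional increment "if (a < b) x++;" can be
   emitted without a branch as
	cmpl	%ebx, %eax	; sets the carry flag when a < b
	adcl	$0, %ecx	; x += carry
   and the decrement case uses sbb the same way.  */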
21898 bool
21899 ix86_expand_int_addcc (rtx operands[])
21900 {
21901 enum rtx_code code = GET_CODE (operands[1]);
21902 rtx flags;
21903 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
21904 rtx compare_op;
21905 rtx val = const0_rtx;
21906 bool fpcmp = false;
21907 enum machine_mode mode;
21908 rtx op0 = XEXP (operands[1], 0);
21909 rtx op1 = XEXP (operands[1], 1);
21910
21911 if (operands[3] != const1_rtx
21912 && operands[3] != constm1_rtx)
21913 return false;
21914 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
21915 return false;
21916 code = GET_CODE (compare_op);
21917
21918 flags = XEXP (compare_op, 0);
21919
21920 if (GET_MODE (flags) == CCFPmode
21921 || GET_MODE (flags) == CCFPUmode)
21922 {
21923 fpcmp = true;
21924 code = ix86_fp_compare_code_to_integer (code);
21925 }
21926
21927 if (code != LTU)
21928 {
21929 val = constm1_rtx;
21930 if (fpcmp)
21931 PUT_CODE (compare_op,
21932 reverse_condition_maybe_unordered
21933 (GET_CODE (compare_op)));
21934 else
21935 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
21936 }
21937
21938 mode = GET_MODE (operands[0]);
21939
21940 /* Construct either adc or sbb insn. */
21941 if ((code == LTU) == (operands[3] == constm1_rtx))
21942 {
21943 switch (mode)
21944 {
21945 case QImode:
21946 insn = gen_subqi3_carry;
21947 break;
21948 case HImode:
21949 insn = gen_subhi3_carry;
21950 break;
21951 case SImode:
21952 insn = gen_subsi3_carry;
21953 break;
21954 case DImode:
21955 insn = gen_subdi3_carry;
21956 break;
21957 default:
21958 gcc_unreachable ();
21959 }
21960 }
21961 else
21962 {
21963 switch (mode)
21964 {
21965 case QImode:
21966 insn = gen_addqi3_carry;
21967 break;
21968 case HImode:
21969 insn = gen_addhi3_carry;
21970 break;
21971 case SImode:
21972 insn = gen_addsi3_carry;
21973 break;
21974 case DImode:
21975 insn = gen_adddi3_carry;
21976 break;
21977 default:
21978 gcc_unreachable ();
21979 }
21980 }
21981 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
21982
21983 return true;
21984 }
21985
21986
21987 /* Split OPERAND into half-mode parts stored in PARTS. Similar to
21988 split_double_mode, but works for floating point parameters and
21989 non-offsettable memories. For pushes, it returns just stack offsets;
21990 the values will be saved in the right order. At most four parts are generated. */
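/* For example: on ia32 a DFmode operand is split into two SImode parts, an
   XFmode operand into three and a TFmode operand into four; on x86_64 a
   TFmode operand is split into two DImode parts.  */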
21991
21992 static int
21993 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
21994 {
21995 int size;
21996
21997 if (!TARGET_64BIT)
21998 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
21999 else
22000 size = (GET_MODE_SIZE (mode) + 4) / 8;
22001
22002 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22003 gcc_assert (size >= 2 && size <= 4);
22004
22005 /* Optimize constant pool references to immediates. This is used by fp
22006 moves, which force all constants to memory to allow combining. */
22007 if (MEM_P (operand) && MEM_READONLY_P (operand))
22008 {
22009 rtx tmp = maybe_get_pool_constant (operand);
22010 if (tmp)
22011 operand = tmp;
22012 }
22013
22014 if (MEM_P (operand) && !offsettable_memref_p (operand))
22015 {
22016 /* The only non-offsettable memories we handle are pushes. */
22017 int ok = push_operand (operand, VOIDmode);
22018
22019 gcc_assert (ok);
22020
22021 operand = copy_rtx (operand);
22022 PUT_MODE (operand, word_mode);
22023 parts[0] = parts[1] = parts[2] = parts[3] = operand;
22024 return size;
22025 }
22026
22027 if (GET_CODE (operand) == CONST_VECTOR)
22028 {
22029 enum machine_mode imode = int_mode_for_mode (mode);
22030 /* Caution: if we looked through a constant pool memory above,
22031 the operand may actually have a different mode now. That's
22032 ok, since we want to pun this all the way back to an integer. */
22033 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22034 gcc_assert (operand != NULL);
22035 mode = imode;
22036 }
22037
22038 if (!TARGET_64BIT)
22039 {
22040 if (mode == DImode)
22041 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22042 else
22043 {
22044 int i;
22045
22046 if (REG_P (operand))
22047 {
22048 gcc_assert (reload_completed);
22049 for (i = 0; i < size; i++)
22050 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22051 }
22052 else if (offsettable_memref_p (operand))
22053 {
22054 operand = adjust_address (operand, SImode, 0);
22055 parts[0] = operand;
22056 for (i = 1; i < size; i++)
22057 parts[i] = adjust_address (operand, SImode, 4 * i);
22058 }
22059 else if (GET_CODE (operand) == CONST_DOUBLE)
22060 {
22061 REAL_VALUE_TYPE r;
22062 long l[4];
22063
22064 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22065 switch (mode)
22066 {
22067 case TFmode:
22068 real_to_target (l, &r, mode);
22069 parts[3] = gen_int_mode (l[3], SImode);
22070 parts[2] = gen_int_mode (l[2], SImode);
22071 break;
22072 case XFmode:
22073 /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22074 long double may not be 80-bit. */
22075 real_to_target (l, &r, mode);
22076 parts[2] = gen_int_mode (l[2], SImode);
22077 break;
22078 case DFmode:
22079 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22080 break;
22081 default:
22082 gcc_unreachable ();
22083 }
22084 parts[1] = gen_int_mode (l[1], SImode);
22085 parts[0] = gen_int_mode (l[0], SImode);
22086 }
22087 else
22088 gcc_unreachable ();
22089 }
22090 }
22091 else
22092 {
22093 if (mode == TImode)
22094 split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22095 if (mode == XFmode || mode == TFmode)
22096 {
22097 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22098 if (REG_P (operand))
22099 {
22100 gcc_assert (reload_completed);
22101 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22102 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22103 }
22104 else if (offsettable_memref_p (operand))
22105 {
22106 operand = adjust_address (operand, DImode, 0);
22107 parts[0] = operand;
22108 parts[1] = adjust_address (operand, upper_mode, 8);
22109 }
22110 else if (GET_CODE (operand) == CONST_DOUBLE)
22111 {
22112 REAL_VALUE_TYPE r;
22113 long l[4];
22114
22115 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22116 real_to_target (l, &r, mode);
22117
22118 /* Do not use shift by 32 to avoid warning on 32bit systems. */
22119 if (HOST_BITS_PER_WIDE_INT >= 64)
22120 parts[0]
22121 = gen_int_mode
22122 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22123 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22124 DImode);
22125 else
22126 parts[0] = immed_double_const (l[0], l[1], DImode);
22127
22128 if (upper_mode == SImode)
22129 parts[1] = gen_int_mode (l[2], SImode);
22130 else if (HOST_BITS_PER_WIDE_INT >= 64)
22131 parts[1]
22132 = gen_int_mode
22133 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22134 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22135 DImode);
22136 else
22137 parts[1] = immed_double_const (l[2], l[3], DImode);
22138 }
22139 else
22140 gcc_unreachable ();
22141 }
22142 }
22143
22144 return size;
22145 }
22146
22147 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
22148 All required insns are emitted here; the caller need not emit any
22149 normal moves. Operands 2-5 receive the destination parts and
22150 operands 6-9 the source parts, in the correct order. */
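/* For example (illustrative): on ia32 a DImode register-to-register move is
   split here into two SImode moves, emitted in whichever order avoids
   overwriting a source register (or a register used in the source address)
   before it has been read.  */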
22151
22152 void
22153 ix86_split_long_move (rtx operands[])
22154 {
22155 rtx part[2][4];
22156 int nparts, i, j;
22157 int push = 0;
22158 int collisions = 0;
22159 enum machine_mode mode = GET_MODE (operands[0]);
22160 bool collisionparts[4];
22161
22162 /* The DFmode expanders may ask us to move a double.
22163 For a 64bit target this is a single move. By hiding the fact
22164 here we simplify the i386.md splitters. */
22165 if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22166 {
22167 /* Optimize constant pool references to immediates. This is used by
22168 fp moves, which force all constants to memory to allow combining. */
22169
22170 if (MEM_P (operands[1])
22171 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22172 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22173 operands[1] = get_pool_constant (XEXP (operands[1], 0));
22174 if (push_operand (operands[0], VOIDmode))
22175 {
22176 operands[0] = copy_rtx (operands[0]);
22177 PUT_MODE (operands[0], word_mode);
22178 }
22179 else
22180 operands[0] = gen_lowpart (DImode, operands[0]);
22181 operands[1] = gen_lowpart (DImode, operands[1]);
22182 emit_move_insn (operands[0], operands[1]);
22183 return;
22184 }
22185
22186 /* The only non-offsettable memory we handle is push. */
22187 if (push_operand (operands[0], VOIDmode))
22188 push = 1;
22189 else
22190 gcc_assert (!MEM_P (operands[0])
22191 || offsettable_memref_p (operands[0]));
22192
22193 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
22194 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
22195
22196 /* When emitting a push, take care of source operands on the stack. */
22197 if (push && MEM_P (operands[1])
22198 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
22199 {
22200 rtx src_base = XEXP (part[1][nparts - 1], 0);
22201
22202 /* Compensate for the stack decrement by 4. */
22203 if (!TARGET_64BIT && nparts == 3
22204 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
22205 src_base = plus_constant (Pmode, src_base, 4);
22206
22207 /* src_base refers to the stack pointer and is
22208 automatically decreased by emitted push. */
22209 for (i = 0; i < nparts; i++)
22210 part[1][i] = change_address (part[1][i],
22211 GET_MODE (part[1][i]), src_base);
22212 }
22213
22214 /* We need to do the copy in the right order in case an address register
22215 of the source overlaps the destination. */
22216 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
22217 {
22218 rtx tmp;
22219
22220 for (i = 0; i < nparts; i++)
22221 {
22222 collisionparts[i]
22223 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
22224 if (collisionparts[i])
22225 collisions++;
22226 }
22227
22228 /* Collision in the middle part can be handled by reordering. */
22229 if (collisions == 1 && nparts == 3 && collisionparts [1])
22230 {
22231 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22232 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22233 }
22234 else if (collisions == 1
22235 && nparts == 4
22236 && (collisionparts [1] || collisionparts [2]))
22237 {
22238 if (collisionparts [1])
22239 {
22240 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
22241 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
22242 }
22243 else
22244 {
22245 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
22246 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
22247 }
22248 }
22249
22250 /* If there are more collisions, we can't handle it by reordering.
22251 Do an lea to the last part and use only one colliding move. */
22252 else if (collisions > 1)
22253 {
22254 rtx base;
22255
22256 collisions = 1;
22257
22258 base = part[0][nparts - 1];
22259
22260 /* Handle the case when the last part isn't valid for lea.
22261 Happens in 64-bit mode storing the 12-byte XFmode. */
22262 if (GET_MODE (base) != Pmode)
22263 base = gen_rtx_REG (Pmode, REGNO (base));
22264
22265 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
22266 part[1][0] = replace_equiv_address (part[1][0], base);
22267 for (i = 1; i < nparts; i++)
22268 {
22269 tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
22270 part[1][i] = replace_equiv_address (part[1][i], tmp);
22271 }
22272 }
22273 }
22274
22275 if (push)
22276 {
22277 if (!TARGET_64BIT)
22278 {
22279 if (nparts == 3)
22280 {
22281 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
22282 emit_insn (ix86_gen_add3 (stack_pointer_rtx,
22283 stack_pointer_rtx, GEN_INT (-4)));
22284 emit_move_insn (part[0][2], part[1][2]);
22285 }
22286 else if (nparts == 4)
22287 {
22288 emit_move_insn (part[0][3], part[1][3]);
22289 emit_move_insn (part[0][2], part[1][2]);
22290 }
22291 }
22292 else
22293 {
22294 /* In 64bit mode we don't have a 32bit push available. In case this is
22295 a register, it is OK - we will just use the larger counterpart. We also
22296 retype memory - these come from an attempt to avoid a REX prefix on
22297 moving the second half of a TFmode value. */
22298 if (GET_MODE (part[1][1]) == SImode)
22299 {
22300 switch (GET_CODE (part[1][1]))
22301 {
22302 case MEM:
22303 part[1][1] = adjust_address (part[1][1], DImode, 0);
22304 break;
22305
22306 case REG:
22307 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
22308 break;
22309
22310 default:
22311 gcc_unreachable ();
22312 }
22313
22314 if (GET_MODE (part[1][0]) == SImode)
22315 part[1][0] = part[1][1];
22316 }
22317 }
22318 emit_move_insn (part[0][1], part[1][1]);
22319 emit_move_insn (part[0][0], part[1][0]);
22320 return;
22321 }
22322
22323 /* Choose correct order to not overwrite the source before it is copied. */
22324 if ((REG_P (part[0][0])
22325 && REG_P (part[1][1])
22326 && (REGNO (part[0][0]) == REGNO (part[1][1])
22327 || (nparts == 3
22328 && REGNO (part[0][0]) == REGNO (part[1][2]))
22329 || (nparts == 4
22330 && REGNO (part[0][0]) == REGNO (part[1][3]))))
22331 || (collisions > 0
22332 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
22333 {
22334 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
22335 {
22336 operands[2 + i] = part[0][j];
22337 operands[6 + i] = part[1][j];
22338 }
22339 }
22340 else
22341 {
22342 for (i = 0; i < nparts; i++)
22343 {
22344 operands[2 + i] = part[0][i];
22345 operands[6 + i] = part[1][i];
22346 }
22347 }
22348
22349 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
22350 if (optimize_insn_for_size_p ())
22351 {
22352 for (j = 0; j < nparts - 1; j++)
22353 if (CONST_INT_P (operands[6 + j])
22354 && operands[6 + j] != const0_rtx
22355 && REG_P (operands[2 + j]))
22356 for (i = j; i < nparts - 1; i++)
22357 if (CONST_INT_P (operands[7 + i])
22358 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
22359 operands[7 + i] = operands[2 + j];
22360 }
22361
22362 for (i = 0; i < nparts; i++)
22363 emit_move_insn (operands[2 + i], operands[6 + i]);
22364
22365 return;
22366 }
22367
22368 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
22369 left shift by a constant, either using a single shift or
22370 a sequence of add instructions. */
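/* For example (illustrative): when the cost model says adds are cheap
   enough, a left shift of an SImode part by 2 may be emitted as
	addl	%eax, %eax
	addl	%eax, %eax
   instead of a single "shll $2, %eax".  */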
22371
22372 static void
22373 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
22374 {
22375 rtx (*insn)(rtx, rtx, rtx);
22376
22377 if (count == 1
22378 || (count * ix86_cost->add <= ix86_cost->shift_const
22379 && !optimize_insn_for_size_p ()))
22380 {
22381 insn = mode == DImode ? gen_addsi3 : gen_adddi3;
22382 while (count-- > 0)
22383 emit_insn (insn (operand, operand, operand));
22384 }
22385 else
22386 {
22387 insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22388 emit_insn (insn (operand, operand, GEN_INT (count)));
22389 }
22390 }
22391
22392 void
22393 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
22394 {
22395 rtx (*gen_ashl3)(rtx, rtx, rtx);
22396 rtx (*gen_shld)(rtx, rtx, rtx);
22397 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22398
22399 rtx low[2], high[2];
22400 int count;
22401
22402 if (CONST_INT_P (operands[2]))
22403 {
22404 split_double_mode (mode, operands, 2, low, high);
22405 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22406
22407 if (count >= half_width)
22408 {
22409 emit_move_insn (high[0], low[1]);
22410 emit_move_insn (low[0], const0_rtx);
22411
22412 if (count > half_width)
22413 ix86_expand_ashl_const (high[0], count - half_width, mode);
22414 }
22415 else
22416 {
22417 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22418
22419 if (!rtx_equal_p (operands[0], operands[1]))
22420 emit_move_insn (operands[0], operands[1]);
22421
22422 emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
22423 ix86_expand_ashl_const (low[0], count, mode);
22424 }
22425 return;
22426 }
22427
22428 split_double_mode (mode, operands, 1, low, high);
22429
22430 gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
22431
22432 if (operands[1] == const1_rtx)
22433 {
22434 /* Assuming we've chosen QImode-capable registers, then 1 << N
22435 can be done with two 32/64-bit shifts, no branches, no cmoves. */
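/* Illustrative sketch for DImode on ia32: with N in %ecx, the code below
   sets low = (N & 32) ? 0 : 1 and high = (N & 32) ? 1 : 0 using
   test/sete/setne, then shifts both halves left by %cl; since the 32-bit
   shift instructions mask the count to 5 bits, this yields the correct
   64-bit value of 1 << N with no branches.  */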
22436 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
22437 {
22438 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
22439
22440 ix86_expand_clear (low[0]);
22441 ix86_expand_clear (high[0]);
22442 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
22443
22444 d = gen_lowpart (QImode, low[0]);
22445 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22446 s = gen_rtx_EQ (QImode, flags, const0_rtx);
22447 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22448
22449 d = gen_lowpart (QImode, high[0]);
22450 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
22451 s = gen_rtx_NE (QImode, flags, const0_rtx);
22452 emit_insn (gen_rtx_SET (VOIDmode, d, s));
22453 }
22454
22455 /* Otherwise, we can get the same results by manually performing
22456 a bit extract operation on bit 5/6, and then performing the two
22457 shifts. The two methods of getting 0/1 into low/high are exactly
22458 the same size. Avoiding the shift in the bit extract case helps
22459 pentium4 a bit; no one else seems to care much either way. */
22460 else
22461 {
22462 enum machine_mode half_mode;
22463 rtx (*gen_lshr3)(rtx, rtx, rtx);
22464 rtx (*gen_and3)(rtx, rtx, rtx);
22465 rtx (*gen_xor3)(rtx, rtx, rtx);
22466 HOST_WIDE_INT bits;
22467 rtx x;
22468
22469 if (mode == DImode)
22470 {
22471 half_mode = SImode;
22472 gen_lshr3 = gen_lshrsi3;
22473 gen_and3 = gen_andsi3;
22474 gen_xor3 = gen_xorsi3;
22475 bits = 5;
22476 }
22477 else
22478 {
22479 half_mode = DImode;
22480 gen_lshr3 = gen_lshrdi3;
22481 gen_and3 = gen_anddi3;
22482 gen_xor3 = gen_xordi3;
22483 bits = 6;
22484 }
22485
22486 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
22487 x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
22488 else
22489 x = gen_lowpart (half_mode, operands[2]);
22490 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
22491
22492 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
22493 emit_insn (gen_and3 (high[0], high[0], const1_rtx));
22494 emit_move_insn (low[0], high[0]);
22495 emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
22496 }
22497
22498 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22499 emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
22500 return;
22501 }
22502
22503 if (operands[1] == constm1_rtx)
22504 {
22505 /* For -1 << N, we can avoid the shld instruction, because we
22506 know that we're shifting 0...31/63 ones into a -1. */
22507 emit_move_insn (low[0], constm1_rtx);
22508 if (optimize_insn_for_size_p ())
22509 emit_move_insn (high[0], low[0]);
22510 else
22511 emit_move_insn (high[0], constm1_rtx);
22512 }
22513 else
22514 {
22515 gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
22516
22517 if (!rtx_equal_p (operands[0], operands[1]))
22518 emit_move_insn (operands[0], operands[1]);
22519
22520 split_double_mode (mode, operands, 1, low, high);
22521 emit_insn (gen_shld (high[0], low[0], operands[2]));
22522 }
22523
22524 emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
22525
22526 if (TARGET_CMOVE && scratch)
22527 {
22528 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22529 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22530
22531 ix86_expand_clear (scratch);
22532 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
22533 }
22534 else
22535 {
22536 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22537 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22538
22539 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
22540 }
22541 }
22542
22543 void
22544 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
22545 {
22546 rtx (*gen_ashr3)(rtx, rtx, rtx)
22547 = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
22548 rtx (*gen_shrd)(rtx, rtx, rtx);
22549 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22550
22551 rtx low[2], high[2];
22552 int count;
22553
22554 if (CONST_INT_P (operands[2]))
22555 {
22556 split_double_mode (mode, operands, 2, low, high);
22557 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22558
22559 if (count == GET_MODE_BITSIZE (mode) - 1)
22560 {
22561 emit_move_insn (high[0], high[1]);
22562 emit_insn (gen_ashr3 (high[0], high[0],
22563 GEN_INT (half_width - 1)));
22564 emit_move_insn (low[0], high[0]);
22565
22566 }
22567 else if (count >= half_width)
22568 {
22569 emit_move_insn (low[0], high[1]);
22570 emit_move_insn (high[0], low[0]);
22571 emit_insn (gen_ashr3 (high[0], high[0],
22572 GEN_INT (half_width - 1)));
22573
22574 if (count > half_width)
22575 emit_insn (gen_ashr3 (low[0], low[0],
22576 GEN_INT (count - half_width)));
22577 }
22578 else
22579 {
22580 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22581
22582 if (!rtx_equal_p (operands[0], operands[1]))
22583 emit_move_insn (operands[0], operands[1]);
22584
22585 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22586 emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
22587 }
22588 }
22589 else
22590 {
22591 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22592
22593 if (!rtx_equal_p (operands[0], operands[1]))
22594 emit_move_insn (operands[0], operands[1]);
22595
22596 split_double_mode (mode, operands, 1, low, high);
22597
22598 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22599 emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
22600
22601 if (TARGET_CMOVE && scratch)
22602 {
22603 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22604 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22605
22606 emit_move_insn (scratch, high[0]);
22607 emit_insn (gen_ashr3 (scratch, scratch,
22608 GEN_INT (half_width - 1)));
22609 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22610 scratch));
22611 }
22612 else
22613 {
22614 rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
22615 = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
22616
22617 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
22618 }
22619 }
22620 }
22621
22622 void
22623 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
22624 {
22625 rtx (*gen_lshr3)(rtx, rtx, rtx)
22626 = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
22627 rtx (*gen_shrd)(rtx, rtx, rtx);
22628 int half_width = GET_MODE_BITSIZE (mode) >> 1;
22629
22630 rtx low[2], high[2];
22631 int count;
22632
22633 if (CONST_INT_P (operands[2]))
22634 {
22635 split_double_mode (mode, operands, 2, low, high);
22636 count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
22637
22638 if (count >= half_width)
22639 {
22640 emit_move_insn (low[0], high[1]);
22641 ix86_expand_clear (high[0]);
22642
22643 if (count > half_width)
22644 emit_insn (gen_lshr3 (low[0], low[0],
22645 GEN_INT (count - half_width)));
22646 }
22647 else
22648 {
22649 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22650
22651 if (!rtx_equal_p (operands[0], operands[1]))
22652 emit_move_insn (operands[0], operands[1]);
22653
22654 emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
22655 emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
22656 }
22657 }
22658 else
22659 {
22660 gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
22661
22662 if (!rtx_equal_p (operands[0], operands[1]))
22663 emit_move_insn (operands[0], operands[1]);
22664
22665 split_double_mode (mode, operands, 1, low, high);
22666
22667 emit_insn (gen_shrd (low[0], high[0], operands[2]));
22668 emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
22669
22670 if (TARGET_CMOVE && scratch)
22671 {
22672 rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
22673 = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
22674
22675 ix86_expand_clear (scratch);
22676 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
22677 scratch));
22678 }
22679 else
22680 {
22681 rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
22682 = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
22683
22684 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
22685 }
22686 }
22687 }
22688
22689 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
22690 static void
22691 predict_jump (int prob)
22692 {
22693 rtx insn = get_last_insn ();
22694 gcc_assert (JUMP_P (insn));
22695 add_int_reg_note (insn, REG_BR_PROB, prob);
22696 }
22697
22698 /* Helper function for the string operations below. Tests whether
22699 (VARIABLE & VALUE) is zero and, if so, jumps to the label that is returned. */
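/* E.g. ix86_expand_aligntest (destptr, 4, false) emits code equivalent to
   "if ((destptr & 4) == 0) goto label;" and returns that label, so callers
   can wrap a fixup that runs only when that alignment bit is set.  */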
22700 static rtx
22701 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
22702 {
22703 rtx label = gen_label_rtx ();
22704 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
22705 if (GET_MODE (variable) == DImode)
22706 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
22707 else
22708 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
22709 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
22710 1, label);
22711 if (epilogue)
22712 predict_jump (REG_BR_PROB_BASE * 50 / 100);
22713 else
22714 predict_jump (REG_BR_PROB_BASE * 90 / 100);
22715 return label;
22716 }
22717
22718 /* Decrease COUNTREG by VALUE. */
22719 static void
22720 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
22721 {
22722 rtx (*gen_add)(rtx, rtx, rtx)
22723 = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
22724
22725 emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
22726 }
22727
22728 /* Zero extend a possibly-SImode EXP to a Pmode register. */
22729 rtx
22730 ix86_zero_extend_to_Pmode (rtx exp)
22731 {
22732 return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
22733 }
22734
22735 /* Divide COUNTREG by SCALE. */
22736 static rtx
22737 scale_counter (rtx countreg, int scale)
22738 {
22739 rtx sc;
22740
22741 if (scale == 1)
22742 return countreg;
22743 if (CONST_INT_P (countreg))
22744 return GEN_INT (INTVAL (countreg) / scale);
22745 gcc_assert (REG_P (countreg));
22746
22747 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
22748 GEN_INT (exact_log2 (scale)),
22749 NULL, 1, OPTAB_DIRECT);
22750 return sc;
22751 }
22752
22753 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
22754 DImode for constant loop counts. */
22755
22756 static enum machine_mode
22757 counter_mode (rtx count_exp)
22758 {
22759 if (GET_MODE (count_exp) != VOIDmode)
22760 return GET_MODE (count_exp);
22761 if (!CONST_INT_P (count_exp))
22762 return Pmode;
22763 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
22764 return DImode;
22765 return SImode;
22766 }
22767
22768 /* Copy the address to a Pmode register. This is used for x32 to
22769 truncate a DImode TLS address to a SImode register. */
22770
22771 static rtx
22772 ix86_copy_addr_to_reg (rtx addr)
22773 {
22774 if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
22775 return copy_addr_to_reg (addr);
22776 else
22777 {
22778 gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
22779 return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
22780 }
22781 }
22782
22783 /* When ISSETMEM is FALSE, output a simple loop to move memory pointed to by
22784 SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the overall size
22785 is COUNT, specified in bytes. When ISSETMEM is TRUE, output the equivalent
22786 loop to set memory to VALUE (supposed to be in MODE).
22787
22788 The size is rounded down to a whole number of chunks moved at once.
22789 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
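/* Roughly, the emitted code has this shape (an illustrative C-level sketch):

     size = count & ~(GET_MODE_SIZE (mode) * unroll - 1);
     for (iter = 0; iter < size; iter += GET_MODE_SIZE (mode) * unroll)
       copy (or, when ISSETMEM, store VALUE into) UNROLL chunks of MODE;
     destptr += iter;  srcptr += iter;     (srcptr only when !ISSETMEM)  */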
22790
22791
22792 static void
22793 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
22794 rtx destptr, rtx srcptr, rtx value,
22795 rtx count, enum machine_mode mode, int unroll,
22796 int expected_size, bool issetmem)
22797 {
22798 rtx out_label, top_label, iter, tmp;
22799 enum machine_mode iter_mode = counter_mode (count);
22800 int piece_size_n = GET_MODE_SIZE (mode) * unroll;
22801 rtx piece_size = GEN_INT (piece_size_n);
22802 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
22803 rtx size;
22804 int i;
22805
22806 top_label = gen_label_rtx ();
22807 out_label = gen_label_rtx ();
22808 iter = gen_reg_rtx (iter_mode);
22809
22810 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
22811 NULL, 1, OPTAB_DIRECT);
22812 /* Those two should combine. */
22813 if (piece_size == const1_rtx)
22814 {
22815 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
22816 true, out_label);
22817 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22818 }
22819 emit_move_insn (iter, const0_rtx);
22820
22821 emit_label (top_label);
22822
22823 tmp = convert_modes (Pmode, iter_mode, iter, true);
22824
22825 /* This assert could be relaxed - in this case we'll need to compute
22826 the smallest power of two containing PIECE_SIZE_N and pass it to
22827 offset_address. */
22828 gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
22829 destmem = offset_address (destmem, tmp, piece_size_n);
22830 destmem = adjust_address (destmem, mode, 0);
22831
22832 if (!issetmem)
22833 {
22834 srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
22835 srcmem = adjust_address (srcmem, mode, 0);
22836
22837 /* When unrolling for chips that reorder memory reads and writes,
22838 we can save registers by using a single temporary.
22839 Also, using 4 temporaries is overkill in 32bit mode. */
22840 if (!TARGET_64BIT && 0)
22841 {
22842 for (i = 0; i < unroll; i++)
22843 {
22844 if (i)
22845 {
22846 destmem =
22847 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22848 srcmem =
22849 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22850 }
22851 emit_move_insn (destmem, srcmem);
22852 }
22853 }
22854 else
22855 {
22856 rtx tmpreg[4];
22857 gcc_assert (unroll <= 4);
22858 for (i = 0; i < unroll; i++)
22859 {
22860 tmpreg[i] = gen_reg_rtx (mode);
22861 if (i)
22862 {
22863 srcmem =
22864 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
22865 }
22866 emit_move_insn (tmpreg[i], srcmem);
22867 }
22868 for (i = 0; i < unroll; i++)
22869 {
22870 if (i)
22871 {
22872 destmem =
22873 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22874 }
22875 emit_move_insn (destmem, tmpreg[i]);
22876 }
22877 }
22878 }
22879 else
22880 for (i = 0; i < unroll; i++)
22881 {
22882 if (i)
22883 destmem =
22884 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
22885 emit_move_insn (destmem, value);
22886 }
22887
22888 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
22889 true, OPTAB_LIB_WIDEN);
22890 if (tmp != iter)
22891 emit_move_insn (iter, tmp);
22892
22893 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
22894 true, top_label);
22895 if (expected_size != -1)
22896 {
22897 expected_size /= GET_MODE_SIZE (mode) * unroll;
22898 if (expected_size == 0)
22899 predict_jump (0);
22900 else if (expected_size > REG_BR_PROB_BASE)
22901 predict_jump (REG_BR_PROB_BASE - 1);
22902 else
22903 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
22904 }
22905 else
22906 predict_jump (REG_BR_PROB_BASE * 80 / 100);
22907 iter = ix86_zero_extend_to_Pmode (iter);
22908 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
22909 true, OPTAB_LIB_WIDEN);
22910 if (tmp != destptr)
22911 emit_move_insn (destptr, tmp);
22912 if (!issetmem)
22913 {
22914 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
22915 true, OPTAB_LIB_WIDEN);
22916 if (tmp != srcptr)
22917 emit_move_insn (srcptr, tmp);
22918 }
22919 emit_label (out_label);
22920 }
22921
22922 /* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
22923 When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
22924 When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
22925 For setmem case, VALUE is a promoted to a wider size ORIG_VALUE.
22926 ORIG_VALUE is the original value passed to memset to fill the memory with.
22927 Other arguments have same meaning as for previous function. */
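/* For instance (illustrative): setting memory to zero in SImode chunks ends
   up as a single "rep stosl" (storing %eax %ecx times at %edi), with the
   byte count first scaled down to a dword count by scale_counter.  */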
22928
22929 static void
22930 expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
22931 rtx destptr, rtx srcptr, rtx value, rtx orig_value,
22932 rtx count,
22933 enum machine_mode mode, bool issetmem)
22934 {
22935 rtx destexp;
22936 rtx srcexp;
22937 rtx countreg;
22938 HOST_WIDE_INT rounded_count;
22939
22940 /* If possible, it is shorter to use rep movs.
22941 TODO: Maybe it is better to move this logic to decide_alg. */
22942 if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
22943 && (!issetmem || orig_value == const0_rtx))
22944 mode = SImode;
22945
22946 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
22947 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
22948
22949 countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
22950 GET_MODE_SIZE (mode)));
22951 if (mode != QImode)
22952 {
22953 destexp = gen_rtx_ASHIFT (Pmode, countreg,
22954 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22955 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
22956 }
22957 else
22958 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
22959 if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
22960 {
22961 rounded_count = (INTVAL (count)
22962 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22963 destmem = shallow_copy_rtx (destmem);
22964 set_mem_size (destmem, rounded_count);
22965 }
22966 else if (MEM_SIZE_KNOWN_P (destmem))
22967 clear_mem_size (destmem);
22968
22969 if (issetmem)
22970 {
22971 value = force_reg (mode, gen_lowpart (mode, value));
22972 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
22973 }
22974 else
22975 {
22976 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
22977 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
22978 if (mode != QImode)
22979 {
22980 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
22981 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
22982 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
22983 }
22984 else
22985 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
22986 if (CONST_INT_P (count))
22987 {
22988 rounded_count = (INTVAL (count)
22989 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
22990 srcmem = shallow_copy_rtx (srcmem);
22991 set_mem_size (srcmem, rounded_count);
22992 }
22993 else
22994 {
22995 if (MEM_SIZE_KNOWN_P (srcmem))
22996 clear_mem_size (srcmem);
22997 }
22998 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
22999 destexp, srcexp));
23000 }
23001 }
23002
23003 /* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
23004 DESTMEM.
23005 SRCMEM is passed by pointer so it can be updated on return.
23006 The return value is the updated DESTMEM. */
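/* For example (illustrative): SIZE_TO_MOVE == 16 on an x86_64 SSE target is
   first tried as a TImode move; since TImode is wider than word_mode it is
   retried as the corresponding vector mode (V2DImode), and only falls back
   to word-sized pieces when no such move pattern is available.  */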
23007 static rtx
23008 emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23009 HOST_WIDE_INT size_to_move)
23010 {
23011 rtx dst = destmem, src = *srcmem, adjust, tempreg;
23012 enum insn_code code;
23013 enum machine_mode move_mode;
23014 int piece_size, i;
23015
23016 /* Find the widest mode in which we could perform moves.
23017 Start with the biggest power of 2 not larger than SIZE_TO_MOVE and
23018 halve it until a move of that size is supported. */
23019 piece_size = 1 << floor_log2 (size_to_move);
23020 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23021 code = optab_handler (mov_optab, move_mode);
23022 while (code == CODE_FOR_nothing && piece_size > 1)
23023 {
23024 piece_size >>= 1;
23025 move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23026 code = optab_handler (mov_optab, move_mode);
23027 }
23028
23029 /* Find the corresponding vector mode with the same size as MOVE_MODE.
23030 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
23031 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23032 {
23033 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23034 move_mode = mode_for_vector (word_mode, nunits);
23035 code = optab_handler (mov_optab, move_mode);
23036 if (code == CODE_FOR_nothing)
23037 {
23038 move_mode = word_mode;
23039 piece_size = GET_MODE_SIZE (move_mode);
23040 code = optab_handler (mov_optab, move_mode);
23041 }
23042 }
23043 gcc_assert (code != CODE_FOR_nothing);
23044
23045 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23046 src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23047
23048 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23049 gcc_assert (size_to_move % piece_size == 0);
23050 adjust = GEN_INT (piece_size);
23051 for (i = 0; i < size_to_move; i += piece_size)
23052 {
23053 /* We move from memory to memory, so we'll need to do it via
23054 a temporary register. */
23055 tempreg = gen_reg_rtx (move_mode);
23056 emit_insn (GEN_FCN (code) (tempreg, src));
23057 emit_insn (GEN_FCN (code) (dst, tempreg));
23058
23059 emit_move_insn (destptr,
23060 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23061 emit_move_insn (srcptr,
23062 gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23063
23064 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23065 piece_size);
23066 src = adjust_automodify_address_nv (src, move_mode, srcptr,
23067 piece_size);
23068 }
23069
23070 /* Update DST and SRC rtx. */
23071 *srcmem = src;
23072 return dst;
23073 }
23074
23075 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
23076 static void
23077 expand_movmem_epilogue (rtx destmem, rtx srcmem,
23078 rtx destptr, rtx srcptr, rtx count, int max_size)
23079 {
23080 rtx src, dest;
23081 if (CONST_INT_P (count))
23082 {
23083 HOST_WIDE_INT countval = INTVAL (count);
23084 HOST_WIDE_INT epilogue_size = countval % max_size;
23085 int i;
23086
23087 /* For now MAX_SIZE should be a power of 2. This assert could be
23088 relaxed, but it'll require a somewhat more complicated epilogue
23089 expansion. */
23090 gcc_assert ((max_size & (max_size - 1)) == 0);
23091 for (i = max_size; i >= 1; i >>= 1)
23092 {
23093 if (epilogue_size & i)
23094 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23095 }
23096 return;
23097 }
23098 if (max_size > 8)
23099 {
23100 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23101 count, 1, OPTAB_DIRECT);
23102 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23103 count, QImode, 1, 4, false);
23104 return;
23105 }
23106
23107 /* When single stringop instructions are available (TARGET_SINGLE_STRINGOP),
23108 we can cheaply increase the dest and src pointers. Otherwise we save code
23109 size by maintaining an offset (zero is readily available from the preceding
23110 rep operation) and using x86 addressing modes. */
23111 if (TARGET_SINGLE_STRINGOP)
23112 {
23113 if (max_size > 4)
23114 {
23115 rtx label = ix86_expand_aligntest (count, 4, true);
23116 src = change_address (srcmem, SImode, srcptr);
23117 dest = change_address (destmem, SImode, destptr);
23118 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23119 emit_label (label);
23120 LABEL_NUSES (label) = 1;
23121 }
23122 if (max_size > 2)
23123 {
23124 rtx label = ix86_expand_aligntest (count, 2, true);
23125 src = change_address (srcmem, HImode, srcptr);
23126 dest = change_address (destmem, HImode, destptr);
23127 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23128 emit_label (label);
23129 LABEL_NUSES (label) = 1;
23130 }
23131 if (max_size > 1)
23132 {
23133 rtx label = ix86_expand_aligntest (count, 1, true);
23134 src = change_address (srcmem, QImode, srcptr);
23135 dest = change_address (destmem, QImode, destptr);
23136 emit_insn (gen_strmov (destptr, dest, srcptr, src));
23137 emit_label (label);
23138 LABEL_NUSES (label) = 1;
23139 }
23140 }
23141 else
23142 {
23143 rtx offset = force_reg (Pmode, const0_rtx);
23144 rtx tmp;
23145
23146 if (max_size > 4)
23147 {
23148 rtx label = ix86_expand_aligntest (count, 4, true);
23149 src = change_address (srcmem, SImode, srcptr);
23150 dest = change_address (destmem, SImode, destptr);
23151 emit_move_insn (dest, src);
23152 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
23153 true, OPTAB_LIB_WIDEN);
23154 if (tmp != offset)
23155 emit_move_insn (offset, tmp);
23156 emit_label (label);
23157 LABEL_NUSES (label) = 1;
23158 }
23159 if (max_size > 2)
23160 {
23161 rtx label = ix86_expand_aligntest (count, 2, true);
23162 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23163 src = change_address (srcmem, HImode, tmp);
23164 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23165 dest = change_address (destmem, HImode, tmp);
23166 emit_move_insn (dest, src);
23167 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
23168 true, OPTAB_LIB_WIDEN);
23169 if (tmp != offset)
23170 emit_move_insn (offset, tmp);
23171 emit_label (label);
23172 LABEL_NUSES (label) = 1;
23173 }
23174 if (max_size > 1)
23175 {
23176 rtx label = ix86_expand_aligntest (count, 1, true);
23177 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
23178 src = change_address (srcmem, QImode, tmp);
23179 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
23180 dest = change_address (destmem, QImode, tmp);
23181 emit_move_insn (dest, src);
23182 emit_label (label);
23183 LABEL_NUSES (label) = 1;
23184 }
23185 }
23186 }
23187
23188 /* This function emits moves to fill SIZE_TO_MOVE bytes starting from DESTMEM
23189 with the value PROMOTED_VAL.
23190 The return value is the updated DESTMEM. */
23192 static rtx
23193 emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
23194 HOST_WIDE_INT size_to_move)
23195 {
23196 rtx dst = destmem, adjust;
23197 enum insn_code code;
23198 enum machine_mode move_mode;
23199 int piece_size, i;
23200
23201 /* Determine the mode for the stores. Start from the mode of PROMOTED_VAL
23202 (falling back to QImode if it has no mode) and narrow it when SIZE_TO_MOVE
23203 is smaller than that mode. */
23204 move_mode = GET_MODE (promoted_val);
23205 if (move_mode == VOIDmode)
23206 move_mode = QImode;
23207 if (size_to_move < GET_MODE_SIZE (move_mode))
23208 {
23209 move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
23210 promoted_val = gen_lowpart (move_mode, promoted_val);
23211 }
23212 piece_size = GET_MODE_SIZE (move_mode);
23213 code = optab_handler (mov_optab, move_mode);
23214 gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
23215
23216 dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23217
23218 /* Emit moves. We'll need SIZE_TO_MOVE/PIECE_SIZE moves. */
23219 gcc_assert (size_to_move % piece_size == 0);
23220 adjust = GEN_INT (piece_size);
23221 for (i = 0; i < size_to_move; i += piece_size)
23222 {
23223 if (piece_size <= GET_MODE_SIZE (word_mode))
23224 {
23225 emit_insn (gen_strset (destptr, dst, promoted_val));
23226 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23227 piece_size);
23228 continue;
23229 }
23230
23231 emit_insn (GEN_FCN (code) (dst, promoted_val));
23232
23233 emit_move_insn (destptr,
23234 gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23235
23236 dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23237 piece_size);
23238 }
23239
23240 /* Update DST rtx. */
23241 return dst;
23242 }
23243 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23244 static void
23245 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
23246 rtx count, int max_size)
23247 {
23248 count =
23249 expand_simple_binop (counter_mode (count), AND, count,
23250 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
23251 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
23252 gen_lowpart (QImode, value), count, QImode,
23253 1, max_size / 2, true);
23254 }
23255
23256 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
23257 static void
23258 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
23259 rtx count, int max_size)
23260 {
23261 rtx dest;
23262
23263 if (CONST_INT_P (count))
23264 {
23265 HOST_WIDE_INT countval = INTVAL (count);
23266 HOST_WIDE_INT epilogue_size = countval % max_size;
23267 int i;
23268
23269 /* For now MAX_SIZE should be a power of 2. This assert could be
23270 relaxed, but it'll require a somewhat more complicated epilogue
23271 expansion. */
23272 gcc_assert ((max_size & (max_size - 1)) == 0);
23273 for (i = max_size; i >= 1; i >>= 1)
23274 {
23275 if (epilogue_size & i)
23276 {
23277 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23278 destmem = emit_memset (destmem, destptr, vec_value, i);
23279 else
23280 destmem = emit_memset (destmem, destptr, value, i);
23281 }
23282 }
23283 return;
23284 }
23285 if (max_size > 32)
23286 {
23287 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
23288 return;
23289 }
23290 if (max_size > 16)
23291 {
23292 rtx label = ix86_expand_aligntest (count, 16, true);
23293 if (TARGET_64BIT)
23294 {
23295 dest = change_address (destmem, DImode, destptr);
23296 emit_insn (gen_strset (destptr, dest, value));
23297 dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
23298 emit_insn (gen_strset (destptr, dest, value));
23299 }
23300 else
23301 {
23302 dest = change_address (destmem, SImode, destptr);
23303 emit_insn (gen_strset (destptr, dest, value));
23304 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23305 emit_insn (gen_strset (destptr, dest, value));
23306 dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
23307 emit_insn (gen_strset (destptr, dest, value));
23308 dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
23309 emit_insn (gen_strset (destptr, dest, value));
23310 }
23311 emit_label (label);
23312 LABEL_NUSES (label) = 1;
23313 }
23314 if (max_size > 8)
23315 {
23316 rtx label = ix86_expand_aligntest (count, 8, true);
23317 if (TARGET_64BIT)
23318 {
23319 dest = change_address (destmem, DImode, destptr);
23320 emit_insn (gen_strset (destptr, dest, value));
23321 }
23322 else
23323 {
23324 dest = change_address (destmem, SImode, destptr);
23325 emit_insn (gen_strset (destptr, dest, value));
23326 dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
23327 emit_insn (gen_strset (destptr, dest, value));
23328 }
23329 emit_label (label);
23330 LABEL_NUSES (label) = 1;
23331 }
23332 if (max_size > 4)
23333 {
23334 rtx label = ix86_expand_aligntest (count, 4, true);
23335 dest = change_address (destmem, SImode, destptr);
23336 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
23337 emit_label (label);
23338 LABEL_NUSES (label) = 1;
23339 }
23340 if (max_size > 2)
23341 {
23342 rtx label = ix86_expand_aligntest (count, 2, true);
23343 dest = change_address (destmem, HImode, destptr);
23344 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
23345 emit_label (label);
23346 LABEL_NUSES (label) = 1;
23347 }
23348 if (max_size > 1)
23349 {
23350 rtx label = ix86_expand_aligntest (count, 1, true);
23351 dest = change_address (destmem, QImode, destptr);
23352 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
23353 emit_label (label);
23354 LABEL_NUSES (label) = 1;
23355 }
23356 }
23357
23358 /* Depending on ISSETMEM, either copy enough bytes from SRCMEM to DESTMEM, or
23359    store enough bytes to DESTMEM, to align it to DESIRED_ALIGNMENT.  The
23360    original alignment is ALIGN.  Depending on ISSETMEM, either the SRCMEM/SRCPTR
23361    or the VALUE/VEC_VALUE arguments are ignored.
23362    The return value is the updated DESTMEM.  */
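/* For instance, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 the loop below emits,
   for each of the sizes 1, 2 and 4, an alignment test guarding a copy (or store)
   of that many bytes, so at most DESIRED_ALIGNMENT - ALIGN == 7 bytes are handled
   before the main loop.  */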
23363 static rtx
23364 expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
23365 rtx destptr, rtx srcptr, rtx value,
23366 rtx vec_value, rtx count, int align,
23367 int desired_alignment, bool issetmem)
23368 {
23369 int i;
23370 for (i = 1; i < desired_alignment; i <<= 1)
23371 {
23372 if (align <= i)
23373 {
23374 rtx label = ix86_expand_aligntest (destptr, i, false);
23375 if (issetmem)
23376 {
23377 if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
23378 destmem = emit_memset (destmem, destptr, vec_value, i);
23379 else
23380 destmem = emit_memset (destmem, destptr, value, i);
23381 }
23382 else
23383 destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23384 ix86_adjust_counter (count, i);
23385 emit_label (label);
23386 LABEL_NUSES (label) = 1;
23387 set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
23388 }
23389 }
23390 return destmem;
23391 }
23392
23393 /* Test if COUNT&SIZE is nonzero and if so, expand a movmem
23394    or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
23395    and jump to DONE_LABEL.  */
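/* For example, with SIZE == 8 and a runtime count of 11 the first group of
   moves below covers bytes 0..7 and the second group covers bytes 3..10
   (COUNT - SIZE .. COUNT - 1); the overlap is harmless.  */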
23396 static void
23397 expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
23398 rtx destptr, rtx srcptr,
23399 rtx value, rtx vec_value,
23400 rtx count, int size,
23401 rtx done_label, bool issetmem)
23402 {
23403 rtx label = ix86_expand_aligntest (count, size, false);
23404 enum machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
23405 rtx modesize;
23406 int n;
23407
23408   /* If we do not have a vector value to copy, we must reduce the size.  */
23409 if (issetmem)
23410 {
23411 if (!vec_value)
23412 {
23413 if (GET_MODE (value) == VOIDmode && size > 8)
23414 mode = Pmode;
23415 else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
23416 mode = GET_MODE (value);
23417 }
23418 else
23419 mode = GET_MODE (vec_value), value = vec_value;
23420 }
23421 else
23422 {
23423 /* Choose appropriate vector mode. */
23424 if (size >= 32)
23425 mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
23426 else if (size >= 16)
23427 mode = TARGET_SSE ? V16QImode : DImode;
23428 srcmem = change_address (srcmem, mode, srcptr);
23429 }
23430 destmem = change_address (destmem, mode, destptr);
23431 modesize = GEN_INT (GET_MODE_SIZE (mode));
23432 gcc_assert (GET_MODE_SIZE (mode) <= size);
23433 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23434 {
23435 if (issetmem)
23436 emit_move_insn (destmem, gen_lowpart (mode, value));
23437 else
23438 {
23439 emit_move_insn (destmem, srcmem);
23440 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23441 }
23442 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23443 }
23444
23445 destmem = offset_address (destmem, count, 1);
23446 destmem = offset_address (destmem, GEN_INT (-2 * size),
23447 GET_MODE_SIZE (mode));
23448 if (!issetmem)
23449 {
23450 srcmem = offset_address (srcmem, count, 1);
23451 srcmem = offset_address (srcmem, GEN_INT (-2 * size),
23452 GET_MODE_SIZE (mode));
23453 }
23454 for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
23455 {
23456 if (issetmem)
23457 emit_move_insn (destmem, gen_lowpart (mode, value));
23458 else
23459 {
23460 emit_move_insn (destmem, srcmem);
23461 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23462 }
23463 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23464 }
23465 emit_jump_insn (gen_jump (done_label));
23466 emit_barrier ();
23467
23468 emit_label (label);
23469 LABEL_NUSES (label) = 1;
23470 }
23471
23472 /* Handle a small memcpy (up to SIZE, which is supposed to be a small power of 2)
23473    and get ready for the main memcpy loop by copying the initial DESIRED_ALIGN-ALIGN
23474    bytes and the last SIZE bytes, adjusting DESTPTR/SRCPTR/COUNT so that we can
23475    proceed with a loop copying SIZE bytes at once.  Do moves in MODE.
23476 DONE_LABEL is a label after the whole copying sequence. The label is created
23477 on demand if *DONE_LABEL is NULL.
23478 MIN_SIZE is minimal size of block copied. This value gets adjusted for new
23479 bounds after the initial copies.
23480
23481    DESTMEM/SRCMEM are memory expressions pointing to the copied block,
23482    DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates whether
23483 we will dispatch to a library call for large blocks.
23484
23485 In pseudocode we do:
23486
23487 if (COUNT < SIZE)
23488 {
23489 Assume that SIZE is 4. Bigger sizes are handled analogously
23490 if (COUNT & 4)
23491 {
23492 copy 4 bytes from SRCPTR to DESTPTR
23493 copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
23494 goto done_label
23495 }
23496 if (!COUNT)
23497 goto done_label;
23498 copy 1 byte from SRCPTR to DESTPTR
23499 if (COUNT & 2)
23500 {
23501 copy 2 bytes from SRCPTR to DESTPTR
23502 copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
23503 }
23504 }
23505 else
23506 {
23507 copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
23508 copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
23509
23510       OLD_DESTPTR = DESTPTR;
23511       Align DESTPTR up to DESIRED_ALIGN
23512       SRCPTR += DESTPTR - OLD_DESTPTR
23513       COUNT -= DESTPTR - OLD_DESTPTR
23514       if (DYNAMIC_CHECK)
23515	 Round COUNT down to multiple of SIZE
23516       << optional caller supplied zero size guard is here >>
23517       << optional caller supplied dynamic check is here >>
23518 << caller supplied main copy loop is here >>
23519 }
23520 done_label:
23521 */
23522 static void
23523 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
23524 rtx *destptr, rtx *srcptr,
23525 enum machine_mode mode,
23526 rtx value, rtx vec_value,
23527 rtx *count,
23528 rtx *done_label,
23529 int size,
23530 int desired_align,
23531 int align,
23532 unsigned HOST_WIDE_INT *min_size,
23533 bool dynamic_check,
23534 bool issetmem)
23535 {
23536 rtx loop_label = NULL, label;
23537 int n;
23538 rtx modesize;
23539 int prolog_size = 0;
23540 rtx mode_value;
23541
23542   /* Choose the proper value to copy.  */
23543 if (issetmem && VECTOR_MODE_P (mode))
23544 mode_value = vec_value;
23545 else
23546 mode_value = value;
23547 gcc_assert (GET_MODE_SIZE (mode) <= size);
23548
23549 /* See if block is big or small, handle small blocks. */
23550 if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
23551 {
23552 int size2 = size;
23553 loop_label = gen_label_rtx ();
23554
23555 if (!*done_label)
23556 *done_label = gen_label_rtx ();
23557
23558 emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
23559 1, loop_label);
23560 size2 >>= 1;
23561
23562 /* Handle sizes > 3. */
23563 for (;size2 > 2; size2 >>= 1)
23564 expand_small_movmem_or_setmem (destmem, srcmem,
23565 *destptr, *srcptr,
23566 value, vec_value,
23567 *count,
23568 size2, *done_label, issetmem);
23569 /* Nothing to copy? Jump to DONE_LABEL if so */
23570 emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
23571 1, *done_label);
23572
23573 /* Do a byte copy. */
23574 destmem = change_address (destmem, QImode, *destptr);
23575 if (issetmem)
23576 emit_move_insn (destmem, gen_lowpart (QImode, value));
23577 else
23578 {
23579 srcmem = change_address (srcmem, QImode, *srcptr);
23580 emit_move_insn (destmem, srcmem);
23581 }
23582
23583 /* Handle sizes 2 and 3. */
23584 label = ix86_expand_aligntest (*count, 2, false);
23585 destmem = change_address (destmem, HImode, *destptr);
23586 destmem = offset_address (destmem, *count, 1);
23587 destmem = offset_address (destmem, GEN_INT (-2), 2);
23588 if (issetmem)
23589 emit_move_insn (destmem, gen_lowpart (HImode, value));
23590 else
23591 {
23592 srcmem = change_address (srcmem, HImode, *srcptr);
23593 srcmem = offset_address (srcmem, *count, 1);
23594 srcmem = offset_address (srcmem, GEN_INT (-2), 2);
23595 emit_move_insn (destmem, srcmem);
23596 }
23597
23598 emit_label (label);
23599 LABEL_NUSES (label) = 1;
23600 emit_jump_insn (gen_jump (*done_label));
23601 emit_barrier ();
23602 }
23603 else
23604 gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
23605 || UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
23606
23607   /* Start the main copy or store for COUNT >= SIZE.  */
23608 if (loop_label)
23609 {
23610 emit_label (loop_label);
23611 LABEL_NUSES (loop_label) = 1;
23612 }
23613
23614 /* Copy first desired_align bytes. */
23615 if (!issetmem)
23616 srcmem = change_address (srcmem, mode, *srcptr);
23617 destmem = change_address (destmem, mode, *destptr);
23618 modesize = GEN_INT (GET_MODE_SIZE (mode));
23619 for (n = 0; prolog_size < desired_align - align; n++)
23620 {
23621 if (issetmem)
23622 emit_move_insn (destmem, mode_value);
23623 else
23624 {
23625 emit_move_insn (destmem, srcmem);
23626 srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
23627 }
23628 destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
23629 prolog_size += GET_MODE_SIZE (mode);
23630 }
23631
23632
23633 /* Copy last SIZE bytes. */
23634 destmem = offset_address (destmem, *count, 1);
23635 destmem = offset_address (destmem,
23636 GEN_INT (-size - prolog_size),
23637 1);
23638 if (issetmem)
23639 emit_move_insn (destmem, mode_value);
23640 else
23641 {
23642 srcmem = offset_address (srcmem, *count, 1);
23643 srcmem = offset_address (srcmem,
23644 GEN_INT (-size - prolog_size),
23645 1);
23646 emit_move_insn (destmem, srcmem);
23647 }
23648 for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
23649 {
23650 destmem = offset_address (destmem, modesize, 1);
23651 if (issetmem)
23652 emit_move_insn (destmem, mode_value);
23653 else
23654 {
23655 srcmem = offset_address (srcmem, modesize, 1);
23656 emit_move_insn (destmem, srcmem);
23657 }
23658 }
23659
23660 /* Align destination. */
23661 if (desired_align > 1 && desired_align > align)
23662 {
23663 rtx saveddest = *destptr;
23664
23665 gcc_assert (desired_align <= size);
23666 /* Align destptr up, place it to new register. */
23667 *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
23668 GEN_INT (prolog_size),
23669 NULL_RTX, 1, OPTAB_DIRECT);
23670 *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
23671 GEN_INT (-desired_align),
23672 *destptr, 1, OPTAB_DIRECT);
23673 /* See how many bytes we skipped. */
23674 saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
23675 *destptr,
23676 saveddest, 1, OPTAB_DIRECT);
23677 /* Adjust srcptr and count. */
23678 if (!issetmem)
23679 *srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr, saveddest,
23680 *srcptr, 1, OPTAB_DIRECT);
23681 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23682 saveddest, *count, 1, OPTAB_DIRECT);
23683 /* We copied at most size + prolog_size. */
23684 if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
23685 *min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
23686 else
23687 *min_size = 0;
23688
23689       /* Our loops always round down the block size, but for dispatch to the
23690	 library we need the precise value.  */
23691 if (dynamic_check)
23692 *count = expand_simple_binop (GET_MODE (*count), AND, *count,
23693 GEN_INT (-size), *count, 1, OPTAB_DIRECT);
23694 }
23695 else
23696 {
23697 gcc_assert (prolog_size == 0);
23698 /* Decrease count, so we won't end up copying last word twice. */
23699 if (!CONST_INT_P (*count))
23700 *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
23701 constm1_rtx, *count, 1, OPTAB_DIRECT);
23702 else
23703 *count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
23704 if (*min_size)
23705 *min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
23706 }
23707 }
23708
23709
23710 /* This function is like the previous one, except here we know how many bytes
23711 need to be copied. That allows us to update alignment not only of DST, which
23712 is returned, but also of SRC, which is passed as a pointer for that
23713 reason. */
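/* For example, with DESIRED_ALIGN == 8 and ALIGN_BYTES == 7 the loop below
   emits a 1-byte, a 2-byte and a 4-byte copy (or store), one per bit set in
   ALIGN_BYTES.  */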
23714 static rtx
23715 expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
23716 rtx srcreg, rtx value, rtx vec_value,
23717 int desired_align, int align_bytes,
23718 bool issetmem)
23719 {
23720 rtx src = NULL;
23721 rtx orig_dst = dst;
23722 rtx orig_src = NULL;
23723 int piece_size = 1;
23724 int copied_bytes = 0;
23725
23726 if (!issetmem)
23727 {
23728 gcc_assert (srcp != NULL);
23729 src = *srcp;
23730 orig_src = src;
23731 }
23732
23733 for (piece_size = 1;
23734 piece_size <= desired_align && copied_bytes < align_bytes;
23735 piece_size <<= 1)
23736 {
23737 if (align_bytes & piece_size)
23738 {
23739 if (issetmem)
23740 {
23741 if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
23742 dst = emit_memset (dst, destreg, vec_value, piece_size);
23743 else
23744 dst = emit_memset (dst, destreg, value, piece_size);
23745 }
23746 else
23747 dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
23748 copied_bytes += piece_size;
23749 }
23750 }
23751 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
23752 set_mem_align (dst, desired_align * BITS_PER_UNIT);
23753 if (MEM_SIZE_KNOWN_P (orig_dst))
23754 set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
23755
23756 if (!issetmem)
23757 {
23758 int src_align_bytes = get_mem_align_offset (src, desired_align
23759 * BITS_PER_UNIT);
23760 if (src_align_bytes >= 0)
23761 src_align_bytes = desired_align - src_align_bytes;
23762 if (src_align_bytes >= 0)
23763 {
23764 unsigned int src_align;
23765 for (src_align = desired_align; src_align >= 2; src_align >>= 1)
23766 {
23767 if ((src_align_bytes & (src_align - 1))
23768 == (align_bytes & (src_align - 1)))
23769 break;
23770 }
23771 if (src_align > (unsigned int) desired_align)
23772 src_align = desired_align;
23773 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
23774 set_mem_align (src, src_align * BITS_PER_UNIT);
23775 }
23776 if (MEM_SIZE_KNOWN_P (orig_src))
23777 set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
23778 *srcp = src;
23779 }
23780
23781 return dst;
23782 }
23783
23784 /* Return true if ALG can be used in the current context.
23785    Assume we are expanding memset if MEMSET is true.  */
23786 static bool
23787 alg_usable_p (enum stringop_alg alg, bool memset)
23788 {
23789 if (alg == no_stringop)
23790 return false;
23791 if (alg == vector_loop)
23792 return TARGET_SSE || TARGET_AVX;
23793 /* Algorithms using the rep prefix want at least edi and ecx;
23794 additionally, memset wants eax and memcpy wants esi. Don't
23795 consider such algorithms if the user has appropriated those
23796 registers for their own purposes. */
23797 if (alg == rep_prefix_1_byte
23798 || alg == rep_prefix_4_byte
23799 || alg == rep_prefix_8_byte)
23800 return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
23801 || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
23802 return true;
23803 }
23804
23805 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
23806 static enum stringop_alg
23807 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
23808 unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
23809 bool memset, bool zero_memset, int *dynamic_check, bool *noalign)
23810 {
23811 const struct stringop_algs * algs;
23812 bool optimize_for_speed;
23813 int max = -1;
23814 const struct processor_costs *cost;
23815 int i;
23816 bool any_alg_usable_p = false;
23817
23818 *noalign = false;
23819 *dynamic_check = -1;
23820
23821 /* Even if the string operation call is cold, we still might spend a lot
23822 of time processing large blocks. */
23823 if (optimize_function_for_size_p (cfun)
23824 || (optimize_insn_for_size_p ()
23825 && (max_size < 256
23826 || (expected_size != -1 && expected_size < 256))))
23827 optimize_for_speed = false;
23828 else
23829 optimize_for_speed = true;
23830
23831 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
23832 if (memset)
23833 algs = &cost->memset[TARGET_64BIT != 0];
23834 else
23835 algs = &cost->memcpy[TARGET_64BIT != 0];
23836
23837 /* See maximal size for user defined algorithm. */
23838 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23839 {
23840 enum stringop_alg candidate = algs->size[i].alg;
23841 bool usable = alg_usable_p (candidate, memset);
23842 any_alg_usable_p |= usable;
23843
23844 if (candidate != libcall && candidate && usable)
23845 max = algs->size[i].max;
23846 }
23847
23848   /* If the expected size is not known but the max size is small enough
23849      that the inline version is a win, set the expected size into
23850      the range.  */
23851 if (max > 1 && (unsigned HOST_WIDE_INT) max >= max_size
23852 && expected_size == -1)
23853 expected_size = min_size / 2 + max_size / 2;
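  /* min_size / 2 + max_size / 2 is the midpoint of the range, computed in a
     way that cannot overflow.  */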
23854
23855   /* If the user specified the algorithm, honor it if possible.  */
23856 if (ix86_stringop_alg != no_stringop
23857 && alg_usable_p (ix86_stringop_alg, memset))
23858 return ix86_stringop_alg;
23859 /* rep; movq or rep; movl is the smallest variant. */
23860 else if (!optimize_for_speed)
23861 {
23862 *noalign = true;
23863 if (!count || (count & 3) || (memset && !zero_memset))
23864 return alg_usable_p (rep_prefix_1_byte, memset)
23865 ? rep_prefix_1_byte : loop_1_byte;
23866 else
23867 return alg_usable_p (rep_prefix_4_byte, memset)
23868 ? rep_prefix_4_byte : loop;
23869 }
23870   /* Very tiny blocks are best handled via the loop; REP is expensive to
23871      set up.  */
23872 else if (expected_size != -1 && expected_size < 4)
23873 return loop_1_byte;
23874 else if (expected_size != -1)
23875 {
23876 enum stringop_alg alg = libcall;
23877 bool alg_noalign = false;
23878 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
23879 {
23880 /* We get here if the algorithms that were not libcall-based
23881 were rep-prefix based and we are unable to use rep prefixes
23882 based on global register usage. Break out of the loop and
23883 use the heuristic below. */
23884 if (algs->size[i].max == 0)
23885 break;
23886 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
23887 {
23888 enum stringop_alg candidate = algs->size[i].alg;
23889
23890 if (candidate != libcall && alg_usable_p (candidate, memset))
23891 {
23892 alg = candidate;
23893 alg_noalign = algs->size[i].noalign;
23894 }
23895 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
23896 last non-libcall inline algorithm. */
23897 if (TARGET_INLINE_ALL_STRINGOPS)
23898 {
23899 /* When the current size is best to be copied by a libcall,
23900 but we are still forced to inline, run the heuristic below
23901 that will pick code for medium sized blocks. */
23902 if (alg != libcall)
23903 {
23904 *noalign = alg_noalign;
23905 return alg;
23906 }
23907 break;
23908 }
23909 else if (alg_usable_p (candidate, memset))
23910 {
23911 *noalign = algs->size[i].noalign;
23912 return candidate;
23913 }
23914 }
23915 }
23916 }
23917   /* When asked to inline the call anyway, try to pick a meaningful choice.
23918      We look for the maximal size of block that is faster to copy by hand and
23919      take blocks of at most that size, guessing that the average size will
23920      be roughly half of the maximum.
23921
23922 If this turns out to be bad, we might simply specify the preferred
23923 choice in ix86_costs. */
23924 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23925 && (algs->unknown_size == libcall
23926 || !alg_usable_p (algs->unknown_size, memset)))
23927 {
23928 enum stringop_alg alg;
23929
23930 /* If there aren't any usable algorithms, then recursing on
23931 smaller sizes isn't going to find anything. Just return the
23932 simple byte-at-a-time copy loop. */
23933 if (!any_alg_usable_p)
23934 {
23935 /* Pick something reasonable. */
23936 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23937 *dynamic_check = 128;
23938 return loop_1_byte;
23939 }
23940 if (max == -1)
23941 max = 4096;
23942 alg = decide_alg (count, max / 2, min_size, max_size, memset,
23943 zero_memset, dynamic_check, noalign);
23944 gcc_assert (*dynamic_check == -1);
23945 gcc_assert (alg != libcall);
23946 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
23947 *dynamic_check = max;
23948 return alg;
23949 }
23950 return (alg_usable_p (algs->unknown_size, memset)
23951 ? algs->unknown_size : libcall);
23952 }
23953
23954 /* Decide on alignment. We know that the operand is already aligned to ALIGN
23955 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
23956 static int
23957 decide_alignment (int align,
23958 enum stringop_alg alg,
23959 int expected_size,
23960 enum machine_mode move_mode)
23961 {
23962 int desired_align = 0;
23963
23964 gcc_assert (alg != no_stringop);
23965
23966 if (alg == libcall)
23967 return 0;
23968 if (move_mode == VOIDmode)
23969 return 0;
23970
23971 desired_align = GET_MODE_SIZE (move_mode);
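  /* E.g. QImode asks for no extra alignment, word_mode for 4 or 8 bytes, and
     a 16-byte vector mode for 16-byte alignment.  */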
23972   /* PentiumPro has special logic triggering for 8-byte aligned blocks,
23973      copying a whole cache line at once.  */
23974 if (TARGET_PENTIUMPRO
23975 && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
23976 desired_align = 8;
23977
23978 if (optimize_size)
23979 desired_align = 1;
23980 if (desired_align < align)
23981 desired_align = align;
23982 if (expected_size != -1 && expected_size < 4)
23983 desired_align = align;
23984
23985 return desired_align;
23986 }
23987
23988
23989 /* Helper function for memset.  For QImode value 0xXY produce
23990    0xXYXYXYXY of the width specified by MODE.  This is essentially
23991    VAL * 0x01010101, but we can do slightly better than
23992    synth_mult by unwinding the sequence by hand on CPUs with
23993    slow multiply.  */
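/* For instance, VAL == 0xAB promoted to SImode yields a register holding
   0xABABABAB.  For a constant VAL this is computed directly below as
   v |= v << 8; v |= v << 16; (plus one more 32-bit shift for DImode).  */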
23994 static rtx
23995 promote_duplicated_reg (enum machine_mode mode, rtx val)
23996 {
23997 enum machine_mode valmode = GET_MODE (val);
23998 rtx tmp;
23999 int nops = mode == DImode ? 3 : 2;
24000
24001 gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24002 if (val == const0_rtx)
24003 return copy_to_mode_reg (mode, CONST0_RTX (mode));
24004 if (CONST_INT_P (val))
24005 {
24006 HOST_WIDE_INT v = INTVAL (val) & 255;
24007
24008 v |= v << 8;
24009 v |= v << 16;
24010 if (mode == DImode)
24011 v |= (v << 16) << 16;
24012 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24013 }
24014
24015 if (valmode == VOIDmode)
24016 valmode = QImode;
24017 if (valmode != QImode)
24018 val = gen_lowpart (QImode, val);
24019 if (mode == QImode)
24020 return val;
24021 if (!TARGET_PARTIAL_REG_STALL)
24022 nops--;
24023 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24024 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24025 <= (ix86_cost->shift_const + ix86_cost->add) * nops
24026 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24027 {
24028 rtx reg = convert_modes (mode, QImode, val, true);
24029 tmp = promote_duplicated_reg (mode, const1_rtx);
24030 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24031 OPTAB_DIRECT);
24032 }
24033 else
24034 {
24035 rtx reg = convert_modes (mode, QImode, val, true);
24036
24037 if (!TARGET_PARTIAL_REG_STALL)
24038 if (mode == SImode)
24039 emit_insn (gen_movsi_insv_1 (reg, reg));
24040 else
24041 emit_insn (gen_movdi_insv_1 (reg, reg));
24042 else
24043 {
24044 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24045 NULL, 1, OPTAB_DIRECT);
24046 reg =
24047 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24048 }
24049 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24050 NULL, 1, OPTAB_DIRECT);
24051 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24052 if (mode == SImode)
24053 return reg;
24054 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24055 NULL, 1, OPTAB_DIRECT);
24056 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24057 return reg;
24058 }
24059 }
24060
24061 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
24062 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
24063 alignment from ALIGN to DESIRED_ALIGN. */
24064 static rtx
24065 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24066 int align)
24067 {
24068 rtx promoted_val;
24069
24070 if (TARGET_64BIT
24071 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24072 promoted_val = promote_duplicated_reg (DImode, val);
24073 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24074 promoted_val = promote_duplicated_reg (SImode, val);
24075 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24076 promoted_val = promote_duplicated_reg (HImode, val);
24077 else
24078 promoted_val = val;
24079
24080 return promoted_val;
24081 }
24082
24083 /* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24084 operations when profitable. The code depends upon architecture, block size
24085 and alignment, but always has one of the following overall structures:
24086
24087 Aligned move sequence:
24088
24089 1) Prologue guard: Conditional that jumps up to epilogues for small
24090 blocks that can be handled by epilogue alone. This is faster
24091       but also needed for correctness, since the prologue assumes the block
24092 is larger than the desired alignment.
24093
24094 Optional dynamic check for size and libcall for large
24095 blocks is emitted here too, with -minline-stringops-dynamically.
24096
24097 2) Prologue: copy first few bytes in order to get destination
24098 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less
24099 than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24100 copied. We emit either a jump tree on power of two sized
24101 blocks, or a byte loop.
24102
24103 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24104 with specified algorithm.
24105
24106 4) Epilogue: code copying tail of the block that is too small to be
24107 handled by main body (or up to size guarded by prologue guard).
24108
24109 Misaligned move sequence
24110
24111   1) misaligned move prologue/epilogue containing:
24112 a) Prologue handling small memory blocks and jumping to done_label
24113 (skipped if blocks are known to be large enough)
24114      b) Single move copying the first DESIRED_ALIGN-ALIGN bytes if alignment is
24115 needed by single possibly misaligned move
24116 (skipped if alignment is not needed)
24117 c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24118
24119 2) Zero size guard dispatching to done_label, if needed
24120
24121   3) Dispatch to library call, if needed,
24122
24123   4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24124 with specified algorithm. */
24125 bool
24126 ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24127 rtx align_exp, rtx expected_align_exp,
24128 rtx expected_size_exp, rtx min_size_exp,
24129 rtx max_size_exp, rtx probable_max_size_exp,
24130 bool issetmem)
24131 {
24132 rtx destreg;
24133 rtx srcreg = NULL;
24134 rtx label = NULL;
24135 rtx tmp;
24136 rtx jump_around_label = NULL;
24137 HOST_WIDE_INT align = 1;
24138 unsigned HOST_WIDE_INT count = 0;
24139 HOST_WIDE_INT expected_size = -1;
24140 int size_needed = 0, epilogue_size_needed;
24141 int desired_align = 0, align_bytes = 0;
24142 enum stringop_alg alg;
24143 rtx promoted_val = NULL;
24144 rtx vec_promoted_val = NULL;
24145 bool force_loopy_epilogue = false;
24146 int dynamic_check;
24147 bool need_zero_guard = false;
24148 bool noalign;
24149 enum machine_mode move_mode = VOIDmode;
24150 int unroll_factor = 1;
24151 /* TODO: Once value ranges are available, fill in proper data. */
24152 unsigned HOST_WIDE_INT min_size = 0;
24153 unsigned HOST_WIDE_INT max_size = -1;
24154 unsigned HOST_WIDE_INT probable_max_size = -1;
24155 bool misaligned_prologue_used = false;
24156
24157 if (CONST_INT_P (align_exp))
24158 align = INTVAL (align_exp);
24159   /* i386 can do misaligned access at a reasonably increased cost.  */
24160 if (CONST_INT_P (expected_align_exp)
24161 && INTVAL (expected_align_exp) > align)
24162 align = INTVAL (expected_align_exp);
24163 /* ALIGN is the minimum of destination and source alignment, but we care here
24164 just about destination alignment. */
24165 else if (!issetmem
24166 && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
24167 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
24168
24169 if (CONST_INT_P (count_exp))
24170 {
24171 min_size = max_size = probable_max_size = count = expected_size
24172 = INTVAL (count_exp);
24173 /* When COUNT is 0, there is nothing to do. */
24174 if (!count)
24175 return true;
24176 }
24177 else
24178 {
24179 if (min_size_exp)
24180 min_size = INTVAL (min_size_exp);
24181 if (max_size_exp)
24182 max_size = INTVAL (max_size_exp);
24183 if (probable_max_size_exp)
24184 probable_max_size = INTVAL (probable_max_size_exp);
24185 if (CONST_INT_P (expected_size_exp))
24186 expected_size = INTVAL (expected_size_exp);
24187 }
24188
24189 /* Make sure we don't need to care about overflow later on. */
24190 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
24191 return false;
24192
24193 /* Step 0: Decide on preferred algorithm, desired alignment and
24194 size of chunks to be copied by main loop. */
24195 alg = decide_alg (count, expected_size, min_size, probable_max_size,
24196 issetmem,
24197 issetmem && val_exp == const0_rtx,
24198 &dynamic_check, &noalign);
24199 if (alg == libcall)
24200 return false;
24201 gcc_assert (alg != no_stringop);
24202
24203   /* For now the vector version of memset is generated only for memory zeroing,
24204      as creating the promoted vector value is very cheap in this case.  */
24205 if (issetmem && alg == vector_loop && val_exp != const0_rtx)
24206 alg = unrolled_loop;
24207
24208 if (!count)
24209 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
24210 destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
24211 if (!issetmem)
24212 srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
24213
24214 unroll_factor = 1;
24215 move_mode = word_mode;
24216 switch (alg)
24217 {
24218 case libcall:
24219 case no_stringop:
24220 case last_alg:
24221 gcc_unreachable ();
24222 case loop_1_byte:
24223 need_zero_guard = true;
24224 move_mode = QImode;
24225 break;
24226 case loop:
24227 need_zero_guard = true;
24228 break;
24229 case unrolled_loop:
24230 need_zero_guard = true;
24231 unroll_factor = (TARGET_64BIT ? 4 : 2);
24232 break;
24233 case vector_loop:
24234 need_zero_guard = true;
24235 unroll_factor = 4;
24236 /* Find the widest supported mode. */
24237 move_mode = word_mode;
24238 while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
24239 != CODE_FOR_nothing)
24240 move_mode = GET_MODE_WIDER_MODE (move_mode);
24241
24242 /* Find the corresponding vector mode with the same size as MOVE_MODE.
24243 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */
24244 if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
24245 {
24246 int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
24247 move_mode = mode_for_vector (word_mode, nunits);
24248 if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
24249 move_mode = word_mode;
24250 }
24251 gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
24252 break;
24253 case rep_prefix_8_byte:
24254 move_mode = DImode;
24255 break;
24256 case rep_prefix_4_byte:
24257 move_mode = SImode;
24258 break;
24259 case rep_prefix_1_byte:
24260 move_mode = QImode;
24261 break;
24262 }
24263 size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
24264 epilogue_size_needed = size_needed;
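  /* E.g. an unrolled_loop on a 64-bit target moves 8 * 4 == 32 bytes per
     iteration, while a vector_loop using 16-byte vector moves handles
     16 * 4 == 64 bytes.  */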
24265
24266 desired_align = decide_alignment (align, alg, expected_size, move_mode);
24267 if (!TARGET_ALIGN_STRINGOPS || noalign)
24268 align = desired_align;
24269
24270 /* Step 1: Prologue guard. */
24271
24272 /* Alignment code needs count to be in register. */
24273 if (CONST_INT_P (count_exp) && desired_align > align)
24274 {
24275 if (INTVAL (count_exp) > desired_align
24276 && INTVAL (count_exp) > size_needed)
24277 {
24278 align_bytes
24279 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
24280 if (align_bytes <= 0)
24281 align_bytes = 0;
24282 else
24283 align_bytes = desired_align - align_bytes;
24284 }
24285 if (align_bytes == 0)
24286 count_exp = force_reg (counter_mode (count_exp), count_exp);
24287 }
24288 gcc_assert (desired_align >= 1 && align >= 1);
24289
24290   /* Misaligned move sequences handle both prologue and epilogue at once.
24291      Default code generation results in smaller code for large alignments
24292      and also avoids redundant work when sizes are known precisely.  */
24293 misaligned_prologue_used
24294 = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
24295 && MAX (desired_align, epilogue_size_needed) <= 32
24296 && desired_align <= epilogue_size_needed
24297 && ((desired_align > align && !align_bytes)
24298 || (!count && epilogue_size_needed > 1)));
24299
24300   /* Do the cheap promotion to allow better CSE across the
24301      main loop and epilogue (i.e. one load of the big constant in
24302      front of all the code).
24303      For now the misaligned move sequences do not have a fast path
24304      without broadcasting.  */
24305 if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
24306 {
24307 if (alg == vector_loop)
24308 {
24309 gcc_assert (val_exp == const0_rtx);
24310 vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
24311 promoted_val = promote_duplicated_reg_to_size (val_exp,
24312 GET_MODE_SIZE (word_mode),
24313 desired_align, align);
24314 }
24315 else
24316 {
24317 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24318 desired_align, align);
24319 }
24320 }
24321   /* Misaligned move sequences handle both prologues and epilogues at once.
24322      Default code generation results in smaller code for large alignments and
24323      also avoids redundant work when sizes are known precisely.  */
24324 if (misaligned_prologue_used)
24325 {
24326       /* The misaligned move prologue handles small blocks by itself.  */
24327 expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
24328 (dst, src, &destreg, &srcreg,
24329 move_mode, promoted_val, vec_promoted_val,
24330 &count_exp,
24331 &jump_around_label,
24332 desired_align < align
24333 ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
24334 desired_align, align, &min_size, dynamic_check, issetmem);
24335 if (!issetmem)
24336 src = change_address (src, BLKmode, srcreg);
24337 dst = change_address (dst, BLKmode, destreg);
24338 set_mem_align (dst, desired_align * BITS_PER_UNIT);
24339 epilogue_size_needed = 0;
24340 if (need_zero_guard && !min_size)
24341 {
24342 /* It is possible that we copied enough so the main loop will not
24343 execute. */
24344 gcc_assert (size_needed > 1);
24345 if (jump_around_label == NULL_RTX)
24346 jump_around_label = gen_label_rtx ();
24347 emit_cmp_and_jump_insns (count_exp,
24348 GEN_INT (size_needed),
24349 LTU, 0, counter_mode (count_exp), 1, jump_around_label);
24350 if (expected_size == -1
24351 || expected_size < (desired_align - align) / 2 + size_needed)
24352 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24353 else
24354 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24355 }
24356 }
24357 /* Ensure that alignment prologue won't copy past end of block. */
24358 else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
24359 {
24360 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
24361 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
24362 Make sure it is power of 2. */
24363 epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
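      /* E.g. SIZE_NEEDED == 16 with a small alignment gap gives 15, which the
	 statement above rounds up to the next power of two, 16.  */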
24364
24365       /* To improve performance for small blocks, we jump around the VAL
24366	 promoting code.  This means that if the promoted VAL is not constant,
24367	 we might not use it in the epilogue and have to use the byte
24368	 loop variant.  */
24369 if (issetmem && epilogue_size_needed > 2 && !promoted_val)
24370 force_loopy_epilogue = true;
24371 if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24372 || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24373 {
24374 /* If main algorithm works on QImode, no epilogue is needed.
24375 For small sizes just don't align anything. */
24376 if (size_needed == 1)
24377 desired_align = align;
24378 else
24379 goto epilogue;
24380 }
24381 else if (!count
24382 && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
24383 {
24384 label = gen_label_rtx ();
24385 emit_cmp_and_jump_insns (count_exp,
24386 GEN_INT (epilogue_size_needed),
24387 LTU, 0, counter_mode (count_exp), 1, label);
24388 if (expected_size == -1 || expected_size < epilogue_size_needed)
24389 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24390 else
24391 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24392 }
24393 }
24394
24395 /* Emit code to decide on runtime whether library call or inline should be
24396 used. */
24397 if (dynamic_check != -1)
24398 {
24399 if (!issetmem && CONST_INT_P (count_exp))
24400 {
24401 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
24402 {
24403 emit_block_move_via_libcall (dst, src, count_exp, false);
24404 count_exp = const0_rtx;
24405 goto epilogue;
24406 }
24407 }
24408 else
24409 {
24410 rtx hot_label = gen_label_rtx ();
24411 if (jump_around_label == NULL_RTX)
24412 jump_around_label = gen_label_rtx ();
24413 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
24414 LEU, 0, counter_mode (count_exp),
24415 1, hot_label);
24416 predict_jump (REG_BR_PROB_BASE * 90 / 100);
24417 if (issetmem)
24418 set_storage_via_libcall (dst, count_exp, val_exp, false);
24419 else
24420 emit_block_move_via_libcall (dst, src, count_exp, false);
24421 emit_jump (jump_around_label);
24422 emit_label (hot_label);
24423 }
24424 }
24425
24426 /* Step 2: Alignment prologue. */
24427 /* Do the expensive promotion once we branched off the small blocks. */
24428 if (issetmem && !promoted_val)
24429 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
24430 desired_align, align);
24431
24432 if (desired_align > align && !misaligned_prologue_used)
24433 {
24434 if (align_bytes == 0)
24435 {
24436 	  /* Except for the first move in the prologue, we no longer know
24437 	     the constant offset in aliasing info.  It doesn't seem worth
24438 	     the pain to maintain it for the first move, so throw away
24439 	     the info early.  */
24440 dst = change_address (dst, BLKmode, destreg);
24441 if (!issetmem)
24442 src = change_address (src, BLKmode, srcreg);
24443 dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
24444 promoted_val, vec_promoted_val,
24445 count_exp, align, desired_align,
24446 issetmem);
24447 /* At most desired_align - align bytes are copied. */
24448 if (min_size < (unsigned)(desired_align - align))
24449 min_size = 0;
24450 else
24451 min_size -= desired_align - align;
24452 }
24453 else
24454 {
24455 /* If we know how many bytes need to be stored before dst is
24456 sufficiently aligned, maintain aliasing info accurately. */
24457 dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
24458 srcreg,
24459 promoted_val,
24460 vec_promoted_val,
24461 desired_align,
24462 align_bytes,
24463 issetmem);
24464
24465 count_exp = plus_constant (counter_mode (count_exp),
24466 count_exp, -align_bytes);
24467 count -= align_bytes;
24468 min_size -= align_bytes;
24469 max_size -= align_bytes;
24470 }
24471 if (need_zero_guard
24472 && !min_size
24473 && (count < (unsigned HOST_WIDE_INT) size_needed
24474 || (align_bytes == 0
24475 && count < ((unsigned HOST_WIDE_INT) size_needed
24476 + desired_align - align))))
24477 {
24478 /* It is possible that we copied enough so the main loop will not
24479 execute. */
24480 gcc_assert (size_needed > 1);
24481 if (label == NULL_RTX)
24482 label = gen_label_rtx ();
24483 emit_cmp_and_jump_insns (count_exp,
24484 GEN_INT (size_needed),
24485 LTU, 0, counter_mode (count_exp), 1, label);
24486 if (expected_size == -1
24487 || expected_size < (desired_align - align) / 2 + size_needed)
24488 predict_jump (REG_BR_PROB_BASE * 20 / 100);
24489 else
24490 predict_jump (REG_BR_PROB_BASE * 60 / 100);
24491 }
24492 }
24493 if (label && size_needed == 1)
24494 {
24495 emit_label (label);
24496 LABEL_NUSES (label) = 1;
24497 label = NULL;
24498 epilogue_size_needed = 1;
24499 if (issetmem)
24500 promoted_val = val_exp;
24501 }
24502 else if (label == NULL_RTX && !misaligned_prologue_used)
24503 epilogue_size_needed = size_needed;
24504
24505 /* Step 3: Main loop. */
24506
24507 switch (alg)
24508 {
24509 case libcall:
24510 case no_stringop:
24511 case last_alg:
24512 gcc_unreachable ();
24513 case loop_1_byte:
24514 case loop:
24515 case unrolled_loop:
24516 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
24517 count_exp, move_mode, unroll_factor,
24518 expected_size, issetmem);
24519 break;
24520 case vector_loop:
24521 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
24522 vec_promoted_val, count_exp, move_mode,
24523 unroll_factor, expected_size, issetmem);
24524 break;
24525 case rep_prefix_8_byte:
24526 case rep_prefix_4_byte:
24527 case rep_prefix_1_byte:
24528 expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
24529 val_exp, count_exp, move_mode, issetmem);
24530 break;
24531 }
24532 /* Adjust properly the offset of src and dest memory for aliasing. */
24533 if (CONST_INT_P (count_exp))
24534 {
24535 if (!issetmem)
24536 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
24537 (count / size_needed) * size_needed);
24538 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
24539 (count / size_needed) * size_needed);
24540 }
24541 else
24542 {
24543 if (!issetmem)
24544 src = change_address (src, BLKmode, srcreg);
24545 dst = change_address (dst, BLKmode, destreg);
24546 }
24547
24548 /* Step 4: Epilogue to copy the remaining bytes. */
24549 epilogue:
24550 if (label)
24551 {
24552       /* When the main loop is done, COUNT_EXP might hold the original count,
24553 	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
24554 	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
24555 	 bytes.  Compensate if needed.  */
24556
24557 if (size_needed < epilogue_size_needed)
24558 {
24559 tmp =
24560 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
24561 GEN_INT (size_needed - 1), count_exp, 1,
24562 OPTAB_DIRECT);
24563 if (tmp != count_exp)
24564 emit_move_insn (count_exp, tmp);
24565 }
24566 emit_label (label);
24567 LABEL_NUSES (label) = 1;
24568 }
24569
24570 if (count_exp != const0_rtx && epilogue_size_needed > 1)
24571 {
24572 if (force_loopy_epilogue)
24573 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
24574 epilogue_size_needed);
24575 else
24576 {
24577 if (issetmem)
24578 expand_setmem_epilogue (dst, destreg, promoted_val,
24579 vec_promoted_val, count_exp,
24580 epilogue_size_needed);
24581 else
24582 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
24583 epilogue_size_needed);
24584 }
24585 }
24586 if (jump_around_label)
24587 emit_label (jump_around_label);
24588 return true;
24589 }
24590
24591
24592 /* Expand the appropriate insns for doing strlen if not just doing
24593 repnz; scasb
24594
24595 out = result, initialized with the start address
24596 align_rtx = alignment of the address.
24597    scratch = scratch register, initialized with the start address when
24598 not aligned, otherwise undefined
24599
24600 This is just the body. It needs the initializations mentioned above and
24601 some address computing at the end. These things are done in i386.md. */
24602
24603 static void
24604 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
24605 {
24606 int align;
24607 rtx tmp;
24608 rtx align_2_label = NULL_RTX;
24609 rtx align_3_label = NULL_RTX;
24610 rtx align_4_label = gen_label_rtx ();
24611 rtx end_0_label = gen_label_rtx ();
24612 rtx mem;
24613 rtx tmpreg = gen_reg_rtx (SImode);
24614 rtx scratch = gen_reg_rtx (SImode);
24615 rtx cmp;
24616
24617 align = 0;
24618 if (CONST_INT_P (align_rtx))
24619 align = INTVAL (align_rtx);
24620
24621 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
24622
24623 /* Is there a known alignment and is it less than 4? */
24624 if (align < 4)
24625 {
24626 rtx scratch1 = gen_reg_rtx (Pmode);
24627 emit_move_insn (scratch1, out);
24628 /* Is there a known alignment and is it not 2? */
24629 if (align != 2)
24630 {
24631 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
24632 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
24633
24634 /* Leave just the 3 lower bits. */
24635 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
24636 NULL_RTX, 0, OPTAB_WIDEN);
24637
24638 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24639 Pmode, 1, align_4_label);
24640 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
24641 Pmode, 1, align_2_label);
24642 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
24643 Pmode, 1, align_3_label);
24644 }
24645 else
24646 {
24647 	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
24648 	     check whether the pointer is aligned to a 4-byte boundary.  */
24649
24650 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
24651 NULL_RTX, 0, OPTAB_WIDEN);
24652
24653 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
24654 Pmode, 1, align_4_label);
24655 }
24656
24657 mem = change_address (src, QImode, out);
24658
24659 /* Now compare the bytes. */
24660
24661       /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
24662 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
24663 QImode, 1, end_0_label);
24664
24665 /* Increment the address. */
24666 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24667
24668 /* Not needed with an alignment of 2 */
24669 if (align != 2)
24670 {
24671 emit_label (align_2_label);
24672
24673 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24674 end_0_label);
24675
24676 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24677
24678 emit_label (align_3_label);
24679 }
24680
24681 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
24682 end_0_label);
24683
24684 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
24685 }
24686
24687   /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
24688      align this loop; it only makes the program bigger and does not help
24689      to speed it up.  */
24690 emit_label (align_4_label);
24691
24692 mem = change_address (src, SImode, out);
24693 emit_move_insn (scratch, mem);
24694 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
24695
24696   /* This formula yields a nonzero result iff one of the bytes is zero.
24697      This saves three branches inside the loop and many cycles.  */
24698
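  /* In C terms the insns below compute
       (x - 0x01010101) & ~x & 0x80808080,
     which is nonzero exactly when some byte of the word x is zero.  */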
24699 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
24700 emit_insn (gen_one_cmplsi2 (scratch, scratch));
24701 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
24702 emit_insn (gen_andsi3 (tmpreg, tmpreg,
24703 gen_int_mode (0x80808080, SImode)));
24704 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
24705 align_4_label);
24706
24707 if (TARGET_CMOVE)
24708 {
24709 rtx reg = gen_reg_rtx (SImode);
24710 rtx reg2 = gen_reg_rtx (Pmode);
24711 emit_move_insn (reg, tmpreg);
24712 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
24713
24714 /* If zero is not in the first two bytes, move two bytes forward. */
24715 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24716 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24717 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24718 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
24719 gen_rtx_IF_THEN_ELSE (SImode, tmp,
24720 reg,
24721 tmpreg)));
24722 /* Emit lea manually to avoid clobbering of flags. */
24723 emit_insn (gen_rtx_SET (SImode, reg2,
24724 gen_rtx_PLUS (Pmode, out, const2_rtx)));
24725
24726 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24727 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
24728 emit_insn (gen_rtx_SET (VOIDmode, out,
24729 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
24730 reg2,
24731 out)));
24732 }
24733 else
24734 {
24735 rtx end_2_label = gen_label_rtx ();
24736 /* Is zero in the first two bytes? */
24737
24738 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
24739 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24740 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
24741 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
24742 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
24743 pc_rtx);
24744 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
24745 JUMP_LABEL (tmp) = end_2_label;
24746
24747 /* Not in the first two. Move two bytes forward. */
24748 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
24749 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
24750
24751 emit_label (end_2_label);
24752
24753 }
24754
24755 /* Avoid branch in fixing the byte. */
24756 tmpreg = gen_lowpart (QImode, tmpreg);
24757 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
24758 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
24759 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
24760 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
24761
24762 emit_label (end_0_label);
24763 }
24764
24765 /* Expand strlen. */
24766
24767 bool
24768 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
24769 {
24770 rtx addr, scratch1, scratch2, scratch3, scratch4;
24771
24772   /* The generic case of the strlen expander is long.  Avoid
24773      expanding it unless TARGET_INLINE_ALL_STRINGOPS.  */
24774
24775 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24776 && !TARGET_INLINE_ALL_STRINGOPS
24777 && !optimize_insn_for_size_p ()
24778 && (!CONST_INT_P (align) || INTVAL (align) < 4))
24779 return false;
24780
24781 addr = force_reg (Pmode, XEXP (src, 0));
24782 scratch1 = gen_reg_rtx (Pmode);
24783
24784 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
24785 && !optimize_insn_for_size_p ())
24786 {
24787       /* Well, it seems that some optimizer does not combine a call like
24788 	 foo (strlen (bar), strlen (bar));
24789 	 when the move and the subtraction are done here.  It does calculate
24790 	 the length just once when these instructions are done inside
24791 	 output_strlen_unroll().  But I think that since &bar[strlen (bar)] is
24792 	 often used and I use one fewer register for the lifetime of
24793 	 output_strlen_unroll(), this is better.  */
24794
24795 emit_move_insn (out, addr);
24796
24797 ix86_expand_strlensi_unroll_1 (out, src, align);
24798
24799 /* strlensi_unroll_1 returns the address of the zero at the end of
24800 the string, like memchr(), so compute the length by subtracting
24801 the start address. */
24802 emit_insn (ix86_gen_sub3 (out, out, addr));
24803 }
24804 else
24805 {
24806 rtx unspec;
24807
24808 /* Can't use this if the user has appropriated eax, ecx, or edi. */
24809 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
24810 return false;
24811
24812 scratch2 = gen_reg_rtx (Pmode);
24813 scratch3 = gen_reg_rtx (Pmode);
24814 scratch4 = force_reg (Pmode, constm1_rtx);
24815
24816 emit_move_insn (scratch3, addr);
24817 eoschar = force_reg (QImode, eoschar);
24818
24819 src = replace_equiv_address_nv (src, scratch3);
24820
24821 /* If .md starts supporting :P, this can be done in .md. */
24822 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
24823 scratch4), UNSPEC_SCAS);
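      /* SCRATCH1 receives the count register left by repnz scasb: it starts
	 at -1 and is decremented once per byte compared, including the
	 terminating zero, i.e. it ends up as -strlen - 2.  Hence
	 ~SCRATCH1 - 1, computed below, is the length.  */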
24824 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
24825 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
24826 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
24827 }
24828 return true;
24829 }
24830
24831 /* For a given symbol (function), construct code to compute the address of its
24832    PLT entry in the large x86-64 PIC model.  */
24833 static rtx
24834 construct_plt_address (rtx symbol)
24835 {
24836 rtx tmp, unspec;
24837
24838 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
24839 gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
24840 gcc_assert (Pmode == DImode);
24841
24842 tmp = gen_reg_rtx (Pmode);
24843 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
24844
24845 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
24846 emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
24847 return tmp;
24848 }
24849
24850 rtx
24851 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
24852 rtx callarg2,
24853 rtx pop, bool sibcall)
24854 {
24855 unsigned int const cregs_size
24856 = ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
24857 rtx vec[3 + cregs_size];
24858 rtx use = NULL, call;
24859 unsigned int vec_len = 0;
24860
24861 if (pop == const0_rtx)
24862 pop = NULL;
24863 gcc_assert (!TARGET_64BIT || !pop);
24864
24865 if (TARGET_MACHO && !TARGET_64BIT)
24866 {
24867 #if TARGET_MACHO
24868 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
24869 fnaddr = machopic_indirect_call_target (fnaddr);
24870 #endif
24871 }
24872 else
24873 {
24874 /* Static functions and indirect calls don't need the pic register. */
24875 if (flag_pic
24876 && (!TARGET_64BIT
24877 || (ix86_cmodel == CM_LARGE_PIC
24878 && DEFAULT_ABI != MS_ABI))
24879 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24880 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
24881 use_reg (&use, pic_offset_table_rtx);
24882 }
24883
24884 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
24885 {
24886 rtx al = gen_rtx_REG (QImode, AX_REG);
24887 emit_move_insn (al, callarg2);
24888 use_reg (&use, al);
24889 }
24890
24891 if (ix86_cmodel == CM_LARGE_PIC
24892 && !TARGET_PECOFF
24893 && MEM_P (fnaddr)
24894 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
24895 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
24896 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
24897 else if (sibcall
24898 ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
24899 : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
24900 {
24901 fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
24902 fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
24903 }
24904
24905 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
24906 if (retval)
24907 call = gen_rtx_SET (VOIDmode, retval, call);
24908 vec[vec_len++] = call;
24909
24910 if (pop)
24911 {
24912 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
24913 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
24914 vec[vec_len++] = pop;
24915 }
24916
24917 if (TARGET_64BIT_MS_ABI
24918 && (!callarg2 || INTVAL (callarg2) != -2))
24919 {
24920 unsigned i;
24921
24922 vec[vec_len++] = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
24923 UNSPEC_MS_TO_SYSV_CALL);
24924
24925 for (i = 0; i < cregs_size; i++)
24926 {
24927 int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
24928 enum machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
24929
24930 vec[vec_len++]
24931 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (mode, regno));
24932 }
24933 }
24934
24935 if (vec_len > 1)
24936 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
24937 call = emit_call_insn (call);
24938 if (use)
24939 CALL_INSN_FUNCTION_USAGE (call) = use;
24940
24941 return call;
24942 }
24943
24944 /* Output the assembly for a call instruction. */
24945
24946 const char *
24947 ix86_output_call_insn (rtx insn, rtx call_op)
24948 {
24949 bool direct_p = constant_call_address_operand (call_op, VOIDmode);
24950 bool seh_nop_p = false;
24951 const char *xasm;
24952
24953 if (SIBLING_CALL_P (insn))
24954 {
24955 if (direct_p)
24956 xasm = "jmp\t%P0";
24957 /* SEH epilogue detection requires the indirect branch case
24958 to include REX.W. */
24959 else if (TARGET_SEH)
24960 xasm = "rex.W jmp %A0";
24961 else
24962 xasm = "jmp\t%A0";
24963
24964 output_asm_insn (xasm, &call_op);
24965 return "";
24966 }
24967
24968 /* SEH unwinding can require an extra nop to be emitted in several
24969 circumstances. Determine if we have one of those. */
24970 if (TARGET_SEH)
24971 {
24972 rtx i;
24973
24974 for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
24975 {
24976 /* If we get to another real insn, we don't need the nop. */
24977 if (INSN_P (i))
24978 break;
24979
24980 /* If we get to the epilogue note, prevent a catch region from
24981 being adjacent to the standard epilogue sequence. If non-
24982 call-exceptions, we'll have done this during epilogue emission. */
24983 if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
24984 && !flag_non_call_exceptions
24985 && !can_throw_internal (insn))
24986 {
24987 seh_nop_p = true;
24988 break;
24989 }
24990 }
24991
24992 /* If we didn't find a real insn following the call, prevent the
24993 unwinder from looking into the next function. */
24994 if (i == NULL)
24995 seh_nop_p = true;
24996 }
24997
24998 if (direct_p)
24999 xasm = "call\t%P0";
25000 else
25001 xasm = "call\t%A0";
25002
25003 output_asm_insn (xasm, &call_op);
25004
25005 if (seh_nop_p)
25006 return "nop";
25007
25008 return "";
25009 }
25010 \f
25011 /* Clear stack slot assignments remembered from previous functions.
25012 This is called from INIT_EXPANDERS once before RTL is emitted for each
25013 function. */
25014
25015 static struct machine_function *
25016 ix86_init_machine_status (void)
25017 {
25018 struct machine_function *f;
25019
25020 f = ggc_alloc_cleared_machine_function ();
25021 f->use_fast_prologue_epilogue_nregs = -1;
25022 f->call_abi = ix86_abi;
25023
25024 return f;
25025 }
25026
25027 /* Return a MEM corresponding to a stack slot with mode MODE.
25028 Allocate a new slot if necessary.
25029
25030 The RTL for a function can have several slots available: N is
25031 which slot to use. */
25032
25033 rtx
25034 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
25035 {
25036 struct stack_local_entry *s;
25037
25038 gcc_assert (n < MAX_386_STACK_LOCALS);
25039
25040 for (s = ix86_stack_locals; s; s = s->next)
25041 if (s->mode == mode && s->n == n)
25042 return validize_mem (copy_rtx (s->rtl));
25043
25044 s = ggc_alloc_stack_local_entry ();
25045 s->n = n;
25046 s->mode = mode;
25047 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25048
25049 s->next = ix86_stack_locals;
25050 ix86_stack_locals = s;
25051 return validize_mem (s->rtl);
25052 }
25053
25054 static void
25055 ix86_instantiate_decls (void)
25056 {
25057 struct stack_local_entry *s;
25058
25059 for (s = ix86_stack_locals; s; s = s->next)
25060 if (s->rtl != NULL_RTX)
25061 instantiate_decl_rtl (s->rtl);
25062 }
25063 \f
25064 /* Check whether x86 address PARTS is a pc-relative address. */
25065
25066 static bool
25067 rip_relative_addr_p (struct ix86_address *parts)
25068 {
25069 rtx base, index, disp;
25070
25071 base = parts->base;
25072 index = parts->index;
25073 disp = parts->disp;
25074
25075 if (disp && !base && !index)
25076 {
25077 if (TARGET_64BIT)
25078 {
25079 rtx symbol = disp;
25080
25081 if (GET_CODE (disp) == CONST)
25082 symbol = XEXP (disp, 0);
25083 if (GET_CODE (symbol) == PLUS
25084 && CONST_INT_P (XEXP (symbol, 1)))
25085 symbol = XEXP (symbol, 0);
25086
25087 if (GET_CODE (symbol) == LABEL_REF
25088 || (GET_CODE (symbol) == SYMBOL_REF
25089 && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25090 || (GET_CODE (symbol) == UNSPEC
25091 && (XINT (symbol, 1) == UNSPEC_GOTPCREL
25092 || XINT (symbol, 1) == UNSPEC_PCREL
25093 || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
25094 return true;
25095 }
25096 }
25097 return false;
25098 }
25099
25100 /* Calculate the length of the memory address in the instruction encoding.
25101 Includes addr32 prefix, does not include the one-byte modrm, opcode,
25102 or other prefixes. We never generate addr32 prefix for LEA insn. */
25103
25104 int
25105 memory_address_length (rtx addr, bool lea)
25106 {
25107 struct ix86_address parts;
25108 rtx base, index, disp;
25109 int len;
25110 int ok;
25111
25112 if (GET_CODE (addr) == PRE_DEC
25113 || GET_CODE (addr) == POST_INC
25114 || GET_CODE (addr) == PRE_MODIFY
25115 || GET_CODE (addr) == POST_MODIFY)
25116 return 0;
25117
25118 ok = ix86_decompose_address (addr, &parts);
25119 gcc_assert (ok);
25120
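  /* A non-default segment register adds a one-byte override prefix.  */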
25121 len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
25122
25123 /* If this is not LEA instruction, add the length of addr32 prefix. */
25124 if (TARGET_64BIT && !lea
25125 && (SImode_address_operand (addr, VOIDmode)
25126 || (parts.base && GET_MODE (parts.base) == SImode)
25127 || (parts.index && GET_MODE (parts.index) == SImode)))
25128 len++;
25129
25130 base = parts.base;
25131 index = parts.index;
25132 disp = parts.disp;
25133
25134 if (base && GET_CODE (base) == SUBREG)
25135 base = SUBREG_REG (base);
25136 if (index && GET_CODE (index) == SUBREG)
25137 index = SUBREG_REG (index);
25138
25139 gcc_assert (base == NULL_RTX || REG_P (base));
25140 gcc_assert (index == NULL_RTX || REG_P (index));
25141
25142 /* Rule of thumb:
25143 - esp as the base always wants an index,
25144 - ebp as the base always wants a displacement,
25145 - r12 as the base always wants an index,
25146 - r13 as the base always wants a displacement. */
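  /* For example, (%esp) needs a SIB byte because rm=4 selects the SIB
     encoding, and (%ebp) must be encoded as 0(%ebp) because mod=0, rm=5
     means disp32 (or RIP-relative addressing in 64-bit mode).  */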
25147
25148 /* Register Indirect. */
25149 if (base && !index && !disp)
25150 {
25151 /* esp (for its index) and ebp (for its displacement) need
25152 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
25153 code. */
25154 if (base == arg_pointer_rtx
25155 || base == frame_pointer_rtx
25156 || REGNO (base) == SP_REG
25157 || REGNO (base) == BP_REG
25158 || REGNO (base) == R12_REG
25159 || REGNO (base) == R13_REG)
25160 len++;
25161 }
25162
25163 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
25164 is not disp32, but disp32(%rip), so for disp32
25165 SIB byte is needed, unless print_operand_address
25166 optimizes it into disp32(%rip) or (%rip) is implied
25167 by UNSPEC. */
25168 else if (disp && !base && !index)
25169 {
25170 len += 4;
25171 if (rip_relative_addr_p (&parts))
25172 len++;
25173 }
25174 else
25175 {
25176 /* Find the length of the displacement constant. */
25177 if (disp)
25178 {
25179 if (base && satisfies_constraint_K (disp))
25180 len += 1;
25181 else
25182 len += 4;
25183 }
25184 /* ebp always wants a displacement. Similarly r13. */
25185 else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
25186 len++;
25187
25188 /* An index requires the two-byte modrm form.... */
25189 if (index
25190 /* ...like esp (or r12), which always wants an index. */
25191 || base == arg_pointer_rtx
25192 || base == frame_pointer_rtx
25193 || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
25194 len++;
25195 }
25196
25197 return len;
25198 }
25199
25200 /* Compute the default value for the "length_immediate" attribute.  When
25201    SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
25202 int
25203 ix86_attr_length_immediate_default (rtx insn, bool shortform)
25204 {
25205 int len = 0;
25206 int i;
25207 extract_insn_cached (insn);
25208 for (i = recog_data.n_operands - 1; i >= 0; --i)
25209 if (CONSTANT_P (recog_data.operand[i]))
25210 {
25211 enum attr_mode mode = get_attr_mode (insn);
25212
25213 gcc_assert (!len);
25214 if (shortform && CONST_INT_P (recog_data.operand[i]))
25215 {
25216 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
25217 switch (mode)
25218 {
25219 case MODE_QI:
25220 len = 1;
25221 continue;
25222 case MODE_HI:
25223 ival = trunc_int_for_mode (ival, HImode);
25224 break;
25225 case MODE_SI:
25226 ival = trunc_int_for_mode (ival, SImode);
25227 break;
25228 default:
25229 break;
25230 }
25231 if (IN_RANGE (ival, -128, 127))
25232 {
25233 len = 1;
25234 continue;
25235 }
25236 }
25237 switch (mode)
25238 {
25239 case MODE_QI:
25240 len = 1;
25241 break;
25242 case MODE_HI:
25243 len = 2;
25244 break;
25245 case MODE_SI:
25246 len = 4;
25247 break;
25248 /* Immediates for DImode instructions are encoded
25249 as 32bit sign extended values. */
25250 case MODE_DI:
25251 len = 4;
25252 break;
25253 default:
25254 fatal_insn ("unknown insn mode", insn);
25255 }
25256 }
25257 return len;
25258 }
25259
25260 /* Compute default value for "length_address" attribute. */
25261 int
25262 ix86_attr_length_address_default (rtx insn)
25263 {
25264 int i;
25265
25266 if (get_attr_type (insn) == TYPE_LEA)
25267 {
25268 rtx set = PATTERN (insn), addr;
25269
25270 if (GET_CODE (set) == PARALLEL)
25271 set = XVECEXP (set, 0, 0);
25272
25273 gcc_assert (GET_CODE (set) == SET);
25274
25275 addr = SET_SRC (set);
25276
25277 return memory_address_length (addr, true);
25278 }
25279
25280 extract_insn_cached (insn);
25281 for (i = recog_data.n_operands - 1; i >= 0; --i)
25282 if (MEM_P (recog_data.operand[i]))
25283 {
25284 constrain_operands_cached (reload_completed);
25285 if (which_alternative != -1)
25286 {
25287 const char *constraints = recog_data.constraints[i];
25288 int alt = which_alternative;
25289
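	      /* Skip any '=' or '+' modifiers, then step past the constraint
		 strings of the alternatives preceding the matching one.  */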
25290 while (*constraints == '=' || *constraints == '+')
25291 constraints++;
25292 while (alt-- > 0)
25293 while (*constraints++ != ',')
25294 ;
25295 /* Skip ignored operands. */
25296 if (*constraints == 'X')
25297 continue;
25298 }
25299 return memory_address_length (XEXP (recog_data.operand[i], 0), false);
25300 }
25301 return 0;
25302 }
25303
25304 /* Compute default value for "length_vex" attribute. It includes
25305 2 or 3 byte VEX prefix and 1 opcode byte. */
25306
25307 int
25308 ix86_attr_length_vex_default (rtx insn, bool has_0f_opcode, bool has_vex_w)
25309 {
25310 int i;
25311
25312   /* Only the 0f opcode map can use the 2-byte VEX prefix; the VEX W bit
25313      requires the 3-byte VEX prefix.  */
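  /* The 2-byte (C5) form can encode only the 0F map and cannot carry the W,
     X or B bits, so everything else needs the 3-byte (C4) form.  */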
25314 if (!has_0f_opcode || has_vex_w)
25315 return 3 + 1;
25316
25317 /* We can always use 2 byte VEX prefix in 32bit. */
25318 if (!TARGET_64BIT)
25319 return 2 + 1;
25320
25321 extract_insn_cached (insn);
25322
25323 for (i = recog_data.n_operands - 1; i >= 0; --i)
25324 if (REG_P (recog_data.operand[i]))
25325 {
25326 /* REX.W bit uses 3 byte VEX prefix. */
25327 if (GET_MODE (recog_data.operand[i]) == DImode
25328 && GENERAL_REG_P (recog_data.operand[i]))
25329 return 3 + 1;
25330 }
25331 else
25332 {
25333 /* REX.X or REX.B bits use 3 byte VEX prefix. */
25334 if (MEM_P (recog_data.operand[i])
25335 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
25336 return 3 + 1;
25337 }
25338
25339 return 2 + 1;
25340 }
25341 \f
25342 /* Return the maximum number of instructions a cpu can issue. */
25343
25344 static int
25345 ix86_issue_rate (void)
25346 {
25347 switch (ix86_tune)
25348 {
25349 case PROCESSOR_PENTIUM:
25350 case PROCESSOR_BONNELL:
25351 case PROCESSOR_SILVERMONT:
25352 case PROCESSOR_INTEL:
25353 case PROCESSOR_K6:
25354 case PROCESSOR_BTVER2:
25355 case PROCESSOR_PENTIUM4:
25356 case PROCESSOR_NOCONA:
25357 return 2;
25358
25359 case PROCESSOR_PENTIUMPRO:
25360 case PROCESSOR_ATHLON:
25361 case PROCESSOR_K8:
25362 case PROCESSOR_AMDFAM10:
25363 case PROCESSOR_GENERIC:
25364 case PROCESSOR_BTVER1:
25365 return 3;
25366
25367 case PROCESSOR_BDVER1:
25368 case PROCESSOR_BDVER2:
25369 case PROCESSOR_BDVER3:
25370 case PROCESSOR_BDVER4:
25371 case PROCESSOR_CORE2:
25372 case PROCESSOR_NEHALEM:
25373 case PROCESSOR_SANDYBRIDGE:
25374 case PROCESSOR_HASWELL:
25375 return 4;
25376
25377 default:
25378 return 1;
25379 }
25380 }
25381
25382 /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
25383 by DEP_INSN and nothing set by DEP_INSN. */
25384
25385 static bool
25386 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
25387 {
25388 rtx set, set2;
25389
25390 /* Simplify the test for uninteresting insns. */
25391 if (insn_type != TYPE_SETCC
25392 && insn_type != TYPE_ICMOV
25393 && insn_type != TYPE_FCMOV
25394 && insn_type != TYPE_IBR)
25395 return false;
25396
25397 if ((set = single_set (dep_insn)) != 0)
25398 {
25399 set = SET_DEST (set);
25400 set2 = NULL_RTX;
25401 }
25402 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
25403 && XVECLEN (PATTERN (dep_insn), 0) == 2
25404 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
25405 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
25406 {
25407 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
25408       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
25409 }
25410 else
25411 return false;
25412
25413 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
25414 return false;
25415
25416 /* This test is true if the dependent insn reads the flags but
25417 not any other potentially set register. */
25418 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
25419 return false;
25420
25421 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
25422 return false;
25423
25424 return true;
25425 }
25426
25427 /* Return true iff USE_INSN has a memory address with operands set by
25428 SET_INSN. */
25429
25430 bool
25431 ix86_agi_dependent (rtx set_insn, rtx use_insn)
25432 {
25433 int i;
25434 extract_insn_cached (use_insn);
25435 for (i = recog_data.n_operands - 1; i >= 0; --i)
25436 if (MEM_P (recog_data.operand[i]))
25437 {
25438 rtx addr = XEXP (recog_data.operand[i], 0);
25439 return modified_in_p (addr, set_insn) != 0;
25440 }
25441 return false;
25442 }
25443
25444 /* Helper function for exact_store_load_dependency.
25445 Return true if addr is found in insn. */
25446 static bool
25447 exact_dependency_1 (rtx addr, rtx insn)
25448 {
25449 enum rtx_code code;
25450 const char *format_ptr;
25451 int i, j;
25452
25453 code = GET_CODE (insn);
25454 switch (code)
25455 {
25456 case MEM:
25457 if (rtx_equal_p (addr, insn))
25458 return true;
25459 break;
25460 case REG:
25461 CASE_CONST_ANY:
25462 case SYMBOL_REF:
25463 case CODE_LABEL:
25464 case PC:
25465 case CC0:
25466 case EXPR_LIST:
25467 return false;
25468 default:
25469 break;
25470 }
25471
25472 format_ptr = GET_RTX_FORMAT (code);
25473 for (i = 0; i < GET_RTX_LENGTH (code); i++)
25474 {
25475 switch (*format_ptr++)
25476 {
25477 case 'e':
25478 if (exact_dependency_1 (addr, XEXP (insn, i)))
25479 return true;
25480 break;
25481 case 'E':
25482 for (j = 0; j < XVECLEN (insn, i); j++)
25483 if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
25484 return true;
25485 break;
25486 }
25487 }
25488 return false;
25489 }
25490
25491 /* Return true if there is an exact dependency between a store and a load,
25492    i.e. the same memory address is used in both.  */
25493 static bool
25494 exact_store_load_dependency (rtx store, rtx load)
25495 {
25496 rtx set1, set2;
25497
25498 set1 = single_set (store);
25499 if (!set1)
25500 return false;
25501 if (!MEM_P (SET_DEST (set1)))
25502 return false;
25503 set2 = single_set (load);
25504 if (!set2)
25505 return false;
25506 if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
25507 return true;
25508 return false;
25509 }
25510
25511 static int
25512 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
25513 {
25514 enum attr_type insn_type, dep_insn_type;
25515 enum attr_memory memory;
25516 rtx set, set2;
25517 int dep_insn_code_number;
25518
25519 /* Anti and output dependencies have zero cost on all CPUs. */
25520 if (REG_NOTE_KIND (link) != 0)
25521 return 0;
25522
25523 dep_insn_code_number = recog_memoized (dep_insn);
25524
25525 /* If we can't recognize the insns, we can't really do anything. */
25526 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
25527 return cost;
25528
25529 insn_type = get_attr_type (insn);
25530 dep_insn_type = get_attr_type (dep_insn);
25531
25532 switch (ix86_tune)
25533 {
25534 case PROCESSOR_PENTIUM:
25535 /* Address Generation Interlock adds a cycle of latency. */
25536 if (insn_type == TYPE_LEA)
25537 {
25538 rtx addr = PATTERN (insn);
25539
25540 if (GET_CODE (addr) == PARALLEL)
25541 addr = XVECEXP (addr, 0, 0);
25542
25543 gcc_assert (GET_CODE (addr) == SET);
25544
25545 addr = SET_SRC (addr);
25546 if (modified_in_p (addr, dep_insn))
25547 cost += 1;
25548 }
25549 else if (ix86_agi_dependent (dep_insn, insn))
25550 cost += 1;
25551
25552 /* ??? Compares pair with jump/setcc. */
25553 if (ix86_flags_dependent (insn, dep_insn, insn_type))
25554 cost = 0;
25555
25556 /* Floating point stores require value to be ready one cycle earlier. */
25557 if (insn_type == TYPE_FMOV
25558 && get_attr_memory (insn) == MEMORY_STORE
25559 && !ix86_agi_dependent (dep_insn, insn))
25560 cost += 1;
25561 break;
25562
25563 case PROCESSOR_PENTIUMPRO:
25564 /* INT->FP conversion is expensive. */
25565 if (get_attr_fp_int_src (dep_insn))
25566 cost += 5;
25567
25568 /* There is one cycle extra latency between an FP op and a store. */
25569 if (insn_type == TYPE_FMOV
25570 && (set = single_set (dep_insn)) != NULL_RTX
25571 && (set2 = single_set (insn)) != NULL_RTX
25572 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
25573 && MEM_P (SET_DEST (set2)))
25574 cost += 1;
25575
25576 memory = get_attr_memory (insn);
25577
25578       /* Show the ability of the reorder buffer to hide the latency of a load
25579          by executing it in parallel with the previous instruction, when the
25580          previous instruction is not needed to compute the address.  */
25581 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25582 && !ix86_agi_dependent (dep_insn, insn))
25583 {
25584	  /* Claim moves to take one cycle, as the core can issue one load
25585	     at a time and the next load can start a cycle later.  */
25586 if (dep_insn_type == TYPE_IMOV
25587 || dep_insn_type == TYPE_FMOV)
25588 cost = 1;
25589 else if (cost > 1)
25590 cost--;
25591 }
25592 break;
25593
25594 case PROCESSOR_K6:
25595 /* The esp dependency is resolved before
25596 the instruction is really finished. */
25597 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25598 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25599 return 1;
25600
25601 /* INT->FP conversion is expensive. */
25602 if (get_attr_fp_int_src (dep_insn))
25603 cost += 5;
25604
25605 memory = get_attr_memory (insn);
25606
25607       /* Show the ability of the reorder buffer to hide the latency of a load
25608          by executing it in parallel with the previous instruction, when the
25609          previous instruction is not needed to compute the address.  */
25610 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25611 && !ix86_agi_dependent (dep_insn, insn))
25612 {
25613	  /* Claim moves to take one cycle, as the core can issue one load
25614	     at a time and the next load can start a cycle later.  */
25615 if (dep_insn_type == TYPE_IMOV
25616 || dep_insn_type == TYPE_FMOV)
25617 cost = 1;
25618 else if (cost > 2)
25619 cost -= 2;
25620 else
25621 cost = 1;
25622 }
25623 break;
25624
25625 case PROCESSOR_AMDFAM10:
25626 case PROCESSOR_BDVER1:
25627 case PROCESSOR_BDVER2:
25628 case PROCESSOR_BDVER3:
25629 case PROCESSOR_BDVER4:
25630 case PROCESSOR_BTVER1:
25631 case PROCESSOR_BTVER2:
25632 case PROCESSOR_GENERIC:
25633       /* The stack engine allows push and pop instructions to execute in parallel.  */
25634 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25635 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25636 return 0;
25637 /* FALLTHRU */
25638
25639 case PROCESSOR_ATHLON:
25640 case PROCESSOR_K8:
25641 memory = get_attr_memory (insn);
25642
25643       /* Show the ability of the reorder buffer to hide the latency of a load
25644          by executing it in parallel with the previous instruction, when the
25645          previous instruction is not needed to compute the address.  */
25646 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25647 && !ix86_agi_dependent (dep_insn, insn))
25648 {
25649 enum attr_unit unit = get_attr_unit (insn);
25650 int loadcost = 3;
25651
25652 /* Because of the difference between the length of integer and
25653 floating unit pipeline preparation stages, the memory operands
25654 for floating point are cheaper.
25655
25656	 ??? For Athlon the difference is most probably 2.  */
25657 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
25658 loadcost = 3;
25659 else
25660 loadcost = TARGET_ATHLON ? 2 : 0;
25661
25662 if (cost >= loadcost)
25663 cost -= loadcost;
25664 else
25665 cost = 0;
25666 }
25667 break;
25668
25669 case PROCESSOR_CORE2:
25670 case PROCESSOR_NEHALEM:
25671 case PROCESSOR_SANDYBRIDGE:
25672 case PROCESSOR_HASWELL:
25673       /* The stack engine allows push and pop instructions to execute in parallel.  */
25674 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
25675 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
25676 return 0;
25677
25678 memory = get_attr_memory (insn);
25679
25680       /* Show the ability of the reorder buffer to hide the latency of a load
25681          by executing it in parallel with the previous instruction, when the
25682          previous instruction is not needed to compute the address.  */
25683 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25684 && !ix86_agi_dependent (dep_insn, insn))
25685 {
25686 if (cost >= 4)
25687 cost -= 4;
25688 else
25689 cost = 0;
25690 }
25691 break;
25692
25693 case PROCESSOR_SILVERMONT:
25694 case PROCESSOR_INTEL:
25695 if (!reload_completed)
25696 return cost;
25697
25698 /* Increase cost of integer loads. */
25699 memory = get_attr_memory (dep_insn);
25700 if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
25701 {
25702 enum attr_unit unit = get_attr_unit (dep_insn);
25703 if (unit == UNIT_INTEGER && cost == 1)
25704 {
25705 if (memory == MEMORY_LOAD)
25706 cost = 3;
25707 else
25708 {
25709 /* Increase cost of ld/st for short int types only
25710 because of store forwarding issue. */
25711 rtx set = single_set (dep_insn);
25712 if (set && (GET_MODE (SET_DEST (set)) == QImode
25713 || GET_MODE (SET_DEST (set)) == HImode))
25714 {
25715 /* Increase cost of store/load insn if exact
25716 dependence exists and it is load insn. */
25717 enum attr_memory insn_memory = get_attr_memory (insn);
25718 if (insn_memory == MEMORY_LOAD
25719 && exact_store_load_dependency (dep_insn, insn))
25720 cost = 3;
25721 }
25722 }
25723 }
25724 }
25725
25726 default:
25727 break;
25728 }
25729
25730 return cost;
25731 }
25732
25733 /* How many alternative schedules to try. This should be as wide as the
25734 scheduling freedom in the DFA, but no wider. Making this value too
25735    large results in extra work for the scheduler.  */
25736
25737 static int
25738 ia32_multipass_dfa_lookahead (void)
25739 {
25740 switch (ix86_tune)
25741 {
25742 case PROCESSOR_PENTIUM:
25743 return 2;
25744
25745 case PROCESSOR_PENTIUMPRO:
25746 case PROCESSOR_K6:
25747 return 1;
25748
25749 case PROCESSOR_BDVER1:
25750 case PROCESSOR_BDVER2:
25751 case PROCESSOR_BDVER3:
25752 case PROCESSOR_BDVER4:
25753       /* We use a lookahead value of 4 for BD, both before and after reload
25754	 scheduling.  The plan is to use a value of 8 for -O3.  */
25755 return 4;
25756
25757 case PROCESSOR_CORE2:
25758 case PROCESSOR_NEHALEM:
25759 case PROCESSOR_SANDYBRIDGE:
25760 case PROCESSOR_HASWELL:
25761 case PROCESSOR_BONNELL:
25762 case PROCESSOR_SILVERMONT:
25763 case PROCESSOR_INTEL:
25764       /* Generally, we want haifa-sched:max_issue() to look ahead as far as
25765	 the number of instructions that can be executed in a cycle, i.e.,
25766	 issue_rate.  I wonder why tuning for many CPUs does not do this.  */
25767 if (reload_completed)
25768 return ix86_issue_rate ();
25769 /* Don't use lookahead for pre-reload schedule to save compile time. */
25770 return 0;
25771
25772 default:
25773 return 0;
25774 }
25775 }
25776
25777 /* Return true if target platform supports macro-fusion. */
25778
25779 static bool
25780 ix86_macro_fusion_p ()
25781 {
25782 return TARGET_FUSE_CMP_AND_BRANCH;
25783 }
25784
25785 /* Check whether the current microarchitecture supports macro fusion
25786    for the insn pair "CONDGEN + CONDJMP".  Refer to the
25787 "Intel Architectures Optimization Reference Manual". */
25788
25789 static bool
25790 ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
25791 {
25792 rtx src, dest;
25793   rtx condgen_set = single_set (condgen);
25794 enum rtx_code ccode;
25795 rtx compare_set = NULL_RTX, test_if, cond;
25796 rtx alu_set = NULL_RTX, addr = NULL_RTX;
25797
25798 if (get_attr_type (condgen) != TYPE_TEST
25799 && get_attr_type (condgen) != TYPE_ICMP
25800 && get_attr_type (condgen) != TYPE_INCDEC
25801 && get_attr_type (condgen) != TYPE_ALU)
25802 return false;
25803
25804   if (condgen_set == NULL_RTX
25805 && !TARGET_FUSE_ALU_AND_BRANCH)
25806 return false;
25807
25808   if (condgen_set != NULL_RTX)
25809     compare_set = condgen_set;
25810 else
25811 {
25812 int i;
25813 rtx pat = PATTERN (condgen);
25814 for (i = 0; i < XVECLEN (pat, 0); i++)
25815 if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
25816 {
25817 rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
25818 if (GET_CODE (set_src) == COMPARE)
25819 compare_set = XVECEXP (pat, 0, i);
25820 else
25821 alu_set = XVECEXP (pat, 0, i);
25822 }
25823 }
25824 if (compare_set == NULL_RTX)
25825 return false;
25826 src = SET_SRC (compare_set);
25827 if (GET_CODE (src) != COMPARE)
25828 return false;
25829
25830 /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
25831 supported. */
25832 if ((MEM_P (XEXP (src, 0))
25833 && CONST_INT_P (XEXP (src, 1)))
25834 || (MEM_P (XEXP (src, 1))
25835 && CONST_INT_P (XEXP (src, 0))))
25836 return false;
25837
25838 /* No fusion for RIP-relative address. */
25839 if (MEM_P (XEXP (src, 0)))
25840 addr = XEXP (XEXP (src, 0), 0);
25841 else if (MEM_P (XEXP (src, 1)))
25842 addr = XEXP (XEXP (src, 1), 0);
25843
25844 if (addr) {
25845 ix86_address parts;
25846 int ok = ix86_decompose_address (addr, &parts);
25847 gcc_assert (ok);
25848
25849 if (rip_relative_addr_p (&parts))
25850 return false;
25851 }
25852
25853 test_if = SET_SRC (pc_set (condjmp));
25854 cond = XEXP (test_if, 0);
25855 ccode = GET_CODE (cond);
25856   /* Check whether the conditional jump uses the Sign or Overflow flags.  */
25857 if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
25858 && (ccode == GE
25859 || ccode == GT
25860 || ccode == LE
25861 || ccode == LT))
25862 return false;
25863
25864 /* Return true for TYPE_TEST and TYPE_ICMP. */
25865 if (get_attr_type (condgen) == TYPE_TEST
25866 || get_attr_type (condgen) == TYPE_ICMP)
25867 return true;
25868
25869   /* What remains is the macro-fusion case for ALU + jmp.  */
25870 if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
25871 return false;
25872
25873 /* No fusion for alu op with memory destination operand. */
25874 dest = SET_DEST (alu_set);
25875 if (MEM_P (dest))
25876 return false;
25877
25878 /* Macro-fusion for inc/dec + unsigned conditional jump is not
25879 supported. */
25880 if (get_attr_type (condgen) == TYPE_INCDEC
25881 && (ccode == GEU
25882 || ccode == GTU
25883 || ccode == LEU
25884 || ccode == LTU))
25885 return false;
25886
25887 return true;
25888 }
25889
25890 /* Try to reorder the ready list to take advantage of Atom pipelined IMUL
25891    execution.  It is applied if
25892    (1) an IMUL instruction is on top of the list;
25893    (2) there is exactly one producer of an independent IMUL instruction in
25894        the ready list.
25895    Return the index of the IMUL producer if it was found, and -1 otherwise.  */
25896 static int
25897 do_reorder_for_imul (rtx *ready, int n_ready)
25898 {
25899 rtx insn, set, insn1, insn2;
25900 sd_iterator_def sd_it;
25901 dep_t dep;
25902 int index = -1;
25903 int i;
25904
25905 if (!TARGET_BONNELL)
25906 return index;
25907
25908 /* Check that IMUL instruction is on the top of ready list. */
25909 insn = ready[n_ready - 1];
25910 set = single_set (insn);
25911 if (!set)
25912 return index;
25913 if (!(GET_CODE (SET_SRC (set)) == MULT
25914 && GET_MODE (SET_SRC (set)) == SImode))
25915 return index;
25916
25917 /* Search for producer of independent IMUL instruction. */
25918 for (i = n_ready - 2; i >= 0; i--)
25919 {
25920 insn = ready[i];
25921 if (!NONDEBUG_INSN_P (insn))
25922 continue;
25923 /* Skip IMUL instruction. */
25924 insn2 = PATTERN (insn);
25925 if (GET_CODE (insn2) == PARALLEL)
25926 insn2 = XVECEXP (insn2, 0, 0);
25927 if (GET_CODE (insn2) == SET
25928 && GET_CODE (SET_SRC (insn2)) == MULT
25929 && GET_MODE (SET_SRC (insn2)) == SImode)
25930 continue;
25931
25932 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
25933 {
25934 rtx con;
25935 con = DEP_CON (dep);
25936 if (!NONDEBUG_INSN_P (con))
25937 continue;
25938 insn1 = PATTERN (con);
25939 if (GET_CODE (insn1) == PARALLEL)
25940 insn1 = XVECEXP (insn1, 0, 0);
25941
25942 if (GET_CODE (insn1) == SET
25943 && GET_CODE (SET_SRC (insn1)) == MULT
25944 && GET_MODE (SET_SRC (insn1)) == SImode)
25945 {
25946 sd_iterator_def sd_it1;
25947 dep_t dep1;
25948 /* Check if there is no other dependee for IMUL. */
25949 index = i;
25950 FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
25951 {
25952 rtx pro;
25953 pro = DEP_PRO (dep1);
25954 if (!NONDEBUG_INSN_P (pro))
25955 continue;
25956 if (pro != insn)
25957 index = -1;
25958 }
25959 if (index >= 0)
25960 break;
25961 }
25962 }
25963 if (index >= 0)
25964 break;
25965 }
25966 return index;
25967 }
25968
25969 /* Try to find the best candidate for the top of the ready list if two insns
25970    have the same priority - a candidate is best if its dependees were
25971    scheduled earlier.  Applied for Silvermont only.
25972    Return true if the top 2 insns must be interchanged.  */
25973 static bool
25974 swap_top_of_ready_list (rtx *ready, int n_ready)
25975 {
25976 rtx top = ready[n_ready - 1];
25977 rtx next = ready[n_ready - 2];
25978 rtx set;
25979 sd_iterator_def sd_it;
25980 dep_t dep;
25981 int clock1 = -1;
25982 int clock2 = -1;
25983 #define INSN_TICK(INSN) (HID (INSN)->tick)
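  /* INSN_TICK is the cycle on which INSN was scheduled; the latest producer
     ticks of the two candidates are compared below to pick a winner.  */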
25984
25985 if (!TARGET_SILVERMONT && !TARGET_INTEL)
25986 return false;
25987
25988 if (!NONDEBUG_INSN_P (top))
25989 return false;
25990 if (!NONJUMP_INSN_P (top))
25991 return false;
25992 if (!NONDEBUG_INSN_P (next))
25993 return false;
25994 if (!NONJUMP_INSN_P (next))
25995 return false;
25996 set = single_set (top);
25997 if (!set)
25998 return false;
25999 set = single_set (next);
26000 if (!set)
26001 return false;
26002
26003 if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26004 {
26005 if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26006 return false;
26007       /* Determine the winner more precisely.  */
26008 FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26009 {
26010 rtx pro;
26011 pro = DEP_PRO (dep);
26012 if (!NONDEBUG_INSN_P (pro))
26013 continue;
26014 if (INSN_TICK (pro) > clock1)
26015 clock1 = INSN_TICK (pro);
26016 }
26017 FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26018 {
26019 rtx pro;
26020 pro = DEP_PRO (dep);
26021 if (!NONDEBUG_INSN_P (pro))
26022 continue;
26023 if (INSN_TICK (pro) > clock2)
26024 clock2 = INSN_TICK (pro);
26025 }
26026
26027 if (clock1 == clock2)
26028 {
26029 /* Determine winner - load must win. */
26030 enum attr_memory memory1, memory2;
26031 memory1 = get_attr_memory (top);
26032 memory2 = get_attr_memory (next);
26033 if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26034 return true;
26035 }
26036 return (bool) (clock2 < clock1);
26037 }
26038 return false;
26039 #undef INSN_TICK
26040 }
26041
26042 /* Perform possible reordering of the ready list, for Atom/Silvermont only.
26043 Return issue rate. */
26044 static int
26045 ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
26046 int clock_var)
26047 {
26048 int issue_rate = -1;
26049 int n_ready = *pn_ready;
26050 int i;
26051 rtx insn;
26052 int index = -1;
26053
26054 /* Set up issue rate. */
26055 issue_rate = ix86_issue_rate ();
26056
26057   /* Do reordering for BONNELL/SILVERMONT only.  */
26058 if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26059 return issue_rate;
26060
26061 /* Nothing to do if ready list contains only 1 instruction. */
26062 if (n_ready <= 1)
26063 return issue_rate;
26064
26065   /* Do reordering for the post-reload scheduler only.  */
26066 if (!reload_completed)
26067 return issue_rate;
26068
26069 if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26070 {
26071 if (sched_verbose > 1)
26072 fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26073 INSN_UID (ready[index]));
26074
26075 /* Put IMUL producer (ready[index]) at the top of ready list. */
26076 insn = ready[index];
26077 for (i = index; i < n_ready - 1; i++)
26078 ready[i] = ready[i + 1];
26079 ready[n_ready - 1] = insn;
26080 return issue_rate;
26081 }
26082 if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
26083 {
26084 if (sched_verbose > 1)
26085 fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
26086 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
26087 /* Swap 2 top elements of ready list. */
26088 insn = ready[n_ready - 1];
26089 ready[n_ready - 1] = ready[n_ready - 2];
26090 ready[n_ready - 2] = insn;
26091 }
26092 return issue_rate;
26093 }
26094
26095 static bool
26096 ix86_class_likely_spilled_p (reg_class_t);
26097
26098 /* Return true if the lhs of INSN is a HW function argument register, and set
26099    *IS_SPILLED to true if it is a likely-spilled HW register.  */
26100 static bool
26101 insn_is_function_arg (rtx insn, bool* is_spilled)
26102 {
26103 rtx dst;
26104
26105 if (!NONDEBUG_INSN_P (insn))
26106 return false;
26107   /* Call instructions are not movable, ignore them.  */
26108 if (CALL_P (insn))
26109 return false;
26110 insn = PATTERN (insn);
26111 if (GET_CODE (insn) == PARALLEL)
26112 insn = XVECEXP (insn, 0, 0);
26113 if (GET_CODE (insn) != SET)
26114 return false;
26115 dst = SET_DEST (insn);
26116 if (REG_P (dst) && HARD_REGISTER_P (dst)
26117 && ix86_function_arg_regno_p (REGNO (dst)))
26118 {
26119 /* Is it likely spilled HW register? */
26120 if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
26121 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
26122 *is_spilled = true;
26123 return true;
26124 }
26125 return false;
26126 }
26127
26128 /* Add output dependencies for a chain of adjacent function-argument moves,
26129    but only if there is a move to a likely-spilled HW register.  Return the
26130    first argument if at least one dependence was added, or NULL otherwise.  */
26131 static rtx
26132 add_parameter_dependencies (rtx call, rtx head)
26133 {
26134 rtx insn;
26135 rtx last = call;
26136 rtx first_arg = NULL;
26137 bool is_spilled = false;
26138
26139 head = PREV_INSN (head);
26140
26141   /* Find the argument-passing instruction nearest to the call.  */
26142 while (true)
26143 {
26144 last = PREV_INSN (last);
26145 if (last == head)
26146 return NULL;
26147 if (!NONDEBUG_INSN_P (last))
26148 continue;
26149 if (insn_is_function_arg (last, &is_spilled))
26150 break;
26151 return NULL;
26152 }
26153
26154 first_arg = last;
26155 while (true)
26156 {
26157 insn = PREV_INSN (last);
26158 if (!INSN_P (insn))
26159 break;
26160 if (insn == head)
26161 break;
26162 if (!NONDEBUG_INSN_P (insn))
26163 {
26164 last = insn;
26165 continue;
26166 }
26167 if (insn_is_function_arg (insn, &is_spilled))
26168 {
26169	  /* Add an output dependence between two function arguments if the chain
26170	     of output arguments contains likely-spilled HW registers.  */
26171 if (is_spilled)
26172 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26173 first_arg = last = insn;
26174 }
26175 else
26176 break;
26177 }
26178 if (!is_spilled)
26179 return NULL;
26180 return first_arg;
26181 }
26182
26183 /* Add output or anti dependency from insn to first_arg to restrict its code
26184 motion. */
26185 static void
26186 avoid_func_arg_motion (rtx first_arg, rtx insn)
26187 {
26188 rtx set;
26189 rtx tmp;
26190
26191 set = single_set (insn);
26192 if (!set)
26193 return;
26194 tmp = SET_DEST (set);
26195 if (REG_P (tmp))
26196 {
26197 /* Add output dependency to the first function argument. */
26198 add_dependence (first_arg, insn, REG_DEP_OUTPUT);
26199 return;
26200 }
26201 /* Add anti dependency. */
26202 add_dependence (first_arg, insn, REG_DEP_ANTI);
26203 }
26204
26205 /* Avoid cross block motion of function argument through adding dependency
26206 from the first non-jump instruction in bb. */
26207 static void
26208 add_dependee_for_func_arg (rtx arg, basic_block bb)
26209 {
26210 rtx insn = BB_END (bb);
26211
26212 while (insn)
26213 {
26214 if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
26215 {
26216 rtx set = single_set (insn);
26217 if (set)
26218 {
26219 avoid_func_arg_motion (arg, insn);
26220 return;
26221 }
26222 }
26223 if (insn == BB_HEAD (bb))
26224 return;
26225 insn = PREV_INSN (insn);
26226 }
26227 }
26228
26229 /* Hook for pre-reload schedule - avoid motion of function arguments
26230 passed in likely spilled HW registers. */
26231 static void
26232 ix86_dependencies_evaluation_hook (rtx head, rtx tail)
26233 {
26234 rtx insn;
26235 rtx first_arg = NULL;
26236 if (reload_completed)
26237 return;
26238 while (head != tail && DEBUG_INSN_P (head))
26239 head = NEXT_INSN (head);
26240 for (insn = tail; insn != head; insn = PREV_INSN (insn))
26241 if (INSN_P (insn) && CALL_P (insn))
26242 {
26243 first_arg = add_parameter_dependencies (insn, head);
26244 if (first_arg)
26245 {
26246	    /* Add a dependee for the first argument to predecessors, but only if
26247	       the region contains more than one block.  */
26248 basic_block bb = BLOCK_FOR_INSN (insn);
26249 int rgn = CONTAINING_RGN (bb->index);
26250 int nr_blks = RGN_NR_BLOCKS (rgn);
26251 /* Skip trivial regions and region head blocks that can have
26252 predecessors outside of region. */
26253 if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
26254 {
26255 edge e;
26256 edge_iterator ei;
26257 /* Assume that region is SCC, i.e. all immediate predecessors
26258 of non-head block are in the same region. */
26259 FOR_EACH_EDGE (e, ei, bb->preds)
26260 {
26261		    /* Avoid creating loop-carried dependencies by using the
26262		       topological ordering in the region.  */
26263 if (BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
26264 add_dependee_for_func_arg (first_arg, e->src);
26265 }
26266 }
26267 insn = first_arg;
26268 if (insn == head)
26269 break;
26270 }
26271 }
26272 else if (first_arg)
26273 avoid_func_arg_motion (first_arg, insn);
26274 }
26275
26276 /* Hook for pre-reload schedule - set priority of moves from likely spilled
26277    HW registers to the maximum, to schedule them as soon as possible.  These are
26278 moves from function argument registers at the top of the function entry
26279 and moves from function return value registers after call. */
26280 static int
26281 ix86_adjust_priority (rtx insn, int priority)
26282 {
26283 rtx set;
26284
26285 if (reload_completed)
26286 return priority;
26287
26288 if (!NONDEBUG_INSN_P (insn))
26289 return priority;
26290
26291 set = single_set (insn);
26292 if (set)
26293 {
26294 rtx tmp = SET_SRC (set);
26295 if (REG_P (tmp)
26296 && HARD_REGISTER_P (tmp)
26297 && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
26298 && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
26299 return current_sched_info->sched_max_insns_priority;
26300 }
26301
26302 return priority;
26303 }
26304
26305 /* Model decoder of Core 2/i7.
26306 Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
26307 track the instruction fetch block boundaries and make sure that long
26308 (9+ bytes) instructions are assigned to D0. */
26309
26310 /* Maximum length of an insn that can be handled by
26311 a secondary decoder unit. '8' for Core 2/i7. */
26312 static int core2i7_secondary_decoder_max_insn_size;
26313
26314 /* Ifetch block size, i.e., number of bytes decoder reads per cycle.
26315 '16' for Core 2/i7. */
26316 static int core2i7_ifetch_block_size;
26317
26318 /* Maximum number of instructions decoder can handle per cycle.
26319 '6' for Core 2/i7. */
26320 static int core2i7_ifetch_block_max_insns;
26321
26322 typedef struct ix86_first_cycle_multipass_data_ *
26323 ix86_first_cycle_multipass_data_t;
26324 typedef const struct ix86_first_cycle_multipass_data_ *
26325 const_ix86_first_cycle_multipass_data_t;
26326
26327 /* A variable to store target state across calls to max_issue within
26328 one cycle. */
26329 static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
26330 *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
26331
26332 /* Initialize DATA. */
26333 static void
26334 core2i7_first_cycle_multipass_init (void *_data)
26335 {
26336 ix86_first_cycle_multipass_data_t data
26337 = (ix86_first_cycle_multipass_data_t) _data;
26338
26339 data->ifetch_block_len = 0;
26340 data->ifetch_block_n_insns = 0;
26341 data->ready_try_change = NULL;
26342 data->ready_try_change_size = 0;
26343 }
26344
26345 /* Advancing the cycle; reset ifetch block counts. */
26346 static void
26347 core2i7_dfa_post_advance_cycle (void)
26348 {
26349 ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
26350
26351 gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26352
26353 data->ifetch_block_len = 0;
26354 data->ifetch_block_n_insns = 0;
26355 }
26356
26357 static int min_insn_size (rtx);
26358
26359 /* Filter out insns from ready_try that the core will not be able to issue
26360 on current cycle due to decoder. */
26361 static void
26362 core2i7_first_cycle_multipass_filter_ready_try
26363 (const_ix86_first_cycle_multipass_data_t data,
26364 char *ready_try, int n_ready, bool first_cycle_insn_p)
26365 {
26366 while (n_ready--)
26367 {
26368 rtx insn;
26369 int insn_size;
26370
26371 if (ready_try[n_ready])
26372 continue;
26373
26374 insn = get_ready_element (n_ready);
26375 insn_size = min_insn_size (insn);
26376
26377       if (/* If this insn is too long for a secondary decoder ...  */
26378 (!first_cycle_insn_p
26379 && insn_size > core2i7_secondary_decoder_max_insn_size)
26380 /* ... or it would not fit into the ifetch block ... */
26381 || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
26382 /* ... or the decoder is full already ... */
26383 || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
26384 /* ... mask the insn out. */
26385 {
26386 ready_try[n_ready] = 1;
26387
26388 if (data->ready_try_change)
26389 bitmap_set_bit (data->ready_try_change, n_ready);
26390 }
26391 }
26392 }
26393
26394 /* Prepare for a new round of multipass lookahead scheduling. */
26395 static void
26396 core2i7_first_cycle_multipass_begin (void *_data, char *ready_try, int n_ready,
26397 bool first_cycle_insn_p)
26398 {
26399 ix86_first_cycle_multipass_data_t data
26400 = (ix86_first_cycle_multipass_data_t) _data;
26401 const_ix86_first_cycle_multipass_data_t prev_data
26402 = ix86_first_cycle_multipass_data;
26403
26404 /* Restore the state from the end of the previous round. */
26405 data->ifetch_block_len = prev_data->ifetch_block_len;
26406 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
26407
26408 /* Filter instructions that cannot be issued on current cycle due to
26409 decoder restrictions. */
26410 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26411 first_cycle_insn_p);
26412 }
26413
26414 /* INSN is being issued in current solution. Account for its impact on
26415 the decoder model. */
26416 static void
26417 core2i7_first_cycle_multipass_issue (void *_data, char *ready_try, int n_ready,
26418 rtx insn, const void *_prev_data)
26419 {
26420 ix86_first_cycle_multipass_data_t data
26421 = (ix86_first_cycle_multipass_data_t) _data;
26422 const_ix86_first_cycle_multipass_data_t prev_data
26423 = (const_ix86_first_cycle_multipass_data_t) _prev_data;
26424
26425 int insn_size = min_insn_size (insn);
26426
26427 data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
26428 data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
26429 gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
26430 && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
26431
26432 /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
26433 if (!data->ready_try_change)
26434 {
26435 data->ready_try_change = sbitmap_alloc (n_ready);
26436 data->ready_try_change_size = n_ready;
26437 }
26438 else if (data->ready_try_change_size < n_ready)
26439 {
26440 data->ready_try_change = sbitmap_resize (data->ready_try_change,
26441 n_ready, 0);
26442 data->ready_try_change_size = n_ready;
26443 }
26444 bitmap_clear (data->ready_try_change);
26445
26446 /* Filter out insns from ready_try that the core will not be able to issue
26447 on current cycle due to decoder. */
26448 core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
26449 false);
26450 }
26451
26452 /* Revert the effect on ready_try. */
26453 static void
26454 core2i7_first_cycle_multipass_backtrack (const void *_data,
26455 char *ready_try,
26456 int n_ready ATTRIBUTE_UNUSED)
26457 {
26458 const_ix86_first_cycle_multipass_data_t data
26459 = (const_ix86_first_cycle_multipass_data_t) _data;
26460 unsigned int i = 0;
26461 sbitmap_iterator sbi;
26462
26463 gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
26464 EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
26465 {
26466 ready_try[i] = 0;
26467 }
26468 }
26469
26470 /* Save the result of multipass lookahead scheduling for the next round. */
26471 static void
26472 core2i7_first_cycle_multipass_end (const void *_data)
26473 {
26474 const_ix86_first_cycle_multipass_data_t data
26475 = (const_ix86_first_cycle_multipass_data_t) _data;
26476 ix86_first_cycle_multipass_data_t next_data
26477 = ix86_first_cycle_multipass_data;
26478
26479 if (data != NULL)
26480 {
26481 next_data->ifetch_block_len = data->ifetch_block_len;
26482 next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
26483 }
26484 }
26485
26486 /* Deallocate target data. */
26487 static void
26488 core2i7_first_cycle_multipass_fini (void *_data)
26489 {
26490 ix86_first_cycle_multipass_data_t data
26491 = (ix86_first_cycle_multipass_data_t) _data;
26492
26493 if (data->ready_try_change)
26494 {
26495 sbitmap_free (data->ready_try_change);
26496 data->ready_try_change = NULL;
26497 data->ready_try_change_size = 0;
26498 }
26499 }
26500
26501 /* Prepare for scheduling pass. */
26502 static void
26503 ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
26504 int verbose ATTRIBUTE_UNUSED,
26505 int max_uid ATTRIBUTE_UNUSED)
26506 {
26507 /* Install scheduling hooks for current CPU. Some of these hooks are used
26508 in time-critical parts of the scheduler, so we only set them up when
26509 they are actually used. */
26510 switch (ix86_tune)
26511 {
26512 case PROCESSOR_CORE2:
26513 case PROCESSOR_NEHALEM:
26514 case PROCESSOR_SANDYBRIDGE:
26515 case PROCESSOR_HASWELL:
26516 /* Do not perform multipass scheduling for pre-reload schedule
26517 to save compile time. */
26518 if (reload_completed)
26519 {
26520 targetm.sched.dfa_post_advance_cycle
26521 = core2i7_dfa_post_advance_cycle;
26522 targetm.sched.first_cycle_multipass_init
26523 = core2i7_first_cycle_multipass_init;
26524 targetm.sched.first_cycle_multipass_begin
26525 = core2i7_first_cycle_multipass_begin;
26526 targetm.sched.first_cycle_multipass_issue
26527 = core2i7_first_cycle_multipass_issue;
26528 targetm.sched.first_cycle_multipass_backtrack
26529 = core2i7_first_cycle_multipass_backtrack;
26530 targetm.sched.first_cycle_multipass_end
26531 = core2i7_first_cycle_multipass_end;
26532 targetm.sched.first_cycle_multipass_fini
26533 = core2i7_first_cycle_multipass_fini;
26534
26535 /* Set decoder parameters. */
26536 core2i7_secondary_decoder_max_insn_size = 8;
26537 core2i7_ifetch_block_size = 16;
26538 core2i7_ifetch_block_max_insns = 6;
26539 break;
26540 }
26541 /* ... Fall through ... */
26542 default:
26543 targetm.sched.dfa_post_advance_cycle = NULL;
26544 targetm.sched.first_cycle_multipass_init = NULL;
26545 targetm.sched.first_cycle_multipass_begin = NULL;
26546 targetm.sched.first_cycle_multipass_issue = NULL;
26547 targetm.sched.first_cycle_multipass_backtrack = NULL;
26548 targetm.sched.first_cycle_multipass_end = NULL;
26549 targetm.sched.first_cycle_multipass_fini = NULL;
26550 break;
26551 }
26552 }
26553
26554 \f
26555 /* Compute the alignment given to a constant that is being placed in memory.
26556 EXP is the constant and ALIGN is the alignment that the object would
26557 ordinarily have.
26558 The value of this function is used instead of that alignment to align
26559 the object. */
26560
26561 int
26562 ix86_constant_alignment (tree exp, int align)
26563 {
26564 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
26565 || TREE_CODE (exp) == INTEGER_CST)
26566 {
26567 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
26568 return 64;
26569 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
26570 return 128;
26571 }
26572 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
26573 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
26574 return BITS_PER_WORD;
26575
26576 return align;
26577 }
26578
26579 /* Compute the alignment for a static variable.
26580 TYPE is the data type, and ALIGN is the alignment that
26581 the object would ordinarily have. The value of this function is used
26582 instead of that alignment to align the object. */
26583
26584 int
26585 ix86_data_alignment (tree type, int align, bool opt)
26586 {
26587 /* GCC 4.8 and earlier used to incorrectly assume this alignment even
26588 for symbols from other compilation units or symbols that don't need
26589 to bind locally. In order to preserve some ABI compatibility with
26590 those compilers, ensure we don't decrease alignment from what we
26591 used to assume. */
26592
26593 int max_align_compat
26594 = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
26595
26596   /* A data structure equal to or greater than the size of a cache line
26597 (64 bytes in the Pentium 4 and other recent Intel processors, including
26598 processors based on Intel Core microarchitecture) should be aligned
26599 so that its base address is a multiple of a cache line size. */
26600
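  /* prefetch_block is expressed in bytes while alignments are in bits, hence
     the factor of 8.  */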
26601 int max_align
26602 = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
26603
26604 if (max_align < BITS_PER_WORD)
26605 max_align = BITS_PER_WORD;
26606
26607 if (opt
26608 && AGGREGATE_TYPE_P (type)
26609 && TYPE_SIZE (type)
26610 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
26611 {
26612 if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
26613 && align < max_align_compat)
26614 align = max_align_compat;
26615 if (wi::geu_p (TYPE_SIZE (type), max_align)
26616 && align < max_align)
26617 align = max_align;
26618 }
26619
26620 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26621 to 16byte boundary. */
26622 if (TARGET_64BIT)
26623 {
26624 if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
26625 && TYPE_SIZE (type)
26626 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26627 && wi::geu_p (TYPE_SIZE (type), 128)
26628 && align < 128)
26629 return 128;
26630 }
26631
26632 if (!opt)
26633 return align;
26634
26635 if (TREE_CODE (type) == ARRAY_TYPE)
26636 {
26637 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26638 return 64;
26639 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26640 return 128;
26641 }
26642 else if (TREE_CODE (type) == COMPLEX_TYPE)
26643 {
26644
26645 if (TYPE_MODE (type) == DCmode && align < 64)
26646 return 64;
26647 if ((TYPE_MODE (type) == XCmode
26648 || TYPE_MODE (type) == TCmode) && align < 128)
26649 return 128;
26650 }
26651 else if ((TREE_CODE (type) == RECORD_TYPE
26652 || TREE_CODE (type) == UNION_TYPE
26653 || TREE_CODE (type) == QUAL_UNION_TYPE)
26654 && TYPE_FIELDS (type))
26655 {
26656 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26657 return 64;
26658 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26659 return 128;
26660 }
26661 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26662 || TREE_CODE (type) == INTEGER_TYPE)
26663 {
26664 if (TYPE_MODE (type) == DFmode && align < 64)
26665 return 64;
26666 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26667 return 128;
26668 }
26669
26670 return align;
26671 }
26672
26673 /* Compute the alignment for a local variable or a stack slot. EXP is
26674 the data type or decl itself, MODE is the widest mode available and
26675 ALIGN is the alignment that the object would ordinarily have. The
26676 value of this macro is used instead of that alignment to align the
26677 object. */
26678
26679 unsigned int
26680 ix86_local_alignment (tree exp, enum machine_mode mode,
26681 unsigned int align)
26682 {
26683 tree type, decl;
26684
26685 if (exp && DECL_P (exp))
26686 {
26687 type = TREE_TYPE (exp);
26688 decl = exp;
26689 }
26690 else
26691 {
26692 type = exp;
26693 decl = NULL;
26694 }
26695
26696 /* Don't do dynamic stack realignment for long long objects with
26697 -mpreferred-stack-boundary=2. */
26698 if (!TARGET_64BIT
26699 && align == 64
26700 && ix86_preferred_stack_boundary < 64
26701 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
26702 && (!type || !TYPE_USER_ALIGN (type))
26703 && (!decl || !DECL_USER_ALIGN (decl)))
26704 align = 32;
26705
26706 /* If TYPE is NULL, we are allocating a stack slot for caller-save
26707 register in MODE. We will return the largest alignment of XF
26708 and DF. */
26709 if (!type)
26710 {
26711 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
26712 align = GET_MODE_ALIGNMENT (DFmode);
26713 return align;
26714 }
26715
26716 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
26717 to 16byte boundary. Exact wording is:
26718
26719 An array uses the same alignment as its elements, except that a local or
26720 global array variable of length at least 16 bytes or
26721 a C99 variable-length array variable always has alignment of at least 16 bytes.
26722
26723    This was added to allow use of aligned SSE instructions on arrays.  This
26724    rule is meant for static storage (where the compiler cannot do the analysis
26725    by itself).  We follow it for automatic variables only when convenient.
26726    We fully control everything in the function being compiled, and functions
26727    from other units cannot rely on the alignment.
26728
26729    Exclude the va_list type.  It is the common case of a local array where
26730    we cannot benefit from the alignment.
26731
26732 TODO: Probably one should optimize for size only when var is not escaping. */
26733 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
26734 && TARGET_SSE)
26735 {
26736 if (AGGREGATE_TYPE_P (type)
26737 && (va_list_type_node == NULL_TREE
26738 || (TYPE_MAIN_VARIANT (type)
26739 != TYPE_MAIN_VARIANT (va_list_type_node)))
26740 && TYPE_SIZE (type)
26741 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
26742 && wi::geu_p (TYPE_SIZE (type), 16)
26743 && align < 128)
26744 return 128;
26745 }
26746 if (TREE_CODE (type) == ARRAY_TYPE)
26747 {
26748 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
26749 return 64;
26750 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
26751 return 128;
26752 }
26753 else if (TREE_CODE (type) == COMPLEX_TYPE)
26754 {
26755 if (TYPE_MODE (type) == DCmode && align < 64)
26756 return 64;
26757 if ((TYPE_MODE (type) == XCmode
26758 || TYPE_MODE (type) == TCmode) && align < 128)
26759 return 128;
26760 }
26761 else if ((TREE_CODE (type) == RECORD_TYPE
26762 || TREE_CODE (type) == UNION_TYPE
26763 || TREE_CODE (type) == QUAL_UNION_TYPE)
26764 && TYPE_FIELDS (type))
26765 {
26766 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
26767 return 64;
26768 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
26769 return 128;
26770 }
26771 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
26772 || TREE_CODE (type) == INTEGER_TYPE)
26773 {
26774
26775 if (TYPE_MODE (type) == DFmode && align < 64)
26776 return 64;
26777 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
26778 return 128;
26779 }
26780 return align;
26781 }
26782
26783 /* Compute the minimum required alignment for dynamic stack realignment
26784 purposes for a local variable, parameter or a stack slot. EXP is
26785 the data type or decl itself, MODE is its mode and ALIGN is the
26786 alignment that the object would ordinarily have. */
26787
26788 unsigned int
26789 ix86_minimum_alignment (tree exp, enum machine_mode mode,
26790 unsigned int align)
26791 {
26792 tree type, decl;
26793
26794 if (exp && DECL_P (exp))
26795 {
26796 type = TREE_TYPE (exp);
26797 decl = exp;
26798 }
26799 else
26800 {
26801 type = exp;
26802 decl = NULL;
26803 }
26804
26805 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
26806 return align;
26807
26808 /* Don't do dynamic stack realignment for long long objects with
26809 -mpreferred-stack-boundary=2. */
26810 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
26811 && (!type || !TYPE_USER_ALIGN (type))
26812 && (!decl || !DECL_USER_ALIGN (decl)))
26813 return 32;
26814
26815 return align;
26816 }
26817 \f
26818 /* Find a location for the static chain incoming to a nested function.
26819 This is a register, unless all free registers are used by arguments. */
26820
26821 static rtx
26822 ix86_static_chain (const_tree fndecl, bool incoming_p)
26823 {
26824 unsigned regno;
26825
26826 if (!DECL_STATIC_CHAIN (fndecl))
26827 return NULL;
26828
26829 if (TARGET_64BIT)
26830 {
26831 /* We always use R10 in 64-bit mode. */
26832 regno = R10_REG;
26833 }
26834 else
26835 {
26836 tree fntype;
26837 unsigned int ccvt;
26838
26839 /* By default in 32-bit mode we use ECX to pass the static chain. */
26840 regno = CX_REG;
26841
26842 fntype = TREE_TYPE (fndecl);
26843 ccvt = ix86_get_callcvt (fntype);
26844 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
26845 {
26846 /* Fastcall functions use ecx/edx for arguments, which leaves
26847 us with EAX for the static chain.
26848 Thiscall functions use ecx for arguments, which also
26849 leaves us with EAX for the static chain. */
26850 regno = AX_REG;
26851 }
26852 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
26853 {
26854 /* Thiscall functions use ecx for arguments, which leaves
26855 us with EAX and EDX for the static chain.
26856 We use EAX for ABI compatibility. */
26857 regno = AX_REG;
26858 }
26859 else if (ix86_function_regparm (fntype, fndecl) == 3)
26860 {
26861 /* For regparm 3, we have no free call-clobbered registers in
26862 which to store the static chain. In order to implement this,
26863 we have the trampoline push the static chain to the stack.
26864 However, we can't push a value below the return address when
26865 we call the nested function directly, so we have to use an
26866 alternate entry point. For this we use ESI, and have the
26867 alternate entry point push ESI, so that things appear the
26868 same once we're executing the nested function. */
26869 if (incoming_p)
26870 {
26871 if (fndecl == current_function_decl)
26872 ix86_static_chain_on_stack = true;
26873 return gen_frame_mem (SImode,
26874 plus_constant (Pmode,
26875 arg_pointer_rtx, -8));
26876 }
26877 regno = SI_REG;
26878 }
26879 }
26880
26881 return gen_rtx_REG (Pmode, regno);
26882 }
26883
26884 /* Emit RTL insns to initialize the variable parts of a trampoline.
26885 FNDECL is the decl of the target address; M_TRAMP is a MEM for
26886 the trampoline, and CHAIN_VALUE is an RTX for the static chain
26887 to be passed to the target function. */
26888
26889 static void
26890 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
26891 {
26892 rtx mem, fnaddr;
26893 int opcode;
26894 int offset = 0;
26895
26896 fnaddr = XEXP (DECL_RTL (fndecl), 0);
26897
26898 if (TARGET_64BIT)
26899 {
26900 int size;
26901
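/* A sketch of what the 64-bit trampoline emitted below looks like
   (the shorter movl forms, REX prefix 41 instead of 49, are used when
   a 32-bit immediate suffices):
	movabsq	$<fnaddr>, %r11		49 bb <imm64>
	movabsq	$<chain>,  %r10		49 ba <imm64>
	jmpq	*%r11			49 ff e3
	nop				90  */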
26902 /* Load the function address into r11. Try to load the address
26903 using the shorter movl instead of movabs. We may want to support
26904 movq for kernel mode, but the kernel does not use trampolines at
26905 the moment. FNADDR is a 32-bit address and may not be in
26906 DImode when ptr_mode == SImode. Always use movl in this
26907 case. */
26908 if (ptr_mode == SImode
26909 || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
26910 {
26911 fnaddr = copy_addr_to_reg (fnaddr);
26912
26913 mem = adjust_address (m_tramp, HImode, offset);
26914 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
26915
26916 mem = adjust_address (m_tramp, SImode, offset + 2);
26917 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
26918 offset += 6;
26919 }
26920 else
26921 {
26922 mem = adjust_address (m_tramp, HImode, offset);
26923 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
26924
26925 mem = adjust_address (m_tramp, DImode, offset + 2);
26926 emit_move_insn (mem, fnaddr);
26927 offset += 10;
26928 }
26929
26930 /* Load the static chain into r10 using movabs. Use the shorter
26931 movl instead of movabs when ptr_mode == SImode. */
26932 if (ptr_mode == SImode)
26933 {
26934 opcode = 0xba41;
26935 size = 6;
26936 }
26937 else
26938 {
26939 opcode = 0xba49;
26940 size = 10;
26941 }
26942
26943 mem = adjust_address (m_tramp, HImode, offset);
26944 emit_move_insn (mem, gen_int_mode (opcode, HImode));
26945
26946 mem = adjust_address (m_tramp, ptr_mode, offset + 2);
26947 emit_move_insn (mem, chain_value);
26948 offset += size;
26949
26950 /* Jump to r11; the last (unused) byte is a nop, only there to
26951 pad the write out to a single 32-bit store. */
26952 mem = adjust_address (m_tramp, SImode, offset);
26953 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
26954 offset += 4;
26955 }
26956 else
26957 {
26958 rtx disp, chain;
26959
26960 /* Depending on the static chain location, either load a register
26961 with a constant, or push the constant to the stack. All of the
26962 instructions are the same size. */
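/* A sketch of the two forms the 32-bit trampoline emitted below can
   take (b8/b9 = movl $imm32 into %eax/%ecx, 68 = pushl $imm32,
   e9 = jmp rel32):
	movl	$<chain>, %eax|%ecx	b8|b9 <imm32>
	jmp	<fnaddr>		e9 <rel32>
   or, when the static chain is passed on the stack,
	pushl	$<chain>		68 <imm32>
	jmp	<fnaddr>		e9 <rel32>  */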
26963 chain = ix86_static_chain (fndecl, true);
26964 if (REG_P (chain))
26965 {
26966 switch (REGNO (chain))
26967 {
26968 case AX_REG:
26969 opcode = 0xb8; break;
26970 case CX_REG:
26971 opcode = 0xb9; break;
26972 default:
26973 gcc_unreachable ();
26974 }
26975 }
26976 else
26977 opcode = 0x68;
26978
26979 mem = adjust_address (m_tramp, QImode, offset);
26980 emit_move_insn (mem, gen_int_mode (opcode, QImode));
26981
26982 mem = adjust_address (m_tramp, SImode, offset + 1);
26983 emit_move_insn (mem, chain_value);
26984 offset += 5;
26985
26986 mem = adjust_address (m_tramp, QImode, offset);
26987 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
26988
26989 mem = adjust_address (m_tramp, SImode, offset + 1);
26990
26991 /* Compute the offset from the end of the jmp to the target function.
26992 When the trampoline stores the static chain on the stack, we need
26993 to skip the target's first insn, which pushes the (call-saved)
26994 static chain register; this push is 1 byte. */
26995 offset += 5;
26996 disp = expand_binop (SImode, sub_optab, fnaddr,
26997 plus_constant (Pmode, XEXP (m_tramp, 0),
26998 offset - (MEM_P (chain) ? 1 : 0)),
26999 NULL_RTX, 1, OPTAB_DIRECT);
27000 emit_move_insn (mem, disp);
27001 }
27002
27003 gcc_assert (offset <= TRAMPOLINE_SIZE);
27004
27005 #ifdef HAVE_ENABLE_EXECUTE_STACK
27006 #ifdef CHECK_EXECUTE_STACK_ENABLED
27007 if (CHECK_EXECUTE_STACK_ENABLED)
27008 #endif
27009 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27010 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27011 #endif
27012 }
27013 \f
27014 /* The following file contains several enumerations and data structures
27015 built from the definitions in i386-builtin-types.def. */
27016
27017 #include "i386-builtin-types.inc"
27018
27019 /* Table for the ix86 builtin non-function types. */
27020 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27021
27022 /* Retrieve an element from the above table, building some of
27023 the types lazily. */
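/* Note: the ix86_builtin_type enum is laid out as primitive types,
   then vector types, then plain pointer types, then const-qualified
   pointer types, so the index into each helper array below is formed
   by subtracting the last member of the preceding group.  */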
27024
27025 static tree
27026 ix86_get_builtin_type (enum ix86_builtin_type tcode)
27027 {
27028 unsigned int index;
27029 tree type, itype;
27030
27031 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_type_tab));
27032
27033 type = ix86_builtin_type_tab[(int) tcode];
27034 if (type != NULL)
27035 return type;
27036
27037 gcc_assert (tcode > IX86_BT_LAST_PRIM);
27038 if (tcode <= IX86_BT_LAST_VECT)
27039 {
27040 enum machine_mode mode;
27041
27042 index = tcode - IX86_BT_LAST_PRIM - 1;
27043 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27044 mode = ix86_builtin_type_vect_mode[index];
27045
27046 type = build_vector_type_for_mode (itype, mode);
27047 }
27048 else
27049 {
27050 int quals;
27051
27052 index = tcode - IX86_BT_LAST_VECT - 1;
27053 if (tcode <= IX86_BT_LAST_PTR)
27054 quals = TYPE_UNQUALIFIED;
27055 else
27056 quals = TYPE_QUAL_CONST;
27057
27058 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
27059 if (quals != TYPE_UNQUALIFIED)
27060 itype = build_qualified_type (itype, quals);
27061
27062 type = build_pointer_type (itype);
27063 }
27064
27065 ix86_builtin_type_tab[(int) tcode] = type;
27066 return type;
27067 }
27068
27069 /* Table for the ix86 builtin function types. */
27070 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
27071
27072 /* Retrieve an element from the above table, building some of
27073 the types lazily. */
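/* Note: for a non-alias code, ix86_builtin_func_args[start] is the
   return type and the following entries up to (but not including)
   ix86_builtin_func_start[tcode + 1] are the parameter types; the
   argument list is built in reverse below so it ends up in order.  */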
27074
27075 static tree
27076 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
27077 {
27078 tree type;
27079
27080 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
27081
27082 type = ix86_builtin_func_type_tab[(int) tcode];
27083 if (type != NULL)
27084 return type;
27085
27086 if (tcode <= IX86_BT_LAST_FUNC)
27087 {
27088 unsigned start = ix86_builtin_func_start[(int) tcode];
27089 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
27090 tree rtype, atype, args = void_list_node;
27091 unsigned i;
27092
27093 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
27094 for (i = after - 1; i > start; --i)
27095 {
27096 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
27097 args = tree_cons (NULL, atype, args);
27098 }
27099
27100 type = build_function_type (rtype, args);
27101 }
27102 else
27103 {
27104 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
27105 enum ix86_builtin_func_type icode;
27106
27107 icode = ix86_builtin_func_alias_base[index];
27108 type = ix86_get_builtin_func_type (icode);
27109 }
27110
27111 ix86_builtin_func_type_tab[(int) tcode] = type;
27112 return type;
27113 }
27114
27115
27116 /* Codes for all the SSE/MMX builtins. */
27117 enum ix86_builtins
27118 {
27119 IX86_BUILTIN_ADDPS,
27120 IX86_BUILTIN_ADDSS,
27121 IX86_BUILTIN_DIVPS,
27122 IX86_BUILTIN_DIVSS,
27123 IX86_BUILTIN_MULPS,
27124 IX86_BUILTIN_MULSS,
27125 IX86_BUILTIN_SUBPS,
27126 IX86_BUILTIN_SUBSS,
27127
27128 IX86_BUILTIN_CMPEQPS,
27129 IX86_BUILTIN_CMPLTPS,
27130 IX86_BUILTIN_CMPLEPS,
27131 IX86_BUILTIN_CMPGTPS,
27132 IX86_BUILTIN_CMPGEPS,
27133 IX86_BUILTIN_CMPNEQPS,
27134 IX86_BUILTIN_CMPNLTPS,
27135 IX86_BUILTIN_CMPNLEPS,
27136 IX86_BUILTIN_CMPNGTPS,
27137 IX86_BUILTIN_CMPNGEPS,
27138 IX86_BUILTIN_CMPORDPS,
27139 IX86_BUILTIN_CMPUNORDPS,
27140 IX86_BUILTIN_CMPEQSS,
27141 IX86_BUILTIN_CMPLTSS,
27142 IX86_BUILTIN_CMPLESS,
27143 IX86_BUILTIN_CMPNEQSS,
27144 IX86_BUILTIN_CMPNLTSS,
27145 IX86_BUILTIN_CMPNLESS,
27146 IX86_BUILTIN_CMPORDSS,
27147 IX86_BUILTIN_CMPUNORDSS,
27148
27149 IX86_BUILTIN_COMIEQSS,
27150 IX86_BUILTIN_COMILTSS,
27151 IX86_BUILTIN_COMILESS,
27152 IX86_BUILTIN_COMIGTSS,
27153 IX86_BUILTIN_COMIGESS,
27154 IX86_BUILTIN_COMINEQSS,
27155 IX86_BUILTIN_UCOMIEQSS,
27156 IX86_BUILTIN_UCOMILTSS,
27157 IX86_BUILTIN_UCOMILESS,
27158 IX86_BUILTIN_UCOMIGTSS,
27159 IX86_BUILTIN_UCOMIGESS,
27160 IX86_BUILTIN_UCOMINEQSS,
27161
27162 IX86_BUILTIN_CVTPI2PS,
27163 IX86_BUILTIN_CVTPS2PI,
27164 IX86_BUILTIN_CVTSI2SS,
27165 IX86_BUILTIN_CVTSI642SS,
27166 IX86_BUILTIN_CVTSS2SI,
27167 IX86_BUILTIN_CVTSS2SI64,
27168 IX86_BUILTIN_CVTTPS2PI,
27169 IX86_BUILTIN_CVTTSS2SI,
27170 IX86_BUILTIN_CVTTSS2SI64,
27171
27172 IX86_BUILTIN_MAXPS,
27173 IX86_BUILTIN_MAXSS,
27174 IX86_BUILTIN_MINPS,
27175 IX86_BUILTIN_MINSS,
27176
27177 IX86_BUILTIN_LOADUPS,
27178 IX86_BUILTIN_STOREUPS,
27179 IX86_BUILTIN_MOVSS,
27180
27181 IX86_BUILTIN_MOVHLPS,
27182 IX86_BUILTIN_MOVLHPS,
27183 IX86_BUILTIN_LOADHPS,
27184 IX86_BUILTIN_LOADLPS,
27185 IX86_BUILTIN_STOREHPS,
27186 IX86_BUILTIN_STORELPS,
27187
27188 IX86_BUILTIN_MASKMOVQ,
27189 IX86_BUILTIN_MOVMSKPS,
27190 IX86_BUILTIN_PMOVMSKB,
27191
27192 IX86_BUILTIN_MOVNTPS,
27193 IX86_BUILTIN_MOVNTQ,
27194
27195 IX86_BUILTIN_LOADDQU,
27196 IX86_BUILTIN_STOREDQU,
27197
27198 IX86_BUILTIN_PACKSSWB,
27199 IX86_BUILTIN_PACKSSDW,
27200 IX86_BUILTIN_PACKUSWB,
27201
27202 IX86_BUILTIN_PADDB,
27203 IX86_BUILTIN_PADDW,
27204 IX86_BUILTIN_PADDD,
27205 IX86_BUILTIN_PADDQ,
27206 IX86_BUILTIN_PADDSB,
27207 IX86_BUILTIN_PADDSW,
27208 IX86_BUILTIN_PADDUSB,
27209 IX86_BUILTIN_PADDUSW,
27210 IX86_BUILTIN_PSUBB,
27211 IX86_BUILTIN_PSUBW,
27212 IX86_BUILTIN_PSUBD,
27213 IX86_BUILTIN_PSUBQ,
27214 IX86_BUILTIN_PSUBSB,
27215 IX86_BUILTIN_PSUBSW,
27216 IX86_BUILTIN_PSUBUSB,
27217 IX86_BUILTIN_PSUBUSW,
27218
27219 IX86_BUILTIN_PAND,
27220 IX86_BUILTIN_PANDN,
27221 IX86_BUILTIN_POR,
27222 IX86_BUILTIN_PXOR,
27223
27224 IX86_BUILTIN_PAVGB,
27225 IX86_BUILTIN_PAVGW,
27226
27227 IX86_BUILTIN_PCMPEQB,
27228 IX86_BUILTIN_PCMPEQW,
27229 IX86_BUILTIN_PCMPEQD,
27230 IX86_BUILTIN_PCMPGTB,
27231 IX86_BUILTIN_PCMPGTW,
27232 IX86_BUILTIN_PCMPGTD,
27233
27234 IX86_BUILTIN_PMADDWD,
27235
27236 IX86_BUILTIN_PMAXSW,
27237 IX86_BUILTIN_PMAXUB,
27238 IX86_BUILTIN_PMINSW,
27239 IX86_BUILTIN_PMINUB,
27240
27241 IX86_BUILTIN_PMULHUW,
27242 IX86_BUILTIN_PMULHW,
27243 IX86_BUILTIN_PMULLW,
27244
27245 IX86_BUILTIN_PSADBW,
27246 IX86_BUILTIN_PSHUFW,
27247
27248 IX86_BUILTIN_PSLLW,
27249 IX86_BUILTIN_PSLLD,
27250 IX86_BUILTIN_PSLLQ,
27251 IX86_BUILTIN_PSRAW,
27252 IX86_BUILTIN_PSRAD,
27253 IX86_BUILTIN_PSRLW,
27254 IX86_BUILTIN_PSRLD,
27255 IX86_BUILTIN_PSRLQ,
27256 IX86_BUILTIN_PSLLWI,
27257 IX86_BUILTIN_PSLLDI,
27258 IX86_BUILTIN_PSLLQI,
27259 IX86_BUILTIN_PSRAWI,
27260 IX86_BUILTIN_PSRADI,
27261 IX86_BUILTIN_PSRLWI,
27262 IX86_BUILTIN_PSRLDI,
27263 IX86_BUILTIN_PSRLQI,
27264
27265 IX86_BUILTIN_PUNPCKHBW,
27266 IX86_BUILTIN_PUNPCKHWD,
27267 IX86_BUILTIN_PUNPCKHDQ,
27268 IX86_BUILTIN_PUNPCKLBW,
27269 IX86_BUILTIN_PUNPCKLWD,
27270 IX86_BUILTIN_PUNPCKLDQ,
27271
27272 IX86_BUILTIN_SHUFPS,
27273
27274 IX86_BUILTIN_RCPPS,
27275 IX86_BUILTIN_RCPSS,
27276 IX86_BUILTIN_RSQRTPS,
27277 IX86_BUILTIN_RSQRTPS_NR,
27278 IX86_BUILTIN_RSQRTSS,
27279 IX86_BUILTIN_RSQRTF,
27280 IX86_BUILTIN_SQRTPS,
27281 IX86_BUILTIN_SQRTPS_NR,
27282 IX86_BUILTIN_SQRTSS,
27283
27284 IX86_BUILTIN_UNPCKHPS,
27285 IX86_BUILTIN_UNPCKLPS,
27286
27287 IX86_BUILTIN_ANDPS,
27288 IX86_BUILTIN_ANDNPS,
27289 IX86_BUILTIN_ORPS,
27290 IX86_BUILTIN_XORPS,
27291
27292 IX86_BUILTIN_EMMS,
27293 IX86_BUILTIN_LDMXCSR,
27294 IX86_BUILTIN_STMXCSR,
27295 IX86_BUILTIN_SFENCE,
27296
27297 IX86_BUILTIN_FXSAVE,
27298 IX86_BUILTIN_FXRSTOR,
27299 IX86_BUILTIN_FXSAVE64,
27300 IX86_BUILTIN_FXRSTOR64,
27301
27302 IX86_BUILTIN_XSAVE,
27303 IX86_BUILTIN_XRSTOR,
27304 IX86_BUILTIN_XSAVE64,
27305 IX86_BUILTIN_XRSTOR64,
27306
27307 IX86_BUILTIN_XSAVEOPT,
27308 IX86_BUILTIN_XSAVEOPT64,
27309
27310 /* 3DNow! Original */
27311 IX86_BUILTIN_FEMMS,
27312 IX86_BUILTIN_PAVGUSB,
27313 IX86_BUILTIN_PF2ID,
27314 IX86_BUILTIN_PFACC,
27315 IX86_BUILTIN_PFADD,
27316 IX86_BUILTIN_PFCMPEQ,
27317 IX86_BUILTIN_PFCMPGE,
27318 IX86_BUILTIN_PFCMPGT,
27319 IX86_BUILTIN_PFMAX,
27320 IX86_BUILTIN_PFMIN,
27321 IX86_BUILTIN_PFMUL,
27322 IX86_BUILTIN_PFRCP,
27323 IX86_BUILTIN_PFRCPIT1,
27324 IX86_BUILTIN_PFRCPIT2,
27325 IX86_BUILTIN_PFRSQIT1,
27326 IX86_BUILTIN_PFRSQRT,
27327 IX86_BUILTIN_PFSUB,
27328 IX86_BUILTIN_PFSUBR,
27329 IX86_BUILTIN_PI2FD,
27330 IX86_BUILTIN_PMULHRW,
27331
27332 /* 3DNow! Athlon Extensions */
27333 IX86_BUILTIN_PF2IW,
27334 IX86_BUILTIN_PFNACC,
27335 IX86_BUILTIN_PFPNACC,
27336 IX86_BUILTIN_PI2FW,
27337 IX86_BUILTIN_PSWAPDSI,
27338 IX86_BUILTIN_PSWAPDSF,
27339
27340 /* SSE2 */
27341 IX86_BUILTIN_ADDPD,
27342 IX86_BUILTIN_ADDSD,
27343 IX86_BUILTIN_DIVPD,
27344 IX86_BUILTIN_DIVSD,
27345 IX86_BUILTIN_MULPD,
27346 IX86_BUILTIN_MULSD,
27347 IX86_BUILTIN_SUBPD,
27348 IX86_BUILTIN_SUBSD,
27349
27350 IX86_BUILTIN_CMPEQPD,
27351 IX86_BUILTIN_CMPLTPD,
27352 IX86_BUILTIN_CMPLEPD,
27353 IX86_BUILTIN_CMPGTPD,
27354 IX86_BUILTIN_CMPGEPD,
27355 IX86_BUILTIN_CMPNEQPD,
27356 IX86_BUILTIN_CMPNLTPD,
27357 IX86_BUILTIN_CMPNLEPD,
27358 IX86_BUILTIN_CMPNGTPD,
27359 IX86_BUILTIN_CMPNGEPD,
27360 IX86_BUILTIN_CMPORDPD,
27361 IX86_BUILTIN_CMPUNORDPD,
27362 IX86_BUILTIN_CMPEQSD,
27363 IX86_BUILTIN_CMPLTSD,
27364 IX86_BUILTIN_CMPLESD,
27365 IX86_BUILTIN_CMPNEQSD,
27366 IX86_BUILTIN_CMPNLTSD,
27367 IX86_BUILTIN_CMPNLESD,
27368 IX86_BUILTIN_CMPORDSD,
27369 IX86_BUILTIN_CMPUNORDSD,
27370
27371 IX86_BUILTIN_COMIEQSD,
27372 IX86_BUILTIN_COMILTSD,
27373 IX86_BUILTIN_COMILESD,
27374 IX86_BUILTIN_COMIGTSD,
27375 IX86_BUILTIN_COMIGESD,
27376 IX86_BUILTIN_COMINEQSD,
27377 IX86_BUILTIN_UCOMIEQSD,
27378 IX86_BUILTIN_UCOMILTSD,
27379 IX86_BUILTIN_UCOMILESD,
27380 IX86_BUILTIN_UCOMIGTSD,
27381 IX86_BUILTIN_UCOMIGESD,
27382 IX86_BUILTIN_UCOMINEQSD,
27383
27384 IX86_BUILTIN_MAXPD,
27385 IX86_BUILTIN_MAXSD,
27386 IX86_BUILTIN_MINPD,
27387 IX86_BUILTIN_MINSD,
27388
27389 IX86_BUILTIN_ANDPD,
27390 IX86_BUILTIN_ANDNPD,
27391 IX86_BUILTIN_ORPD,
27392 IX86_BUILTIN_XORPD,
27393
27394 IX86_BUILTIN_SQRTPD,
27395 IX86_BUILTIN_SQRTSD,
27396
27397 IX86_BUILTIN_UNPCKHPD,
27398 IX86_BUILTIN_UNPCKLPD,
27399
27400 IX86_BUILTIN_SHUFPD,
27401
27402 IX86_BUILTIN_LOADUPD,
27403 IX86_BUILTIN_STOREUPD,
27404 IX86_BUILTIN_MOVSD,
27405
27406 IX86_BUILTIN_LOADHPD,
27407 IX86_BUILTIN_LOADLPD,
27408
27409 IX86_BUILTIN_CVTDQ2PD,
27410 IX86_BUILTIN_CVTDQ2PS,
27411
27412 IX86_BUILTIN_CVTPD2DQ,
27413 IX86_BUILTIN_CVTPD2PI,
27414 IX86_BUILTIN_CVTPD2PS,
27415 IX86_BUILTIN_CVTTPD2DQ,
27416 IX86_BUILTIN_CVTTPD2PI,
27417
27418 IX86_BUILTIN_CVTPI2PD,
27419 IX86_BUILTIN_CVTSI2SD,
27420 IX86_BUILTIN_CVTSI642SD,
27421
27422 IX86_BUILTIN_CVTSD2SI,
27423 IX86_BUILTIN_CVTSD2SI64,
27424 IX86_BUILTIN_CVTSD2SS,
27425 IX86_BUILTIN_CVTSS2SD,
27426 IX86_BUILTIN_CVTTSD2SI,
27427 IX86_BUILTIN_CVTTSD2SI64,
27428
27429 IX86_BUILTIN_CVTPS2DQ,
27430 IX86_BUILTIN_CVTPS2PD,
27431 IX86_BUILTIN_CVTTPS2DQ,
27432
27433 IX86_BUILTIN_MOVNTI,
27434 IX86_BUILTIN_MOVNTI64,
27435 IX86_BUILTIN_MOVNTPD,
27436 IX86_BUILTIN_MOVNTDQ,
27437
27438 IX86_BUILTIN_MOVQ128,
27439
27440 /* SSE2 MMX */
27441 IX86_BUILTIN_MASKMOVDQU,
27442 IX86_BUILTIN_MOVMSKPD,
27443 IX86_BUILTIN_PMOVMSKB128,
27444
27445 IX86_BUILTIN_PACKSSWB128,
27446 IX86_BUILTIN_PACKSSDW128,
27447 IX86_BUILTIN_PACKUSWB128,
27448
27449 IX86_BUILTIN_PADDB128,
27450 IX86_BUILTIN_PADDW128,
27451 IX86_BUILTIN_PADDD128,
27452 IX86_BUILTIN_PADDQ128,
27453 IX86_BUILTIN_PADDSB128,
27454 IX86_BUILTIN_PADDSW128,
27455 IX86_BUILTIN_PADDUSB128,
27456 IX86_BUILTIN_PADDUSW128,
27457 IX86_BUILTIN_PSUBB128,
27458 IX86_BUILTIN_PSUBW128,
27459 IX86_BUILTIN_PSUBD128,
27460 IX86_BUILTIN_PSUBQ128,
27461 IX86_BUILTIN_PSUBSB128,
27462 IX86_BUILTIN_PSUBSW128,
27463 IX86_BUILTIN_PSUBUSB128,
27464 IX86_BUILTIN_PSUBUSW128,
27465
27466 IX86_BUILTIN_PAND128,
27467 IX86_BUILTIN_PANDN128,
27468 IX86_BUILTIN_POR128,
27469 IX86_BUILTIN_PXOR128,
27470
27471 IX86_BUILTIN_PAVGB128,
27472 IX86_BUILTIN_PAVGW128,
27473
27474 IX86_BUILTIN_PCMPEQB128,
27475 IX86_BUILTIN_PCMPEQW128,
27476 IX86_BUILTIN_PCMPEQD128,
27477 IX86_BUILTIN_PCMPGTB128,
27478 IX86_BUILTIN_PCMPGTW128,
27479 IX86_BUILTIN_PCMPGTD128,
27480
27481 IX86_BUILTIN_PMADDWD128,
27482
27483 IX86_BUILTIN_PMAXSW128,
27484 IX86_BUILTIN_PMAXUB128,
27485 IX86_BUILTIN_PMINSW128,
27486 IX86_BUILTIN_PMINUB128,
27487
27488 IX86_BUILTIN_PMULUDQ,
27489 IX86_BUILTIN_PMULUDQ128,
27490 IX86_BUILTIN_PMULHUW128,
27491 IX86_BUILTIN_PMULHW128,
27492 IX86_BUILTIN_PMULLW128,
27493
27494 IX86_BUILTIN_PSADBW128,
27495 IX86_BUILTIN_PSHUFHW,
27496 IX86_BUILTIN_PSHUFLW,
27497 IX86_BUILTIN_PSHUFD,
27498
27499 IX86_BUILTIN_PSLLDQI128,
27500 IX86_BUILTIN_PSLLWI128,
27501 IX86_BUILTIN_PSLLDI128,
27502 IX86_BUILTIN_PSLLQI128,
27503 IX86_BUILTIN_PSRAWI128,
27504 IX86_BUILTIN_PSRADI128,
27505 IX86_BUILTIN_PSRLDQI128,
27506 IX86_BUILTIN_PSRLWI128,
27507 IX86_BUILTIN_PSRLDI128,
27508 IX86_BUILTIN_PSRLQI128,
27509
27510 IX86_BUILTIN_PSLLDQ128,
27511 IX86_BUILTIN_PSLLW128,
27512 IX86_BUILTIN_PSLLD128,
27513 IX86_BUILTIN_PSLLQ128,
27514 IX86_BUILTIN_PSRAW128,
27515 IX86_BUILTIN_PSRAD128,
27516 IX86_BUILTIN_PSRLW128,
27517 IX86_BUILTIN_PSRLD128,
27518 IX86_BUILTIN_PSRLQ128,
27519
27520 IX86_BUILTIN_PUNPCKHBW128,
27521 IX86_BUILTIN_PUNPCKHWD128,
27522 IX86_BUILTIN_PUNPCKHDQ128,
27523 IX86_BUILTIN_PUNPCKHQDQ128,
27524 IX86_BUILTIN_PUNPCKLBW128,
27525 IX86_BUILTIN_PUNPCKLWD128,
27526 IX86_BUILTIN_PUNPCKLDQ128,
27527 IX86_BUILTIN_PUNPCKLQDQ128,
27528
27529 IX86_BUILTIN_CLFLUSH,
27530 IX86_BUILTIN_MFENCE,
27531 IX86_BUILTIN_LFENCE,
27532 IX86_BUILTIN_PAUSE,
27533
27534 IX86_BUILTIN_FNSTENV,
27535 IX86_BUILTIN_FLDENV,
27536 IX86_BUILTIN_FNSTSW,
27537 IX86_BUILTIN_FNCLEX,
27538
27539 IX86_BUILTIN_BSRSI,
27540 IX86_BUILTIN_BSRDI,
27541 IX86_BUILTIN_RDPMC,
27542 IX86_BUILTIN_RDTSC,
27543 IX86_BUILTIN_RDTSCP,
27544 IX86_BUILTIN_ROLQI,
27545 IX86_BUILTIN_ROLHI,
27546 IX86_BUILTIN_RORQI,
27547 IX86_BUILTIN_RORHI,
27548
27549 /* SSE3. */
27550 IX86_BUILTIN_ADDSUBPS,
27551 IX86_BUILTIN_HADDPS,
27552 IX86_BUILTIN_HSUBPS,
27553 IX86_BUILTIN_MOVSHDUP,
27554 IX86_BUILTIN_MOVSLDUP,
27555 IX86_BUILTIN_ADDSUBPD,
27556 IX86_BUILTIN_HADDPD,
27557 IX86_BUILTIN_HSUBPD,
27558 IX86_BUILTIN_LDDQU,
27559
27560 IX86_BUILTIN_MONITOR,
27561 IX86_BUILTIN_MWAIT,
27562
27563 /* SSSE3. */
27564 IX86_BUILTIN_PHADDW,
27565 IX86_BUILTIN_PHADDD,
27566 IX86_BUILTIN_PHADDSW,
27567 IX86_BUILTIN_PHSUBW,
27568 IX86_BUILTIN_PHSUBD,
27569 IX86_BUILTIN_PHSUBSW,
27570 IX86_BUILTIN_PMADDUBSW,
27571 IX86_BUILTIN_PMULHRSW,
27572 IX86_BUILTIN_PSHUFB,
27573 IX86_BUILTIN_PSIGNB,
27574 IX86_BUILTIN_PSIGNW,
27575 IX86_BUILTIN_PSIGND,
27576 IX86_BUILTIN_PALIGNR,
27577 IX86_BUILTIN_PABSB,
27578 IX86_BUILTIN_PABSW,
27579 IX86_BUILTIN_PABSD,
27580
27581 IX86_BUILTIN_PHADDW128,
27582 IX86_BUILTIN_PHADDD128,
27583 IX86_BUILTIN_PHADDSW128,
27584 IX86_BUILTIN_PHSUBW128,
27585 IX86_BUILTIN_PHSUBD128,
27586 IX86_BUILTIN_PHSUBSW128,
27587 IX86_BUILTIN_PMADDUBSW128,
27588 IX86_BUILTIN_PMULHRSW128,
27589 IX86_BUILTIN_PSHUFB128,
27590 IX86_BUILTIN_PSIGNB128,
27591 IX86_BUILTIN_PSIGNW128,
27592 IX86_BUILTIN_PSIGND128,
27593 IX86_BUILTIN_PALIGNR128,
27594 IX86_BUILTIN_PABSB128,
27595 IX86_BUILTIN_PABSW128,
27596 IX86_BUILTIN_PABSD128,
27597
27598 /* AMDFAM10 - SSE4A New Instructions. */
27599 IX86_BUILTIN_MOVNTSD,
27600 IX86_BUILTIN_MOVNTSS,
27601 IX86_BUILTIN_EXTRQI,
27602 IX86_BUILTIN_EXTRQ,
27603 IX86_BUILTIN_INSERTQI,
27604 IX86_BUILTIN_INSERTQ,
27605
27606 /* SSE4.1. */
27607 IX86_BUILTIN_BLENDPD,
27608 IX86_BUILTIN_BLENDPS,
27609 IX86_BUILTIN_BLENDVPD,
27610 IX86_BUILTIN_BLENDVPS,
27611 IX86_BUILTIN_PBLENDVB128,
27612 IX86_BUILTIN_PBLENDW128,
27613
27614 IX86_BUILTIN_DPPD,
27615 IX86_BUILTIN_DPPS,
27616
27617 IX86_BUILTIN_INSERTPS128,
27618
27619 IX86_BUILTIN_MOVNTDQA,
27620 IX86_BUILTIN_MPSADBW128,
27621 IX86_BUILTIN_PACKUSDW128,
27622 IX86_BUILTIN_PCMPEQQ,
27623 IX86_BUILTIN_PHMINPOSUW128,
27624
27625 IX86_BUILTIN_PMAXSB128,
27626 IX86_BUILTIN_PMAXSD128,
27627 IX86_BUILTIN_PMAXUD128,
27628 IX86_BUILTIN_PMAXUW128,
27629
27630 IX86_BUILTIN_PMINSB128,
27631 IX86_BUILTIN_PMINSD128,
27632 IX86_BUILTIN_PMINUD128,
27633 IX86_BUILTIN_PMINUW128,
27634
27635 IX86_BUILTIN_PMOVSXBW128,
27636 IX86_BUILTIN_PMOVSXBD128,
27637 IX86_BUILTIN_PMOVSXBQ128,
27638 IX86_BUILTIN_PMOVSXWD128,
27639 IX86_BUILTIN_PMOVSXWQ128,
27640 IX86_BUILTIN_PMOVSXDQ128,
27641
27642 IX86_BUILTIN_PMOVZXBW128,
27643 IX86_BUILTIN_PMOVZXBD128,
27644 IX86_BUILTIN_PMOVZXBQ128,
27645 IX86_BUILTIN_PMOVZXWD128,
27646 IX86_BUILTIN_PMOVZXWQ128,
27647 IX86_BUILTIN_PMOVZXDQ128,
27648
27649 IX86_BUILTIN_PMULDQ128,
27650 IX86_BUILTIN_PMULLD128,
27651
27652 IX86_BUILTIN_ROUNDSD,
27653 IX86_BUILTIN_ROUNDSS,
27654
27655 IX86_BUILTIN_ROUNDPD,
27656 IX86_BUILTIN_ROUNDPS,
27657
27658 IX86_BUILTIN_FLOORPD,
27659 IX86_BUILTIN_CEILPD,
27660 IX86_BUILTIN_TRUNCPD,
27661 IX86_BUILTIN_RINTPD,
27662 IX86_BUILTIN_ROUNDPD_AZ,
27663
27664 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
27665 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
27666 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
27667
27668 IX86_BUILTIN_FLOORPS,
27669 IX86_BUILTIN_CEILPS,
27670 IX86_BUILTIN_TRUNCPS,
27671 IX86_BUILTIN_RINTPS,
27672 IX86_BUILTIN_ROUNDPS_AZ,
27673
27674 IX86_BUILTIN_FLOORPS_SFIX,
27675 IX86_BUILTIN_CEILPS_SFIX,
27676 IX86_BUILTIN_ROUNDPS_AZ_SFIX,
27677
27678 IX86_BUILTIN_PTESTZ,
27679 IX86_BUILTIN_PTESTC,
27680 IX86_BUILTIN_PTESTNZC,
27681
27682 IX86_BUILTIN_VEC_INIT_V2SI,
27683 IX86_BUILTIN_VEC_INIT_V4HI,
27684 IX86_BUILTIN_VEC_INIT_V8QI,
27685 IX86_BUILTIN_VEC_EXT_V2DF,
27686 IX86_BUILTIN_VEC_EXT_V2DI,
27687 IX86_BUILTIN_VEC_EXT_V4SF,
27688 IX86_BUILTIN_VEC_EXT_V4SI,
27689 IX86_BUILTIN_VEC_EXT_V8HI,
27690 IX86_BUILTIN_VEC_EXT_V2SI,
27691 IX86_BUILTIN_VEC_EXT_V4HI,
27692 IX86_BUILTIN_VEC_EXT_V16QI,
27693 IX86_BUILTIN_VEC_SET_V2DI,
27694 IX86_BUILTIN_VEC_SET_V4SF,
27695 IX86_BUILTIN_VEC_SET_V4SI,
27696 IX86_BUILTIN_VEC_SET_V8HI,
27697 IX86_BUILTIN_VEC_SET_V4HI,
27698 IX86_BUILTIN_VEC_SET_V16QI,
27699
27700 IX86_BUILTIN_VEC_PACK_SFIX,
27701 IX86_BUILTIN_VEC_PACK_SFIX256,
27702
27703 /* SSE4.2. */
27704 IX86_BUILTIN_CRC32QI,
27705 IX86_BUILTIN_CRC32HI,
27706 IX86_BUILTIN_CRC32SI,
27707 IX86_BUILTIN_CRC32DI,
27708
27709 IX86_BUILTIN_PCMPESTRI128,
27710 IX86_BUILTIN_PCMPESTRM128,
27711 IX86_BUILTIN_PCMPESTRA128,
27712 IX86_BUILTIN_PCMPESTRC128,
27713 IX86_BUILTIN_PCMPESTRO128,
27714 IX86_BUILTIN_PCMPESTRS128,
27715 IX86_BUILTIN_PCMPESTRZ128,
27716 IX86_BUILTIN_PCMPISTRI128,
27717 IX86_BUILTIN_PCMPISTRM128,
27718 IX86_BUILTIN_PCMPISTRA128,
27719 IX86_BUILTIN_PCMPISTRC128,
27720 IX86_BUILTIN_PCMPISTRO128,
27721 IX86_BUILTIN_PCMPISTRS128,
27722 IX86_BUILTIN_PCMPISTRZ128,
27723
27724 IX86_BUILTIN_PCMPGTQ,
27725
27726 /* AES instructions */
27727 IX86_BUILTIN_AESENC128,
27728 IX86_BUILTIN_AESENCLAST128,
27729 IX86_BUILTIN_AESDEC128,
27730 IX86_BUILTIN_AESDECLAST128,
27731 IX86_BUILTIN_AESIMC128,
27732 IX86_BUILTIN_AESKEYGENASSIST128,
27733
27734 /* PCLMUL instruction */
27735 IX86_BUILTIN_PCLMULQDQ128,
27736
27737 /* AVX */
27738 IX86_BUILTIN_ADDPD256,
27739 IX86_BUILTIN_ADDPS256,
27740 IX86_BUILTIN_ADDSUBPD256,
27741 IX86_BUILTIN_ADDSUBPS256,
27742 IX86_BUILTIN_ANDPD256,
27743 IX86_BUILTIN_ANDPS256,
27744 IX86_BUILTIN_ANDNPD256,
27745 IX86_BUILTIN_ANDNPS256,
27746 IX86_BUILTIN_BLENDPD256,
27747 IX86_BUILTIN_BLENDPS256,
27748 IX86_BUILTIN_BLENDVPD256,
27749 IX86_BUILTIN_BLENDVPS256,
27750 IX86_BUILTIN_DIVPD256,
27751 IX86_BUILTIN_DIVPS256,
27752 IX86_BUILTIN_DPPS256,
27753 IX86_BUILTIN_HADDPD256,
27754 IX86_BUILTIN_HADDPS256,
27755 IX86_BUILTIN_HSUBPD256,
27756 IX86_BUILTIN_HSUBPS256,
27757 IX86_BUILTIN_MAXPD256,
27758 IX86_BUILTIN_MAXPS256,
27759 IX86_BUILTIN_MINPD256,
27760 IX86_BUILTIN_MINPS256,
27761 IX86_BUILTIN_MULPD256,
27762 IX86_BUILTIN_MULPS256,
27763 IX86_BUILTIN_ORPD256,
27764 IX86_BUILTIN_ORPS256,
27765 IX86_BUILTIN_SHUFPD256,
27766 IX86_BUILTIN_SHUFPS256,
27767 IX86_BUILTIN_SUBPD256,
27768 IX86_BUILTIN_SUBPS256,
27769 IX86_BUILTIN_XORPD256,
27770 IX86_BUILTIN_XORPS256,
27771 IX86_BUILTIN_CMPSD,
27772 IX86_BUILTIN_CMPSS,
27773 IX86_BUILTIN_CMPPD,
27774 IX86_BUILTIN_CMPPS,
27775 IX86_BUILTIN_CMPPD256,
27776 IX86_BUILTIN_CMPPS256,
27777 IX86_BUILTIN_CVTDQ2PD256,
27778 IX86_BUILTIN_CVTDQ2PS256,
27779 IX86_BUILTIN_CVTPD2PS256,
27780 IX86_BUILTIN_CVTPS2DQ256,
27781 IX86_BUILTIN_CVTPS2PD256,
27782 IX86_BUILTIN_CVTTPD2DQ256,
27783 IX86_BUILTIN_CVTPD2DQ256,
27784 IX86_BUILTIN_CVTTPS2DQ256,
27785 IX86_BUILTIN_EXTRACTF128PD256,
27786 IX86_BUILTIN_EXTRACTF128PS256,
27787 IX86_BUILTIN_EXTRACTF128SI256,
27788 IX86_BUILTIN_VZEROALL,
27789 IX86_BUILTIN_VZEROUPPER,
27790 IX86_BUILTIN_VPERMILVARPD,
27791 IX86_BUILTIN_VPERMILVARPS,
27792 IX86_BUILTIN_VPERMILVARPD256,
27793 IX86_BUILTIN_VPERMILVARPS256,
27794 IX86_BUILTIN_VPERMILPD,
27795 IX86_BUILTIN_VPERMILPS,
27796 IX86_BUILTIN_VPERMILPD256,
27797 IX86_BUILTIN_VPERMILPS256,
27798 IX86_BUILTIN_VPERMIL2PD,
27799 IX86_BUILTIN_VPERMIL2PS,
27800 IX86_BUILTIN_VPERMIL2PD256,
27801 IX86_BUILTIN_VPERMIL2PS256,
27802 IX86_BUILTIN_VPERM2F128PD256,
27803 IX86_BUILTIN_VPERM2F128PS256,
27804 IX86_BUILTIN_VPERM2F128SI256,
27805 IX86_BUILTIN_VBROADCASTSS,
27806 IX86_BUILTIN_VBROADCASTSD256,
27807 IX86_BUILTIN_VBROADCASTSS256,
27808 IX86_BUILTIN_VBROADCASTPD256,
27809 IX86_BUILTIN_VBROADCASTPS256,
27810 IX86_BUILTIN_VINSERTF128PD256,
27811 IX86_BUILTIN_VINSERTF128PS256,
27812 IX86_BUILTIN_VINSERTF128SI256,
27813 IX86_BUILTIN_LOADUPD256,
27814 IX86_BUILTIN_LOADUPS256,
27815 IX86_BUILTIN_STOREUPD256,
27816 IX86_BUILTIN_STOREUPS256,
27817 IX86_BUILTIN_LDDQU256,
27818 IX86_BUILTIN_MOVNTDQ256,
27819 IX86_BUILTIN_MOVNTPD256,
27820 IX86_BUILTIN_MOVNTPS256,
27821 IX86_BUILTIN_LOADDQU256,
27822 IX86_BUILTIN_STOREDQU256,
27823 IX86_BUILTIN_MASKLOADPD,
27824 IX86_BUILTIN_MASKLOADPS,
27825 IX86_BUILTIN_MASKSTOREPD,
27826 IX86_BUILTIN_MASKSTOREPS,
27827 IX86_BUILTIN_MASKLOADPD256,
27828 IX86_BUILTIN_MASKLOADPS256,
27829 IX86_BUILTIN_MASKSTOREPD256,
27830 IX86_BUILTIN_MASKSTOREPS256,
27831 IX86_BUILTIN_MOVSHDUP256,
27832 IX86_BUILTIN_MOVSLDUP256,
27833 IX86_BUILTIN_MOVDDUP256,
27834
27835 IX86_BUILTIN_SQRTPD256,
27836 IX86_BUILTIN_SQRTPS256,
27837 IX86_BUILTIN_SQRTPS_NR256,
27838 IX86_BUILTIN_RSQRTPS256,
27839 IX86_BUILTIN_RSQRTPS_NR256,
27840
27841 IX86_BUILTIN_RCPPS256,
27842
27843 IX86_BUILTIN_ROUNDPD256,
27844 IX86_BUILTIN_ROUNDPS256,
27845
27846 IX86_BUILTIN_FLOORPD256,
27847 IX86_BUILTIN_CEILPD256,
27848 IX86_BUILTIN_TRUNCPD256,
27849 IX86_BUILTIN_RINTPD256,
27850 IX86_BUILTIN_ROUNDPD_AZ256,
27851
27852 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
27853 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
27854 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
27855
27856 IX86_BUILTIN_FLOORPS256,
27857 IX86_BUILTIN_CEILPS256,
27858 IX86_BUILTIN_TRUNCPS256,
27859 IX86_BUILTIN_RINTPS256,
27860 IX86_BUILTIN_ROUNDPS_AZ256,
27861
27862 IX86_BUILTIN_FLOORPS_SFIX256,
27863 IX86_BUILTIN_CEILPS_SFIX256,
27864 IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
27865
27866 IX86_BUILTIN_UNPCKHPD256,
27867 IX86_BUILTIN_UNPCKLPD256,
27868 IX86_BUILTIN_UNPCKHPS256,
27869 IX86_BUILTIN_UNPCKLPS256,
27870
27871 IX86_BUILTIN_SI256_SI,
27872 IX86_BUILTIN_PS256_PS,
27873 IX86_BUILTIN_PD256_PD,
27874 IX86_BUILTIN_SI_SI256,
27875 IX86_BUILTIN_PS_PS256,
27876 IX86_BUILTIN_PD_PD256,
27877
27878 IX86_BUILTIN_VTESTZPD,
27879 IX86_BUILTIN_VTESTCPD,
27880 IX86_BUILTIN_VTESTNZCPD,
27881 IX86_BUILTIN_VTESTZPS,
27882 IX86_BUILTIN_VTESTCPS,
27883 IX86_BUILTIN_VTESTNZCPS,
27884 IX86_BUILTIN_VTESTZPD256,
27885 IX86_BUILTIN_VTESTCPD256,
27886 IX86_BUILTIN_VTESTNZCPD256,
27887 IX86_BUILTIN_VTESTZPS256,
27888 IX86_BUILTIN_VTESTCPS256,
27889 IX86_BUILTIN_VTESTNZCPS256,
27890 IX86_BUILTIN_PTESTZ256,
27891 IX86_BUILTIN_PTESTC256,
27892 IX86_BUILTIN_PTESTNZC256,
27893
27894 IX86_BUILTIN_MOVMSKPD256,
27895 IX86_BUILTIN_MOVMSKPS256,
27896
27897 /* AVX2 */
27898 IX86_BUILTIN_MPSADBW256,
27899 IX86_BUILTIN_PABSB256,
27900 IX86_BUILTIN_PABSW256,
27901 IX86_BUILTIN_PABSD256,
27902 IX86_BUILTIN_PACKSSDW256,
27903 IX86_BUILTIN_PACKSSWB256,
27904 IX86_BUILTIN_PACKUSDW256,
27905 IX86_BUILTIN_PACKUSWB256,
27906 IX86_BUILTIN_PADDB256,
27907 IX86_BUILTIN_PADDW256,
27908 IX86_BUILTIN_PADDD256,
27909 IX86_BUILTIN_PADDQ256,
27910 IX86_BUILTIN_PADDSB256,
27911 IX86_BUILTIN_PADDSW256,
27912 IX86_BUILTIN_PADDUSB256,
27913 IX86_BUILTIN_PADDUSW256,
27914 IX86_BUILTIN_PALIGNR256,
27915 IX86_BUILTIN_AND256I,
27916 IX86_BUILTIN_ANDNOT256I,
27917 IX86_BUILTIN_PAVGB256,
27918 IX86_BUILTIN_PAVGW256,
27919 IX86_BUILTIN_PBLENDVB256,
27920 IX86_BUILTIN_PBLENDVW256,
27921 IX86_BUILTIN_PCMPEQB256,
27922 IX86_BUILTIN_PCMPEQW256,
27923 IX86_BUILTIN_PCMPEQD256,
27924 IX86_BUILTIN_PCMPEQQ256,
27925 IX86_BUILTIN_PCMPGTB256,
27926 IX86_BUILTIN_PCMPGTW256,
27927 IX86_BUILTIN_PCMPGTD256,
27928 IX86_BUILTIN_PCMPGTQ256,
27929 IX86_BUILTIN_PHADDW256,
27930 IX86_BUILTIN_PHADDD256,
27931 IX86_BUILTIN_PHADDSW256,
27932 IX86_BUILTIN_PHSUBW256,
27933 IX86_BUILTIN_PHSUBD256,
27934 IX86_BUILTIN_PHSUBSW256,
27935 IX86_BUILTIN_PMADDUBSW256,
27936 IX86_BUILTIN_PMADDWD256,
27937 IX86_BUILTIN_PMAXSB256,
27938 IX86_BUILTIN_PMAXSW256,
27939 IX86_BUILTIN_PMAXSD256,
27940 IX86_BUILTIN_PMAXUB256,
27941 IX86_BUILTIN_PMAXUW256,
27942 IX86_BUILTIN_PMAXUD256,
27943 IX86_BUILTIN_PMINSB256,
27944 IX86_BUILTIN_PMINSW256,
27945 IX86_BUILTIN_PMINSD256,
27946 IX86_BUILTIN_PMINUB256,
27947 IX86_BUILTIN_PMINUW256,
27948 IX86_BUILTIN_PMINUD256,
27949 IX86_BUILTIN_PMOVMSKB256,
27950 IX86_BUILTIN_PMOVSXBW256,
27951 IX86_BUILTIN_PMOVSXBD256,
27952 IX86_BUILTIN_PMOVSXBQ256,
27953 IX86_BUILTIN_PMOVSXWD256,
27954 IX86_BUILTIN_PMOVSXWQ256,
27955 IX86_BUILTIN_PMOVSXDQ256,
27956 IX86_BUILTIN_PMOVZXBW256,
27957 IX86_BUILTIN_PMOVZXBD256,
27958 IX86_BUILTIN_PMOVZXBQ256,
27959 IX86_BUILTIN_PMOVZXWD256,
27960 IX86_BUILTIN_PMOVZXWQ256,
27961 IX86_BUILTIN_PMOVZXDQ256,
27962 IX86_BUILTIN_PMULDQ256,
27963 IX86_BUILTIN_PMULHRSW256,
27964 IX86_BUILTIN_PMULHUW256,
27965 IX86_BUILTIN_PMULHW256,
27966 IX86_BUILTIN_PMULLW256,
27967 IX86_BUILTIN_PMULLD256,
27968 IX86_BUILTIN_PMULUDQ256,
27969 IX86_BUILTIN_POR256,
27970 IX86_BUILTIN_PSADBW256,
27971 IX86_BUILTIN_PSHUFB256,
27972 IX86_BUILTIN_PSHUFD256,
27973 IX86_BUILTIN_PSHUFHW256,
27974 IX86_BUILTIN_PSHUFLW256,
27975 IX86_BUILTIN_PSIGNB256,
27976 IX86_BUILTIN_PSIGNW256,
27977 IX86_BUILTIN_PSIGND256,
27978 IX86_BUILTIN_PSLLDQI256,
27979 IX86_BUILTIN_PSLLWI256,
27980 IX86_BUILTIN_PSLLW256,
27981 IX86_BUILTIN_PSLLDI256,
27982 IX86_BUILTIN_PSLLD256,
27983 IX86_BUILTIN_PSLLQI256,
27984 IX86_BUILTIN_PSLLQ256,
27985 IX86_BUILTIN_PSRAWI256,
27986 IX86_BUILTIN_PSRAW256,
27987 IX86_BUILTIN_PSRADI256,
27988 IX86_BUILTIN_PSRAD256,
27989 IX86_BUILTIN_PSRLDQI256,
27990 IX86_BUILTIN_PSRLWI256,
27991 IX86_BUILTIN_PSRLW256,
27992 IX86_BUILTIN_PSRLDI256,
27993 IX86_BUILTIN_PSRLD256,
27994 IX86_BUILTIN_PSRLQI256,
27995 IX86_BUILTIN_PSRLQ256,
27996 IX86_BUILTIN_PSUBB256,
27997 IX86_BUILTIN_PSUBW256,
27998 IX86_BUILTIN_PSUBD256,
27999 IX86_BUILTIN_PSUBQ256,
28000 IX86_BUILTIN_PSUBSB256,
28001 IX86_BUILTIN_PSUBSW256,
28002 IX86_BUILTIN_PSUBUSB256,
28003 IX86_BUILTIN_PSUBUSW256,
28004 IX86_BUILTIN_PUNPCKHBW256,
28005 IX86_BUILTIN_PUNPCKHWD256,
28006 IX86_BUILTIN_PUNPCKHDQ256,
28007 IX86_BUILTIN_PUNPCKHQDQ256,
28008 IX86_BUILTIN_PUNPCKLBW256,
28009 IX86_BUILTIN_PUNPCKLWD256,
28010 IX86_BUILTIN_PUNPCKLDQ256,
28011 IX86_BUILTIN_PUNPCKLQDQ256,
28012 IX86_BUILTIN_PXOR256,
28013 IX86_BUILTIN_MOVNTDQA256,
28014 IX86_BUILTIN_VBROADCASTSS_PS,
28015 IX86_BUILTIN_VBROADCASTSS_PS256,
28016 IX86_BUILTIN_VBROADCASTSD_PD256,
28017 IX86_BUILTIN_VBROADCASTSI256,
28018 IX86_BUILTIN_PBLENDD256,
28019 IX86_BUILTIN_PBLENDD128,
28020 IX86_BUILTIN_PBROADCASTB256,
28021 IX86_BUILTIN_PBROADCASTW256,
28022 IX86_BUILTIN_PBROADCASTD256,
28023 IX86_BUILTIN_PBROADCASTQ256,
28024 IX86_BUILTIN_PBROADCASTB128,
28025 IX86_BUILTIN_PBROADCASTW128,
28026 IX86_BUILTIN_PBROADCASTD128,
28027 IX86_BUILTIN_PBROADCASTQ128,
28028 IX86_BUILTIN_VPERMVARSI256,
28029 IX86_BUILTIN_VPERMDF256,
28030 IX86_BUILTIN_VPERMVARSF256,
28031 IX86_BUILTIN_VPERMDI256,
28032 IX86_BUILTIN_VPERMTI256,
28033 IX86_BUILTIN_VEXTRACT128I256,
28034 IX86_BUILTIN_VINSERT128I256,
28035 IX86_BUILTIN_MASKLOADD,
28036 IX86_BUILTIN_MASKLOADQ,
28037 IX86_BUILTIN_MASKLOADD256,
28038 IX86_BUILTIN_MASKLOADQ256,
28039 IX86_BUILTIN_MASKSTORED,
28040 IX86_BUILTIN_MASKSTOREQ,
28041 IX86_BUILTIN_MASKSTORED256,
28042 IX86_BUILTIN_MASKSTOREQ256,
28043 IX86_BUILTIN_PSLLVV4DI,
28044 IX86_BUILTIN_PSLLVV2DI,
28045 IX86_BUILTIN_PSLLVV8SI,
28046 IX86_BUILTIN_PSLLVV4SI,
28047 IX86_BUILTIN_PSRAVV8SI,
28048 IX86_BUILTIN_PSRAVV4SI,
28049 IX86_BUILTIN_PSRLVV4DI,
28050 IX86_BUILTIN_PSRLVV2DI,
28051 IX86_BUILTIN_PSRLVV8SI,
28052 IX86_BUILTIN_PSRLVV4SI,
28053
28054 IX86_BUILTIN_GATHERSIV2DF,
28055 IX86_BUILTIN_GATHERSIV4DF,
28056 IX86_BUILTIN_GATHERDIV2DF,
28057 IX86_BUILTIN_GATHERDIV4DF,
28058 IX86_BUILTIN_GATHERSIV4SF,
28059 IX86_BUILTIN_GATHERSIV8SF,
28060 IX86_BUILTIN_GATHERDIV4SF,
28061 IX86_BUILTIN_GATHERDIV8SF,
28062 IX86_BUILTIN_GATHERSIV2DI,
28063 IX86_BUILTIN_GATHERSIV4DI,
28064 IX86_BUILTIN_GATHERDIV2DI,
28065 IX86_BUILTIN_GATHERDIV4DI,
28066 IX86_BUILTIN_GATHERSIV4SI,
28067 IX86_BUILTIN_GATHERSIV8SI,
28068 IX86_BUILTIN_GATHERDIV4SI,
28069 IX86_BUILTIN_GATHERDIV8SI,
28070
28071 /* AVX512F */
28072 IX86_BUILTIN_ADDPD512,
28073 IX86_BUILTIN_ADDPS512,
28074 IX86_BUILTIN_ADDSD_ROUND,
28075 IX86_BUILTIN_ADDSS_ROUND,
28076 IX86_BUILTIN_ALIGND512,
28077 IX86_BUILTIN_ALIGNQ512,
28078 IX86_BUILTIN_BLENDMD512,
28079 IX86_BUILTIN_BLENDMPD512,
28080 IX86_BUILTIN_BLENDMPS512,
28081 IX86_BUILTIN_BLENDMQ512,
28082 IX86_BUILTIN_BROADCASTF32X4_512,
28083 IX86_BUILTIN_BROADCASTF64X4_512,
28084 IX86_BUILTIN_BROADCASTI32X4_512,
28085 IX86_BUILTIN_BROADCASTI64X4_512,
28086 IX86_BUILTIN_BROADCASTSD512,
28087 IX86_BUILTIN_BROADCASTSS512,
28088 IX86_BUILTIN_CMPD512,
28089 IX86_BUILTIN_CMPPD512,
28090 IX86_BUILTIN_CMPPS512,
28091 IX86_BUILTIN_CMPQ512,
28092 IX86_BUILTIN_CMPSD_MASK,
28093 IX86_BUILTIN_CMPSS_MASK,
28094 IX86_BUILTIN_COMIDF,
28095 IX86_BUILTIN_COMISF,
28096 IX86_BUILTIN_COMPRESSPD512,
28097 IX86_BUILTIN_COMPRESSPDSTORE512,
28098 IX86_BUILTIN_COMPRESSPS512,
28099 IX86_BUILTIN_COMPRESSPSSTORE512,
28100 IX86_BUILTIN_CVTDQ2PD512,
28101 IX86_BUILTIN_CVTDQ2PS512,
28102 IX86_BUILTIN_CVTPD2DQ512,
28103 IX86_BUILTIN_CVTPD2PS512,
28104 IX86_BUILTIN_CVTPD2UDQ512,
28105 IX86_BUILTIN_CVTPH2PS512,
28106 IX86_BUILTIN_CVTPS2DQ512,
28107 IX86_BUILTIN_CVTPS2PD512,
28108 IX86_BUILTIN_CVTPS2PH512,
28109 IX86_BUILTIN_CVTPS2UDQ512,
28110 IX86_BUILTIN_CVTSD2SS_ROUND,
28111 IX86_BUILTIN_CVTSI2SD64,
28112 IX86_BUILTIN_CVTSI2SS32,
28113 IX86_BUILTIN_CVTSI2SS64,
28114 IX86_BUILTIN_CVTSS2SD_ROUND,
28115 IX86_BUILTIN_CVTTPD2DQ512,
28116 IX86_BUILTIN_CVTTPD2UDQ512,
28117 IX86_BUILTIN_CVTTPS2DQ512,
28118 IX86_BUILTIN_CVTTPS2UDQ512,
28119 IX86_BUILTIN_CVTUDQ2PD512,
28120 IX86_BUILTIN_CVTUDQ2PS512,
28121 IX86_BUILTIN_CVTUSI2SD32,
28122 IX86_BUILTIN_CVTUSI2SD64,
28123 IX86_BUILTIN_CVTUSI2SS32,
28124 IX86_BUILTIN_CVTUSI2SS64,
28125 IX86_BUILTIN_DIVPD512,
28126 IX86_BUILTIN_DIVPS512,
28127 IX86_BUILTIN_DIVSD_ROUND,
28128 IX86_BUILTIN_DIVSS_ROUND,
28129 IX86_BUILTIN_EXPANDPD512,
28130 IX86_BUILTIN_EXPANDPD512Z,
28131 IX86_BUILTIN_EXPANDPDLOAD512,
28132 IX86_BUILTIN_EXPANDPDLOAD512Z,
28133 IX86_BUILTIN_EXPANDPS512,
28134 IX86_BUILTIN_EXPANDPS512Z,
28135 IX86_BUILTIN_EXPANDPSLOAD512,
28136 IX86_BUILTIN_EXPANDPSLOAD512Z,
28137 IX86_BUILTIN_EXTRACTF32X4,
28138 IX86_BUILTIN_EXTRACTF64X4,
28139 IX86_BUILTIN_EXTRACTI32X4,
28140 IX86_BUILTIN_EXTRACTI64X4,
28141 IX86_BUILTIN_FIXUPIMMPD512_MASK,
28142 IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
28143 IX86_BUILTIN_FIXUPIMMPS512_MASK,
28144 IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
28145 IX86_BUILTIN_FIXUPIMMSD128_MASK,
28146 IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
28147 IX86_BUILTIN_FIXUPIMMSS128_MASK,
28148 IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
28149 IX86_BUILTIN_GETEXPPD512,
28150 IX86_BUILTIN_GETEXPPS512,
28151 IX86_BUILTIN_GETEXPSD128,
28152 IX86_BUILTIN_GETEXPSS128,
28153 IX86_BUILTIN_GETMANTPD512,
28154 IX86_BUILTIN_GETMANTPS512,
28155 IX86_BUILTIN_GETMANTSD128,
28156 IX86_BUILTIN_GETMANTSS128,
28157 IX86_BUILTIN_INSERTF32X4,
28158 IX86_BUILTIN_INSERTF64X4,
28159 IX86_BUILTIN_INSERTI32X4,
28160 IX86_BUILTIN_INSERTI64X4,
28161 IX86_BUILTIN_LOADAPD512,
28162 IX86_BUILTIN_LOADAPS512,
28163 IX86_BUILTIN_LOADDQUDI512,
28164 IX86_BUILTIN_LOADDQUSI512,
28165 IX86_BUILTIN_LOADUPD512,
28166 IX86_BUILTIN_LOADUPS512,
28167 IX86_BUILTIN_MAXPD512,
28168 IX86_BUILTIN_MAXPS512,
28169 IX86_BUILTIN_MAXSD_ROUND,
28170 IX86_BUILTIN_MAXSS_ROUND,
28171 IX86_BUILTIN_MINPD512,
28172 IX86_BUILTIN_MINPS512,
28173 IX86_BUILTIN_MINSD_ROUND,
28174 IX86_BUILTIN_MINSS_ROUND,
28175 IX86_BUILTIN_MOVAPD512,
28176 IX86_BUILTIN_MOVAPS512,
28177 IX86_BUILTIN_MOVDDUP512,
28178 IX86_BUILTIN_MOVDQA32LOAD512,
28179 IX86_BUILTIN_MOVDQA32STORE512,
28180 IX86_BUILTIN_MOVDQA32_512,
28181 IX86_BUILTIN_MOVDQA64LOAD512,
28182 IX86_BUILTIN_MOVDQA64STORE512,
28183 IX86_BUILTIN_MOVDQA64_512,
28184 IX86_BUILTIN_MOVNTDQ512,
28185 IX86_BUILTIN_MOVNTDQA512,
28186 IX86_BUILTIN_MOVNTPD512,
28187 IX86_BUILTIN_MOVNTPS512,
28188 IX86_BUILTIN_MOVSHDUP512,
28189 IX86_BUILTIN_MOVSLDUP512,
28190 IX86_BUILTIN_MULPD512,
28191 IX86_BUILTIN_MULPS512,
28192 IX86_BUILTIN_MULSD_ROUND,
28193 IX86_BUILTIN_MULSS_ROUND,
28194 IX86_BUILTIN_PABSD512,
28195 IX86_BUILTIN_PABSQ512,
28196 IX86_BUILTIN_PADDD512,
28197 IX86_BUILTIN_PADDQ512,
28198 IX86_BUILTIN_PANDD512,
28199 IX86_BUILTIN_PANDND512,
28200 IX86_BUILTIN_PANDNQ512,
28201 IX86_BUILTIN_PANDQ512,
28202 IX86_BUILTIN_PBROADCASTD512,
28203 IX86_BUILTIN_PBROADCASTD512_GPR,
28204 IX86_BUILTIN_PBROADCASTMB512,
28205 IX86_BUILTIN_PBROADCASTMW512,
28206 IX86_BUILTIN_PBROADCASTQ512,
28207 IX86_BUILTIN_PBROADCASTQ512_GPR,
28208 IX86_BUILTIN_PBROADCASTQ512_MEM,
28209 IX86_BUILTIN_PCMPEQD512_MASK,
28210 IX86_BUILTIN_PCMPEQQ512_MASK,
28211 IX86_BUILTIN_PCMPGTD512_MASK,
28212 IX86_BUILTIN_PCMPGTQ512_MASK,
28213 IX86_BUILTIN_PCOMPRESSD512,
28214 IX86_BUILTIN_PCOMPRESSDSTORE512,
28215 IX86_BUILTIN_PCOMPRESSQ512,
28216 IX86_BUILTIN_PCOMPRESSQSTORE512,
28217 IX86_BUILTIN_PEXPANDD512,
28218 IX86_BUILTIN_PEXPANDD512Z,
28219 IX86_BUILTIN_PEXPANDDLOAD512,
28220 IX86_BUILTIN_PEXPANDDLOAD512Z,
28221 IX86_BUILTIN_PEXPANDQ512,
28222 IX86_BUILTIN_PEXPANDQ512Z,
28223 IX86_BUILTIN_PEXPANDQLOAD512,
28224 IX86_BUILTIN_PEXPANDQLOAD512Z,
28225 IX86_BUILTIN_PMAXSD512,
28226 IX86_BUILTIN_PMAXSQ512,
28227 IX86_BUILTIN_PMAXUD512,
28228 IX86_BUILTIN_PMAXUQ512,
28229 IX86_BUILTIN_PMINSD512,
28230 IX86_BUILTIN_PMINSQ512,
28231 IX86_BUILTIN_PMINUD512,
28232 IX86_BUILTIN_PMINUQ512,
28233 IX86_BUILTIN_PMOVDB512,
28234 IX86_BUILTIN_PMOVDB512_MEM,
28235 IX86_BUILTIN_PMOVDW512,
28236 IX86_BUILTIN_PMOVDW512_MEM,
28237 IX86_BUILTIN_PMOVQB512,
28238 IX86_BUILTIN_PMOVQB512_MEM,
28239 IX86_BUILTIN_PMOVQD512,
28240 IX86_BUILTIN_PMOVQD512_MEM,
28241 IX86_BUILTIN_PMOVQW512,
28242 IX86_BUILTIN_PMOVQW512_MEM,
28243 IX86_BUILTIN_PMOVSDB512,
28244 IX86_BUILTIN_PMOVSDB512_MEM,
28245 IX86_BUILTIN_PMOVSDW512,
28246 IX86_BUILTIN_PMOVSDW512_MEM,
28247 IX86_BUILTIN_PMOVSQB512,
28248 IX86_BUILTIN_PMOVSQB512_MEM,
28249 IX86_BUILTIN_PMOVSQD512,
28250 IX86_BUILTIN_PMOVSQD512_MEM,
28251 IX86_BUILTIN_PMOVSQW512,
28252 IX86_BUILTIN_PMOVSQW512_MEM,
28253 IX86_BUILTIN_PMOVSXBD512,
28254 IX86_BUILTIN_PMOVSXBQ512,
28255 IX86_BUILTIN_PMOVSXDQ512,
28256 IX86_BUILTIN_PMOVSXWD512,
28257 IX86_BUILTIN_PMOVSXWQ512,
28258 IX86_BUILTIN_PMOVUSDB512,
28259 IX86_BUILTIN_PMOVUSDB512_MEM,
28260 IX86_BUILTIN_PMOVUSDW512,
28261 IX86_BUILTIN_PMOVUSDW512_MEM,
28262 IX86_BUILTIN_PMOVUSQB512,
28263 IX86_BUILTIN_PMOVUSQB512_MEM,
28264 IX86_BUILTIN_PMOVUSQD512,
28265 IX86_BUILTIN_PMOVUSQD512_MEM,
28266 IX86_BUILTIN_PMOVUSQW512,
28267 IX86_BUILTIN_PMOVUSQW512_MEM,
28268 IX86_BUILTIN_PMOVZXBD512,
28269 IX86_BUILTIN_PMOVZXBQ512,
28270 IX86_BUILTIN_PMOVZXDQ512,
28271 IX86_BUILTIN_PMOVZXWD512,
28272 IX86_BUILTIN_PMOVZXWQ512,
28273 IX86_BUILTIN_PMULDQ512,
28274 IX86_BUILTIN_PMULLD512,
28275 IX86_BUILTIN_PMULUDQ512,
28276 IX86_BUILTIN_PORD512,
28277 IX86_BUILTIN_PORQ512,
28278 IX86_BUILTIN_PROLD512,
28279 IX86_BUILTIN_PROLQ512,
28280 IX86_BUILTIN_PROLVD512,
28281 IX86_BUILTIN_PROLVQ512,
28282 IX86_BUILTIN_PRORD512,
28283 IX86_BUILTIN_PRORQ512,
28284 IX86_BUILTIN_PRORVD512,
28285 IX86_BUILTIN_PRORVQ512,
28286 IX86_BUILTIN_PSHUFD512,
28287 IX86_BUILTIN_PSLLD512,
28288 IX86_BUILTIN_PSLLDI512,
28289 IX86_BUILTIN_PSLLQ512,
28290 IX86_BUILTIN_PSLLQI512,
28291 IX86_BUILTIN_PSLLVV16SI,
28292 IX86_BUILTIN_PSLLVV8DI,
28293 IX86_BUILTIN_PSRAD512,
28294 IX86_BUILTIN_PSRADI512,
28295 IX86_BUILTIN_PSRAQ512,
28296 IX86_BUILTIN_PSRAQI512,
28297 IX86_BUILTIN_PSRAVV16SI,
28298 IX86_BUILTIN_PSRAVV8DI,
28299 IX86_BUILTIN_PSRLD512,
28300 IX86_BUILTIN_PSRLDI512,
28301 IX86_BUILTIN_PSRLQ512,
28302 IX86_BUILTIN_PSRLQI512,
28303 IX86_BUILTIN_PSRLVV16SI,
28304 IX86_BUILTIN_PSRLVV8DI,
28305 IX86_BUILTIN_PSUBD512,
28306 IX86_BUILTIN_PSUBQ512,
28307 IX86_BUILTIN_PTESTMD512,
28308 IX86_BUILTIN_PTESTMQ512,
28309 IX86_BUILTIN_PTESTNMD512,
28310 IX86_BUILTIN_PTESTNMQ512,
28311 IX86_BUILTIN_PUNPCKHDQ512,
28312 IX86_BUILTIN_PUNPCKHQDQ512,
28313 IX86_BUILTIN_PUNPCKLDQ512,
28314 IX86_BUILTIN_PUNPCKLQDQ512,
28315 IX86_BUILTIN_PXORD512,
28316 IX86_BUILTIN_PXORQ512,
28317 IX86_BUILTIN_RCP14PD512,
28318 IX86_BUILTIN_RCP14PS512,
28319 IX86_BUILTIN_RCP14SD,
28320 IX86_BUILTIN_RCP14SS,
28321 IX86_BUILTIN_RNDSCALEPD,
28322 IX86_BUILTIN_RNDSCALEPS,
28323 IX86_BUILTIN_RNDSCALESD,
28324 IX86_BUILTIN_RNDSCALESS,
28325 IX86_BUILTIN_RSQRT14PD512,
28326 IX86_BUILTIN_RSQRT14PS512,
28327 IX86_BUILTIN_RSQRT14SD,
28328 IX86_BUILTIN_RSQRT14SS,
28329 IX86_BUILTIN_SCALEFPD512,
28330 IX86_BUILTIN_SCALEFPS512,
28331 IX86_BUILTIN_SCALEFSD,
28332 IX86_BUILTIN_SCALEFSS,
28333 IX86_BUILTIN_SHUFPD512,
28334 IX86_BUILTIN_SHUFPS512,
28335 IX86_BUILTIN_SHUF_F32x4,
28336 IX86_BUILTIN_SHUF_F64x2,
28337 IX86_BUILTIN_SHUF_I32x4,
28338 IX86_BUILTIN_SHUF_I64x2,
28339 IX86_BUILTIN_SQRTPD512,
28340 IX86_BUILTIN_SQRTPD512_MASK,
28341 IX86_BUILTIN_SQRTPS512_MASK,
28342 IX86_BUILTIN_SQRTPS_NR512,
28343 IX86_BUILTIN_SQRTSD_ROUND,
28344 IX86_BUILTIN_SQRTSS_ROUND,
28345 IX86_BUILTIN_STOREAPD512,
28346 IX86_BUILTIN_STOREAPS512,
28347 IX86_BUILTIN_STOREDQUDI512,
28348 IX86_BUILTIN_STOREDQUSI512,
28349 IX86_BUILTIN_STOREUPD512,
28350 IX86_BUILTIN_STOREUPS512,
28351 IX86_BUILTIN_SUBPD512,
28352 IX86_BUILTIN_SUBPS512,
28353 IX86_BUILTIN_SUBSD_ROUND,
28354 IX86_BUILTIN_SUBSS_ROUND,
28355 IX86_BUILTIN_UCMPD512,
28356 IX86_BUILTIN_UCMPQ512,
28357 IX86_BUILTIN_UNPCKHPD512,
28358 IX86_BUILTIN_UNPCKHPS512,
28359 IX86_BUILTIN_UNPCKLPD512,
28360 IX86_BUILTIN_UNPCKLPS512,
28361 IX86_BUILTIN_VCVTSD2SI32,
28362 IX86_BUILTIN_VCVTSD2SI64,
28363 IX86_BUILTIN_VCVTSD2USI32,
28364 IX86_BUILTIN_VCVTSD2USI64,
28365 IX86_BUILTIN_VCVTSS2SI32,
28366 IX86_BUILTIN_VCVTSS2SI64,
28367 IX86_BUILTIN_VCVTSS2USI32,
28368 IX86_BUILTIN_VCVTSS2USI64,
28369 IX86_BUILTIN_VCVTTSD2SI32,
28370 IX86_BUILTIN_VCVTTSD2SI64,
28371 IX86_BUILTIN_VCVTTSD2USI32,
28372 IX86_BUILTIN_VCVTTSD2USI64,
28373 IX86_BUILTIN_VCVTTSS2SI32,
28374 IX86_BUILTIN_VCVTTSS2SI64,
28375 IX86_BUILTIN_VCVTTSS2USI32,
28376 IX86_BUILTIN_VCVTTSS2USI64,
28377 IX86_BUILTIN_VFMADDPD512_MASK,
28378 IX86_BUILTIN_VFMADDPD512_MASK3,
28379 IX86_BUILTIN_VFMADDPD512_MASKZ,
28380 IX86_BUILTIN_VFMADDPS512_MASK,
28381 IX86_BUILTIN_VFMADDPS512_MASK3,
28382 IX86_BUILTIN_VFMADDPS512_MASKZ,
28383 IX86_BUILTIN_VFMADDSD3_ROUND,
28384 IX86_BUILTIN_VFMADDSS3_ROUND,
28385 IX86_BUILTIN_VFMADDSUBPD512_MASK,
28386 IX86_BUILTIN_VFMADDSUBPD512_MASK3,
28387 IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
28388 IX86_BUILTIN_VFMADDSUBPS512_MASK,
28389 IX86_BUILTIN_VFMADDSUBPS512_MASK3,
28390 IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
28391 IX86_BUILTIN_VFMSUBADDPD512_MASK3,
28392 IX86_BUILTIN_VFMSUBADDPS512_MASK3,
28393 IX86_BUILTIN_VFMSUBPD512_MASK3,
28394 IX86_BUILTIN_VFMSUBPS512_MASK3,
28395 IX86_BUILTIN_VFMSUBSD3_MASK3,
28396 IX86_BUILTIN_VFMSUBSS3_MASK3,
28397 IX86_BUILTIN_VFNMADDPD512_MASK,
28398 IX86_BUILTIN_VFNMADDPS512_MASK,
28399 IX86_BUILTIN_VFNMSUBPD512_MASK,
28400 IX86_BUILTIN_VFNMSUBPD512_MASK3,
28401 IX86_BUILTIN_VFNMSUBPS512_MASK,
28402 IX86_BUILTIN_VFNMSUBPS512_MASK3,
28403 IX86_BUILTIN_VPCLZCNTD512,
28404 IX86_BUILTIN_VPCLZCNTQ512,
28405 IX86_BUILTIN_VPCONFLICTD512,
28406 IX86_BUILTIN_VPCONFLICTQ512,
28407 IX86_BUILTIN_VPERMDF512,
28408 IX86_BUILTIN_VPERMDI512,
28409 IX86_BUILTIN_VPERMI2VARD512,
28410 IX86_BUILTIN_VPERMI2VARPD512,
28411 IX86_BUILTIN_VPERMI2VARPS512,
28412 IX86_BUILTIN_VPERMI2VARQ512,
28413 IX86_BUILTIN_VPERMILPD512,
28414 IX86_BUILTIN_VPERMILPS512,
28415 IX86_BUILTIN_VPERMILVARPD512,
28416 IX86_BUILTIN_VPERMILVARPS512,
28417 IX86_BUILTIN_VPERMT2VARD512,
28418 IX86_BUILTIN_VPERMT2VARD512_MASKZ,
28419 IX86_BUILTIN_VPERMT2VARPD512,
28420 IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
28421 IX86_BUILTIN_VPERMT2VARPS512,
28422 IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
28423 IX86_BUILTIN_VPERMT2VARQ512,
28424 IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
28425 IX86_BUILTIN_VPERMVARDF512,
28426 IX86_BUILTIN_VPERMVARDI512,
28427 IX86_BUILTIN_VPERMVARSF512,
28428 IX86_BUILTIN_VPERMVARSI512,
28429 IX86_BUILTIN_VTERNLOGD512_MASK,
28430 IX86_BUILTIN_VTERNLOGD512_MASKZ,
28431 IX86_BUILTIN_VTERNLOGQ512_MASK,
28432 IX86_BUILTIN_VTERNLOGQ512_MASKZ,
28433
28434 /* Mask arithmetic operations */
28435 IX86_BUILTIN_KAND16,
28436 IX86_BUILTIN_KANDN16,
28437 IX86_BUILTIN_KNOT16,
28438 IX86_BUILTIN_KOR16,
28439 IX86_BUILTIN_KORTESTC16,
28440 IX86_BUILTIN_KORTESTZ16,
28441 IX86_BUILTIN_KUNPCKBW,
28442 IX86_BUILTIN_KXNOR16,
28443 IX86_BUILTIN_KXOR16,
28444 IX86_BUILTIN_KMOV16,
28445
28446 /* Alternate 4- and 8-element gather/scatter for the vectorizer,
28447 where all operands are 32-byte or 64-byte wide respectively. */
28448 IX86_BUILTIN_GATHERALTSIV4DF,
28449 IX86_BUILTIN_GATHERALTDIV8SF,
28450 IX86_BUILTIN_GATHERALTSIV4DI,
28451 IX86_BUILTIN_GATHERALTDIV8SI,
28452 IX86_BUILTIN_GATHER3ALTDIV16SF,
28453 IX86_BUILTIN_GATHER3ALTDIV16SI,
28454 IX86_BUILTIN_GATHER3ALTSIV8DF,
28455 IX86_BUILTIN_GATHER3ALTSIV8DI,
28456 IX86_BUILTIN_GATHER3DIV16SF,
28457 IX86_BUILTIN_GATHER3DIV16SI,
28458 IX86_BUILTIN_GATHER3DIV8DF,
28459 IX86_BUILTIN_GATHER3DIV8DI,
28460 IX86_BUILTIN_GATHER3SIV16SF,
28461 IX86_BUILTIN_GATHER3SIV16SI,
28462 IX86_BUILTIN_GATHER3SIV8DF,
28463 IX86_BUILTIN_GATHER3SIV8DI,
28464 IX86_BUILTIN_SCATTERDIV16SF,
28465 IX86_BUILTIN_SCATTERDIV16SI,
28466 IX86_BUILTIN_SCATTERDIV8DF,
28467 IX86_BUILTIN_SCATTERDIV8DI,
28468 IX86_BUILTIN_SCATTERSIV16SF,
28469 IX86_BUILTIN_SCATTERSIV16SI,
28470 IX86_BUILTIN_SCATTERSIV8DF,
28471 IX86_BUILTIN_SCATTERSIV8DI,
28472
28473 /* AVX512PF */
28474 IX86_BUILTIN_GATHERPFQPD,
28475 IX86_BUILTIN_GATHERPFDPS,
28476 IX86_BUILTIN_GATHERPFDPD,
28477 IX86_BUILTIN_GATHERPFQPS,
28478 IX86_BUILTIN_SCATTERPFDPD,
28479 IX86_BUILTIN_SCATTERPFDPS,
28480 IX86_BUILTIN_SCATTERPFQPD,
28481 IX86_BUILTIN_SCATTERPFQPS,
28482
28483 /* AVX-512ER */
28484 IX86_BUILTIN_EXP2PD_MASK,
28485 IX86_BUILTIN_EXP2PS_MASK,
28486 IX86_BUILTIN_EXP2PS,
28487 IX86_BUILTIN_RCP28PD,
28488 IX86_BUILTIN_RCP28PS,
28489 IX86_BUILTIN_RCP28SD,
28490 IX86_BUILTIN_RCP28SS,
28491 IX86_BUILTIN_RSQRT28PD,
28492 IX86_BUILTIN_RSQRT28PS,
28493 IX86_BUILTIN_RSQRT28SD,
28494 IX86_BUILTIN_RSQRT28SS,
28495
28496 /* SHA builtins. */
28497 IX86_BUILTIN_SHA1MSG1,
28498 IX86_BUILTIN_SHA1MSG2,
28499 IX86_BUILTIN_SHA1NEXTE,
28500 IX86_BUILTIN_SHA1RNDS4,
28501 IX86_BUILTIN_SHA256MSG1,
28502 IX86_BUILTIN_SHA256MSG2,
28503 IX86_BUILTIN_SHA256RNDS2,
28504
28505 /* TFmode support builtins. */
28506 IX86_BUILTIN_INFQ,
28507 IX86_BUILTIN_HUGE_VALQ,
28508 IX86_BUILTIN_FABSQ,
28509 IX86_BUILTIN_COPYSIGNQ,
28510
28511 /* Vectorizer support builtins. */
28512 IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
28513 IX86_BUILTIN_CPYSGNPS,
28514 IX86_BUILTIN_CPYSGNPD,
28515 IX86_BUILTIN_CPYSGNPS256,
28516 IX86_BUILTIN_CPYSGNPS512,
28517 IX86_BUILTIN_CPYSGNPD256,
28518 IX86_BUILTIN_CPYSGNPD512,
28519 IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
28520 IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
28521
28522
28523 /* FMA4 instructions. */
28524 IX86_BUILTIN_VFMADDSS,
28525 IX86_BUILTIN_VFMADDSD,
28526 IX86_BUILTIN_VFMADDPS,
28527 IX86_BUILTIN_VFMADDPD,
28528 IX86_BUILTIN_VFMADDPS256,
28529 IX86_BUILTIN_VFMADDPD256,
28530 IX86_BUILTIN_VFMADDSUBPS,
28531 IX86_BUILTIN_VFMADDSUBPD,
28532 IX86_BUILTIN_VFMADDSUBPS256,
28533 IX86_BUILTIN_VFMADDSUBPD256,
28534
28535 /* FMA3 instructions. */
28536 IX86_BUILTIN_VFMADDSS3,
28537 IX86_BUILTIN_VFMADDSD3,
28538
28539 /* XOP instructions. */
28540 IX86_BUILTIN_VPCMOV,
28541 IX86_BUILTIN_VPCMOV_V2DI,
28542 IX86_BUILTIN_VPCMOV_V4SI,
28543 IX86_BUILTIN_VPCMOV_V8HI,
28544 IX86_BUILTIN_VPCMOV_V16QI,
28545 IX86_BUILTIN_VPCMOV_V4SF,
28546 IX86_BUILTIN_VPCMOV_V2DF,
28547 IX86_BUILTIN_VPCMOV256,
28548 IX86_BUILTIN_VPCMOV_V4DI256,
28549 IX86_BUILTIN_VPCMOV_V8SI256,
28550 IX86_BUILTIN_VPCMOV_V16HI256,
28551 IX86_BUILTIN_VPCMOV_V32QI256,
28552 IX86_BUILTIN_VPCMOV_V8SF256,
28553 IX86_BUILTIN_VPCMOV_V4DF256,
28554
28555 IX86_BUILTIN_VPPERM,
28556
28557 IX86_BUILTIN_VPMACSSWW,
28558 IX86_BUILTIN_VPMACSWW,
28559 IX86_BUILTIN_VPMACSSWD,
28560 IX86_BUILTIN_VPMACSWD,
28561 IX86_BUILTIN_VPMACSSDD,
28562 IX86_BUILTIN_VPMACSDD,
28563 IX86_BUILTIN_VPMACSSDQL,
28564 IX86_BUILTIN_VPMACSSDQH,
28565 IX86_BUILTIN_VPMACSDQL,
28566 IX86_BUILTIN_VPMACSDQH,
28567 IX86_BUILTIN_VPMADCSSWD,
28568 IX86_BUILTIN_VPMADCSWD,
28569
28570 IX86_BUILTIN_VPHADDBW,
28571 IX86_BUILTIN_VPHADDBD,
28572 IX86_BUILTIN_VPHADDBQ,
28573 IX86_BUILTIN_VPHADDWD,
28574 IX86_BUILTIN_VPHADDWQ,
28575 IX86_BUILTIN_VPHADDDQ,
28576 IX86_BUILTIN_VPHADDUBW,
28577 IX86_BUILTIN_VPHADDUBD,
28578 IX86_BUILTIN_VPHADDUBQ,
28579 IX86_BUILTIN_VPHADDUWD,
28580 IX86_BUILTIN_VPHADDUWQ,
28581 IX86_BUILTIN_VPHADDUDQ,
28582 IX86_BUILTIN_VPHSUBBW,
28583 IX86_BUILTIN_VPHSUBWD,
28584 IX86_BUILTIN_VPHSUBDQ,
28585
28586 IX86_BUILTIN_VPROTB,
28587 IX86_BUILTIN_VPROTW,
28588 IX86_BUILTIN_VPROTD,
28589 IX86_BUILTIN_VPROTQ,
28590 IX86_BUILTIN_VPROTB_IMM,
28591 IX86_BUILTIN_VPROTW_IMM,
28592 IX86_BUILTIN_VPROTD_IMM,
28593 IX86_BUILTIN_VPROTQ_IMM,
28594
28595 IX86_BUILTIN_VPSHLB,
28596 IX86_BUILTIN_VPSHLW,
28597 IX86_BUILTIN_VPSHLD,
28598 IX86_BUILTIN_VPSHLQ,
28599 IX86_BUILTIN_VPSHAB,
28600 IX86_BUILTIN_VPSHAW,
28601 IX86_BUILTIN_VPSHAD,
28602 IX86_BUILTIN_VPSHAQ,
28603
28604 IX86_BUILTIN_VFRCZSS,
28605 IX86_BUILTIN_VFRCZSD,
28606 IX86_BUILTIN_VFRCZPS,
28607 IX86_BUILTIN_VFRCZPD,
28608 IX86_BUILTIN_VFRCZPS256,
28609 IX86_BUILTIN_VFRCZPD256,
28610
28611 IX86_BUILTIN_VPCOMEQUB,
28612 IX86_BUILTIN_VPCOMNEUB,
28613 IX86_BUILTIN_VPCOMLTUB,
28614 IX86_BUILTIN_VPCOMLEUB,
28615 IX86_BUILTIN_VPCOMGTUB,
28616 IX86_BUILTIN_VPCOMGEUB,
28617 IX86_BUILTIN_VPCOMFALSEUB,
28618 IX86_BUILTIN_VPCOMTRUEUB,
28619
28620 IX86_BUILTIN_VPCOMEQUW,
28621 IX86_BUILTIN_VPCOMNEUW,
28622 IX86_BUILTIN_VPCOMLTUW,
28623 IX86_BUILTIN_VPCOMLEUW,
28624 IX86_BUILTIN_VPCOMGTUW,
28625 IX86_BUILTIN_VPCOMGEUW,
28626 IX86_BUILTIN_VPCOMFALSEUW,
28627 IX86_BUILTIN_VPCOMTRUEUW,
28628
28629 IX86_BUILTIN_VPCOMEQUD,
28630 IX86_BUILTIN_VPCOMNEUD,
28631 IX86_BUILTIN_VPCOMLTUD,
28632 IX86_BUILTIN_VPCOMLEUD,
28633 IX86_BUILTIN_VPCOMGTUD,
28634 IX86_BUILTIN_VPCOMGEUD,
28635 IX86_BUILTIN_VPCOMFALSEUD,
28636 IX86_BUILTIN_VPCOMTRUEUD,
28637
28638 IX86_BUILTIN_VPCOMEQUQ,
28639 IX86_BUILTIN_VPCOMNEUQ,
28640 IX86_BUILTIN_VPCOMLTUQ,
28641 IX86_BUILTIN_VPCOMLEUQ,
28642 IX86_BUILTIN_VPCOMGTUQ,
28643 IX86_BUILTIN_VPCOMGEUQ,
28644 IX86_BUILTIN_VPCOMFALSEUQ,
28645 IX86_BUILTIN_VPCOMTRUEUQ,
28646
28647 IX86_BUILTIN_VPCOMEQB,
28648 IX86_BUILTIN_VPCOMNEB,
28649 IX86_BUILTIN_VPCOMLTB,
28650 IX86_BUILTIN_VPCOMLEB,
28651 IX86_BUILTIN_VPCOMGTB,
28652 IX86_BUILTIN_VPCOMGEB,
28653 IX86_BUILTIN_VPCOMFALSEB,
28654 IX86_BUILTIN_VPCOMTRUEB,
28655
28656 IX86_BUILTIN_VPCOMEQW,
28657 IX86_BUILTIN_VPCOMNEW,
28658 IX86_BUILTIN_VPCOMLTW,
28659 IX86_BUILTIN_VPCOMLEW,
28660 IX86_BUILTIN_VPCOMGTW,
28661 IX86_BUILTIN_VPCOMGEW,
28662 IX86_BUILTIN_VPCOMFALSEW,
28663 IX86_BUILTIN_VPCOMTRUEW,
28664
28665 IX86_BUILTIN_VPCOMEQD,
28666 IX86_BUILTIN_VPCOMNED,
28667 IX86_BUILTIN_VPCOMLTD,
28668 IX86_BUILTIN_VPCOMLED,
28669 IX86_BUILTIN_VPCOMGTD,
28670 IX86_BUILTIN_VPCOMGED,
28671 IX86_BUILTIN_VPCOMFALSED,
28672 IX86_BUILTIN_VPCOMTRUED,
28673
28674 IX86_BUILTIN_VPCOMEQQ,
28675 IX86_BUILTIN_VPCOMNEQ,
28676 IX86_BUILTIN_VPCOMLTQ,
28677 IX86_BUILTIN_VPCOMLEQ,
28678 IX86_BUILTIN_VPCOMGTQ,
28679 IX86_BUILTIN_VPCOMGEQ,
28680 IX86_BUILTIN_VPCOMFALSEQ,
28681 IX86_BUILTIN_VPCOMTRUEQ,
28682
28683 /* LWP instructions. */
28684 IX86_BUILTIN_LLWPCB,
28685 IX86_BUILTIN_SLWPCB,
28686 IX86_BUILTIN_LWPVAL32,
28687 IX86_BUILTIN_LWPVAL64,
28688 IX86_BUILTIN_LWPINS32,
28689 IX86_BUILTIN_LWPINS64,
28690
28691 IX86_BUILTIN_CLZS,
28692
28693 /* RTM */
28694 IX86_BUILTIN_XBEGIN,
28695 IX86_BUILTIN_XEND,
28696 IX86_BUILTIN_XABORT,
28697 IX86_BUILTIN_XTEST,
28698
28699 /* BMI instructions. */
28700 IX86_BUILTIN_BEXTR32,
28701 IX86_BUILTIN_BEXTR64,
28702 IX86_BUILTIN_CTZS,
28703
28704 /* TBM instructions. */
28705 IX86_BUILTIN_BEXTRI32,
28706 IX86_BUILTIN_BEXTRI64,
28707
28708 /* BMI2 instructions. */
28709 IX86_BUILTIN_BZHI32,
28710 IX86_BUILTIN_BZHI64,
28711 IX86_BUILTIN_PDEP32,
28712 IX86_BUILTIN_PDEP64,
28713 IX86_BUILTIN_PEXT32,
28714 IX86_BUILTIN_PEXT64,
28715
28716 /* ADX instructions. */
28717 IX86_BUILTIN_ADDCARRYX32,
28718 IX86_BUILTIN_ADDCARRYX64,
28719
28720 /* FSGSBASE instructions. */
28721 IX86_BUILTIN_RDFSBASE32,
28722 IX86_BUILTIN_RDFSBASE64,
28723 IX86_BUILTIN_RDGSBASE32,
28724 IX86_BUILTIN_RDGSBASE64,
28725 IX86_BUILTIN_WRFSBASE32,
28726 IX86_BUILTIN_WRFSBASE64,
28727 IX86_BUILTIN_WRGSBASE32,
28728 IX86_BUILTIN_WRGSBASE64,
28729
28730 /* RDRND instructions. */
28731 IX86_BUILTIN_RDRAND16_STEP,
28732 IX86_BUILTIN_RDRAND32_STEP,
28733 IX86_BUILTIN_RDRAND64_STEP,
28734
28735 /* RDSEED instructions. */
28736 IX86_BUILTIN_RDSEED16_STEP,
28737 IX86_BUILTIN_RDSEED32_STEP,
28738 IX86_BUILTIN_RDSEED64_STEP,
28739
28740 /* F16C instructions. */
28741 IX86_BUILTIN_CVTPH2PS,
28742 IX86_BUILTIN_CVTPH2PS256,
28743 IX86_BUILTIN_CVTPS2PH,
28744 IX86_BUILTIN_CVTPS2PH256,
28745
28746 /* CFString built-in for darwin */
28747 IX86_BUILTIN_CFSTRING,
28748
28749 /* Builtins to get CPU type and supported features. */
28750 IX86_BUILTIN_CPU_INIT,
28751 IX86_BUILTIN_CPU_IS,
28752 IX86_BUILTIN_CPU_SUPPORTS,
28753
28754 /* Read/write FLAGS register built-ins. */
28755 IX86_BUILTIN_READ_FLAGS,
28756 IX86_BUILTIN_WRITE_FLAGS,
28757
28758 IX86_BUILTIN_MAX
28759 };
28760
28761 /* Table for the ix86 builtin decls. */
28762 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
28763
28764 /* Table of all of the builtin functions that are possible with different ISAs
28765 but are waiting to be built until a function is declared to use that
28766 ISA. */
28767 struct builtin_isa {
28768 const char *name; /* function name */
28769 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
28770 HOST_WIDE_INT isa; /* isa_flags this builtin is defined for */
28771 bool const_p; /* true if the declaration is constant */
28772 bool set_and_not_built_p;
28773 };
28774
28775 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
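
/* Illustrative sketch (editorial addition, not part of the original file).
   Each slot of ix86_builtins_isa mirrors a slot of ix86_builtins: while
   set_and_not_built_p is true, the decl slot stays NULL_TREE and the entry
   only records what is needed to build the decl later.  The helper below is
   hypothetical and never called; it merely shows how the two arrays pair up.  */

static int ATTRIBUTE_UNUSED
ix86_count_deferred_builtins_sketch (void)
{
  int i, deferred = 0;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    if (ix86_builtins_isa[i].set_and_not_built_p
        && ix86_builtins[i] == NULL_TREE)
      deferred++;

  return deferred;
}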
28776
28777
28778 /* Add an ix86 target builtin function with CODE, NAME and TCODE.  Save MASK,
28779    the set of isa_flags the builtin is defined for, in the ix86_builtins_isa
28780    array.  Stores the function decl in the ix86_builtins array.  Returns the
28781    function decl, or NULL_TREE if the builtin was not added.
28782
28783 If the front end has a special hook for builtin functions, delay adding
28784 builtin functions that aren't in the current ISA until the ISA is changed
28785    with function-specific optimization.  Doing so can save about 300K for the
28786    default compiler.  When the builtin is expanded, check at that time whether
28787    it is valid.
28788
28789    If the front end doesn't have a special hook, record all builtins, even if
28790    they aren't in the current ISA, in case the user uses function-specific
28791    options for a different ISA; that way we don't get scope errors if a
28792    builtin is added in the middle of a function scope.  */
28793
28794 static inline tree
28795 def_builtin (HOST_WIDE_INT mask, const char *name,
28796 enum ix86_builtin_func_type tcode,
28797 enum ix86_builtins code)
28798 {
28799 tree decl = NULL_TREE;
28800
28801 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
28802 {
28803 ix86_builtins_isa[(int) code].isa = mask;
28804
28805 mask &= ~OPTION_MASK_ISA_64BIT;
28806 if (mask == 0
28807 || (mask & ix86_isa_flags) != 0
28808 || (lang_hooks.builtin_function
28809 == lang_hooks.builtin_function_ext_scope))
28810
28811 {
28812 tree type = ix86_get_builtin_func_type (tcode);
28813 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
28814 NULL, NULL_TREE);
28815 ix86_builtins[(int) code] = decl;
28816 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
28817 }
28818 else
28819 {
28820 ix86_builtins[(int) code] = NULL_TREE;
28821 ix86_builtins_isa[(int) code].tcode = tcode;
28822 ix86_builtins_isa[(int) code].name = name;
28823 ix86_builtins_isa[(int) code].const_p = false;
28824 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
28825 }
28826 }
28827
28828 return decl;
28829 }
28830
28831 /* Like def_builtin, but also marks the function decl "const". */
28832
28833 static inline tree
28834 def_builtin_const (HOST_WIDE_INT mask, const char *name,
28835 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
28836 {
28837 tree decl = def_builtin (mask, name, tcode, code);
28838 if (decl)
28839 TREE_READONLY (decl) = 1;
28840 else
28841 ix86_builtins_isa[(int) code].const_p = true;
28842
28843 return decl;
28844 }
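
/* Illustrative usage sketch (editorial addition, not part of the original
   file).  It shows the call shape the builtin-initialization code uses with
   def_builtin and def_builtin_const.  The builtin names, the ISA masks and
   the CODE pairing below are hypothetical, chosen only to demonstrate the
   API; the wrapper is never called.  */

static void ATTRIBUTE_UNUSED
ix86_def_builtin_usage_sketch (enum ix86_builtins code)
{
  /* Gate a builtin on SSE: it is registered immediately when SSE is in
     ix86_isa_flags, otherwise it is recorded in ix86_builtins_isa and
     deferred until ix86_add_new_builtins sees a matching ISA.  */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_sketch_example",
               VOID_FTYPE_VOID, code);

  /* Same mechanism, but the resulting decl is also marked TREE_READONLY
     (or const_p is latched so the deferred decl gets marked later).  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sketch_example2",
                     INT_FTYPE_VOID, code);
}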
28845
28846 /* Add any new builtin functions for a given ISA that may not have been
28847 declared. This saves a bit of space compared to adding all of the
28848    declarations to the tree up front, whether or not they are ever used.  */
28849
28850 static void
28851 ix86_add_new_builtins (HOST_WIDE_INT isa)
28852 {
28853 int i;
28854
28855 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
28856 {
28857 if ((ix86_builtins_isa[i].isa & isa) != 0
28858 && ix86_builtins_isa[i].set_and_not_built_p)
28859 {
28860 tree decl, type;
28861
28862 /* Don't define the builtin again. */
28863 ix86_builtins_isa[i].set_and_not_built_p = false;
28864
28865 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
28866 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
28867 type, i, BUILT_IN_MD, NULL,
28868 NULL_TREE);
28869
28870 ix86_builtins[i] = decl;
28871 if (ix86_builtins_isa[i].const_p)
28872 TREE_READONLY (decl) = 1;
28873 }
28874 }
28875 }
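
/* Illustrative usage sketch (editorial addition, not part of the original
   file).  When the set of enabled ISAs grows, for example after switching to
   a more permissive function-specific target, handing the new flag set to
   ix86_add_new_builtins materializes any builtins def_builtin had deferred.
   The wrapper below is hypothetical and never called.  */

static void ATTRIBUTE_UNUSED
ix86_add_new_builtins_usage_sketch (void)
{
  /* Build every builtin that was deferred waiting for AVX2 support.  */
  ix86_add_new_builtins (OPTION_MASK_ISA_AVX2);
}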
28876
28877 /* Bits for builtin_description.flag. */
28878
28879 /* Set when we don't support the comparison natively, and should
28880    swap the comparison operands in order to support it.  */
28881 #define BUILTIN_DESC_SWAP_OPERANDS 1
28882
28883 struct builtin_description
28884 {
28885 const HOST_WIDE_INT mask;
28886 const enum insn_code icode;
28887 const char *const name;
28888 const enum ix86_builtins code;
28889 const enum rtx_code comparison;
28890 const int flag;
28891 };
28892
28893 static const struct builtin_description bdesc_comi[] =
28894 {
28895 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
28896 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
28897 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
28898 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
28899 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
28900 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
28901 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
28902 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
28903 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
28904 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
28905 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
28906 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
28907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
28908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
28909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
28910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
28911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
28912 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
28913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
28914 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
28915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
28916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
28917 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
28918 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
28919 };
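
/* Illustrative sketch (editorial addition, not part of the original file).
   A table like bdesc_comi is consumed by walking it and handing each entry's
   mask, name and code to def_builtin_const.  The COMI/UCOMI entries leave
   the flag field at zero, so the function type (FTYPE here) must be supplied
   by the caller rather than read back from the table.  The helper is
   hypothetical and never called.  */

static void ATTRIBUTE_UNUSED
ix86_register_comi_builtins_sketch (enum ix86_builtin_func_type ftype)
{
  const struct builtin_description *d;
  size_t i;

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->name)
      def_builtin_const (d->mask, d->name, ftype, d->code);
}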
28920
28921 static const struct builtin_description bdesc_pcmpestr[] =
28922 {
28923 /* SSE4.2 */
28924 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
28925 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
28926 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
28927 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
28928 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
28929 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
28930 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
28931 };
28932
28933 static const struct builtin_description bdesc_pcmpistr[] =
28934 {
28935 /* SSE4.2 */
28936 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
28937 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
28938 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
28939 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
28940 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
28941 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
28942 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
28943 };
28944
28945 /* Special builtins with variable number of arguments. */
28946 static const struct builtin_description bdesc_special_args[] =
28947 {
28948 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
28949 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
28950 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
28951
28952   /* 80387 (used internally for atomic compound assignment).  */
28953 { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
28954 { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
28955 { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) VOID_FTYPE_PUSHORT },
28956 { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
28957
28958 /* MMX */
28959 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28960
28961 /* 3DNow! */
28962 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
28963
28964 /* FXSR, XSAVE and XSAVEOPT */
28965 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
28966 { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
28967 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28968 { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28969 { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28970
28971 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28972 { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
28973 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28974 { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28975 { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
28976
28977 /* SSE */
28978 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28979 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
28980 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
28981
28982 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28983 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
28984 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28985 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
28986
28987 /* SSE or 3DNow!A */
28988 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28989 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
28990
28991 /* SSE2 */
28992 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28993 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
28994 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28995 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
28996 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
28997 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
28998 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
28999 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
29000 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
29001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29002
29003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
29005
29006 /* SSE3 */
29007 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
29008
29009 /* SSE4.1 */
29010 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
29011
29012 /* SSE4A */
29013 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
29014 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
29015
29016 /* AVX */
29017 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
29018 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
29019
29020 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
29021 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29022 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29023 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
29024 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
29025
29026 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
29027 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
29028 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29029 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29030 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29031 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
29032 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
29033
29034 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
29035 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
29036 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
29037
29038 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
29039 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
29040 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
29041 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
29042 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
29043 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
29044 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
29045 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
29046
29047 /* AVX2 */
29048 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
29049 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
29050 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
29051 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
29052 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
29053 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
29054 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
29055 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
29056 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
29057
29058 /* AVX512F */
29059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
29076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
29077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
29078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
29079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
29080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
29081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
29082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
29083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
29089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
29092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
29095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
29098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
29101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
29103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
29104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
29105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
29106
29107 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
29108 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
29109 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
29110 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
29111 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
29112 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
29113
29114 /* FSGSBASE */
29115 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29116 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29117 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29118 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
29119 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29120 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29121 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
29122 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
29123
29124 /* RTM */
29125 { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
29126 { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
29127 { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
29128 };
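
/* Illustrative sketch (editorial addition, not part of the original file).
   For bdesc_special_args the flag field already carries the function type
   (an ix86_builtin_func_type cast to int), so a registration walk only needs
   to cast it back and forward each entry to def_builtin.  The helper is
   hypothetical and never called.  */

static void ATTRIBUTE_UNUSED
ix86_register_special_args_sketch (void)
{
  const struct builtin_description *d;
  size_t i;

  for (i = 0, d = bdesc_special_args; i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    if (d->name)
      def_builtin (d->mask, d->name,
                   (enum ix86_builtin_func_type) d->flag, d->code);
}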
29129
29130 /* Builtins with variable number of arguments. */
29131 static const struct builtin_description bdesc_args[] =
29132 {
29133 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
29134 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
29135 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
29136 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29137 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29138 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
29139 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
29140
29141 /* MMX */
29142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29148
29149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29157
29158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29160
29161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29165
29166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29172
29173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29176 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
29178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
29179
29180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
29182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
29183
29184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
29185
29186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29192
29193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
29196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
29199
29200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
29201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
29202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
29203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
29204
29205 /* 3DNow! */
29206 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29207 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29208 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29209 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29210
29211 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29212 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29213 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29214 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29215 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29216 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
29217 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29218 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29219 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29220 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29221 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29222 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29223 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29224 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29225 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29226
29227 /* 3DNow!A */
29228 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
29229 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
29230 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29231 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
29232 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29233 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
29234
29235 /* SSE */
29236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
29237 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29238 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29240 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29244 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
29246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
29247 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
29248
29249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29250
29251 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29252 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29253 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29259
29260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
29270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
29271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
29273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
29274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
29275 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29276 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
29277 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
29278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
29279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
29280
29281 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29282 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29283 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29284 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29285
29286 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29288 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29289 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29290
29291 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29292
29293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29294 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29295 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29296 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29297 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29298
29299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
29300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
29301   { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
29302
29303 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
29304
29305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
29308
29309 { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
29310 { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
29311
29312   /* SSE MMX or 3DNow!A */
29313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29314 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29315 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29316
29317 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29318 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29319 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29320 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29321
29322 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
29323 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
29324
29325 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
29326
29327 /* SSE2 */
29328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29329
29330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
29331 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
29332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29333 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
29334 { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
29335
29336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29337 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
29339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
29340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
29341
29342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
29343
29344 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29345 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
29346 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29347 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
29348
29349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
29351 { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29352
29353 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29354 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29355 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29356 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29361
29362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29366   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29372 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
29373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29374 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
29375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
29376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
29377 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
29378 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
29379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
29380 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
29381 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
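  /* Approximate note on the _SWAP entries above: there is no separate
     greater-than mask-compare pattern, so the cmpgt/cmpge builtins
     reuse the LT/LE patterns and have their two operands swapped when
     the builtin is expanded.  In user terms,
     __builtin_ia32_cmpgtpd (a, b) produces the same mask as
     __builtin_ia32_cmpltpd (b, a).  */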
29382
29383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29384 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29386 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29387
29388 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29389 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29390 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29391 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29392
29393 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29394
29395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29396 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29397 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29398
29399 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29400
29401 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29402 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29403 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29404 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29405 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29406 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29407 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29408 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29409
29410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29418
29419 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29420   { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29421
29422 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29424 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29425 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29426
29427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29429
29430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29436
29437 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29438 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29439 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29441
29442 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29443 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29444 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29445 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29446 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29447 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29448 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29449 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29450
29451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29452 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
29454
29455 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
29457
29458 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
29459 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29460
29461 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
29462
29463 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
29464 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
29465 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
29466 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
29467
29468 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29469 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29470 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29471 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29472 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29473 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29474 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29475
29476 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
29477 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29478 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29479 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
29480 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29481 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
29482 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
29483
29484 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
29485 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
29486 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
29487 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
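  /* Approximate note on the _COUNT entries above: the "...i128" forms
     (SI_COUNT) take the shift count as a scalar/immediate, while the
     plain forms (V8HI/V4SI/V2DI_COUNT) take it in a vector register,
     as the hardware psllw/pslld/psllq variants do; both are routed to
     the same ashl/lshr/ashr patterns.  The whole-register
     pslldqi128/psrldqi128 counts are in bits, which is why the
     <emmintrin.h> wrappers multiply the byte count by 8.  */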
29488
29489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
29490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
29492
29493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
29494
29495   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29496
29497 /* SSE2 MMX */
29498 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29499 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
29500
29501 /* SSE3 */
29502   { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29503 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29504
29505 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29506 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29507 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29508 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29509 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
29510 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
29511
29512 /* SSSE3 */
29513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
29515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
29517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
29519
29520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29528 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
29533 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
29534 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29535 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29536 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29537 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29538 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29539 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
29540 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29541 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
29542 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29543 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
29544
29545   /* SSSE3 palignr.  */
29546 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
29547 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
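  /* Approximate note on the _CONVERT entries above: the builtins are
     declared with V2DI/V1DI operands, but the palignr patterns work on
     a single wide integer mode (TImode/DImode), so the operands are
     converted to that mode at expansion time.  As with pslldq, the
     immediate is a bit count; the <tmmintrin.h> wrappers pass the
     intrinsic's byte offset multiplied by 8 to
     __builtin_ia32_palignr128 / __builtin_ia32_palignr.  */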
29548
29549 /* SSE4.1 */
29550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29551 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
29553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
29554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
29558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
29559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
29560
29561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29565 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29566 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29567 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
29568 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
29569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
29570 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
29571 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
29572 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
29573 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29574
29575 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
29576 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29577 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29578 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29579 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29580 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29581 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
29582 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29583 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29584 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
29585 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
29586 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29587
29588   /* SSE4.1 round and ptest forms (OPTION_MASK_ISA_ROUND).  */
29589 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29590 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29591 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29592 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29593
29594 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
29595 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
29596 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
29597 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },
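  /* Approximate note on the floor/ceil/trunc/rint entries: the value
     cast to rtx_code in the fifth field is not a comparison; it is one
     of the ROUND_* rounding-control immediates from i386.h
     (ROUND_FLOOR, ROUND_CEIL, ROUND_TRUNC, ROUND_MXCSR), which the
     expander supplies to the same sse4_1_roundpd/roundps patterns that
     back the explicit round builtins.  Roughly,
     __builtin_ia32_floorpd (x) behaves like
     __builtin_ia32_roundpd (x, ROUND_FLOOR), give or take the
     exception-suppression bit.  */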
29598
29599 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29600 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
29601
29602 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
29603 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
29604
29605 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
29606 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
29607 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
29608 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },
29609
29610 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
29611 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },
29612
29613 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29614 { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },
29615
29616 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29617 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
29618 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
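  /* Approximate note on the ptest entries: PTEST only sets flags, so
     the rtx code selects which flag the expander turns into the
     integer result: EQ reads ZF (ptestz, all selected bits zero), LTU
     reads CF (ptestc), and GTU tests that both are clear (ptestnzc).
     The vtestps/vtestpd entries in the AVX group below follow the same
     convention.  */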
29619
29620 /* SSE4.2 */
29621 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29622 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
29623 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
29624 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29625 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29626
29627 /* SSE4A */
29628 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
29629 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
29630 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
29631 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29632
29633 /* AES */
29634 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
29635 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29636
29637 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29638 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29639 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29640 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29641
29642 /* PCLMUL */
29643 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
29644
29645 /* AVX */
29646 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29647 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29650 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29651 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29654 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29660 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29661 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29662 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29663 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29664 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29665 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29666 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29667 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29668 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29669 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29670 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29671 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29672
29673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
29674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
29675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
29676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29677
29678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
29681 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
29682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29685 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29686 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
29688 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
29689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29690 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29691 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
29692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
29693 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
29694 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
29695 { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
29696 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
29697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
29699 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
29701 { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
29703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
29704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
29706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
29707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29708 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
29710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
29711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
29712
29713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29716
29717 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29719 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29721 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29722
29723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29724
29725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
29727
29728 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
29729 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
29730 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
29731 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },
29732
29733 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
29734 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29735
29736 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29737 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
29738
29739 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
29740 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
29741 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
29742 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },
29743
29744 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
29745 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
29746
29747 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
29748 { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
29749
29750 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29751 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29752 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29753 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29754
29755 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29756 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29757 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29758 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
29759 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
29760 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
29761
29762 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29763 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29764 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
29765 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29766 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29767 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
29768 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29769 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29770 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
29771 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29772 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29773 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
29774 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29775 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29776 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
29777
29778 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
29779 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
29780
29781 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
29782 { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
29783
29784   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
29785
29786 /* AVX2 */
29787 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
29788 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
29789 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
29790 { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
29791 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256", IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29792 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256", IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29793 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256", IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
29794 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256", IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
29795 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29796 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29797 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29798 { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29799 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29800 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29801 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29802 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29803 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
29804 { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29805 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29806 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256", IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29807 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256", IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29808 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
29809 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
29810 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29811 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29812 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29813 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29814 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29815 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29816 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29817 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29818 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29819 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29820 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29821 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29822 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29823 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29824 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29825 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
29826 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29827 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29828 { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29829 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29830 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29831 { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29832 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29833 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29834 { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29835 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29836 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29837 { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29838 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
29839 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29840 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2 , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29841 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2 , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29842 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2 , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29843 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2 , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29844 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2 , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29845 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
29846 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2 , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
29847 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2 , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
29848 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2 , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
29849 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
29850 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
29851 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29852 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29853 { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29854 { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29855 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29856 { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29857 { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
29858 { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29859 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
29860 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29861 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
29862 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29863 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
29864 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29865 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29866 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29867 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29868 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29869 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29870 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29871 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29872 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29873 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29874 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29875 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29876 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29877 { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29878 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
29879 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
29880 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
29881 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
29882 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
29883 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
29884 { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
29885 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29886 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29887 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29888 { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29889 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29890 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29891 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29892 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29893 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29894 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29895 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29896 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29897 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
29898 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
29899 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29900 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29901 { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29902 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
29903 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
29904 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
29905 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29906 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
29907 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
29908 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
29909 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
29910 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
29911 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
29912 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
29913 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
29914 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
29915 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
29916 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29917 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
29918 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
29919 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
29920 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
29921 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
29922 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_inserti128, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
29923 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29924 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29925 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29926 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29927 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29928 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29929 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
29930 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
29931 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
29932 { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
29933
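  /* LZCNT */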
29934 { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29935
29936 /* BMI */
29937 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29938 { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29939 { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2, "__builtin_ctzs", IX86_BUILTIN_CTZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
29940
29941 /* TBM */
29942 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29943 { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29944
29945 /* F16C */
29946 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
29947 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
29948 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
29949 { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
29950
29951 /* BMI2 */
29952 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29953 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29954 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29955 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29956 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
29957 { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
29958
29959 /* AVX512F */
29960 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
29961 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
29962 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29963 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29964 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29965 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29966 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29967 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
29968 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
29969 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
29970 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
29971 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
29972 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
29973 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
29974 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29975 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29976 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29977 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask, "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
29978 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
29979 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
29980 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29981 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29982 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29983 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29984 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
29985 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
29986 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
29987 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
29988 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
29989 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
29990 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
29991 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
29992 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29993 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29994 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
29995 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
29996 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
29997 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29998 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
29999 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30000 { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30001 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30002 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30003 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30004 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30005 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30006 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30007 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
30008 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
30009 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
30010 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
30011 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
30012 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30013 { OPTION_MASK_ISA_AVX512F & ~OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vec_dup_memv8di_mask, "__builtin_ia32_pbroadcastq512_mem_mask", IX86_BUILTIN_PBROADCASTQ512_MEM, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
30014 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30015 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30016 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30017 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30018 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30019 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30020 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30021 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30022 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30023 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30024 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30025 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30026 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30027 { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30028 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30029 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30030 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30031 { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30032 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30033 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30034 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30035 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30036 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30037 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30038 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30039 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30040 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30041 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30042 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30043 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30044 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30045 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30046 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30047 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
30048 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
30049 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
30050 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
30051 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
30052 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
30053 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
30054 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
30055 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
30056 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
30057 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30058 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask" , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30059 { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
30060 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30061 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30062 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30063 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30064 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30065 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30066 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30067 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30068 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30069 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30070 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30071 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30072 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30073 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30074 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30075 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30076 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30077 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30078 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30079 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30080 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30081 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30082 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30083 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
30084 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
30085 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
30086 { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30087 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30088 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30089 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30090 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30091 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30092 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30093 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
30094 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
30095 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30096 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30097 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30098 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30099 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30100 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30101 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30102 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30103 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30104 { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30105 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
30106 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
30107 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
30108 { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
30109 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30110 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30111 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
30112 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
30113 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
30114 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
30115 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
30116 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
30117 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30118 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30119 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
30120 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask, "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
30121 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30122 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30123 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
30124 { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
30125 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30126 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
30127 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30128 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30129 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30130 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30131 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
30132 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
30133 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30134 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30135 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30136 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30137 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30138 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
30139 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30140 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
30141 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30142 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30143 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
30144 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
30145 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
30146 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
30147 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30148 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
30149 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30150 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
30151
30152 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3, "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
30153 { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3, "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
30154 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
30155 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30156 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
30157 { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
30158 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30159 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
30160
30161 /* Mask arithmetic operations */
30162 { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30163 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30164 { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
30165 { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30166 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30167 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30168 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
30169 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30170 { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
30171 { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
30172
30173 /* SHA */
30174 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30175 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30176 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30177 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
30178 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30179 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
30180 { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
30181 };
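/* A minimal sketch (not compiled here) of how one of the mask-arithmetic
   entries above is reached from user code, assuming -mavx512f and
   <immintrin.h>; the prototype code HI_FTYPE_HI_HI means the builtin takes
   and returns 16-bit mask values, and the _mm512_kand wrapper is the
   supported interface:

     __mmask16
     mask_and (__mmask16 a, __mmask16 b)
     {
       return (__mmask16) __builtin_ia32_kandhi (a, b);
     }
*/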
30182
30183 /* Builtins with rounding support. */
30184 static const struct builtin_description bdesc_round_args[] =
30185 {
30186 /* AVX512F */
30187 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30188 { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30189 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30190 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30191 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
30192 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
30193 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
30194 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
30195 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
30196 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
30197 { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30198 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30199 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round, "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
30200 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv8dfv8si_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30201 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round, "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
30202 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30203 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
30204 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30205 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
30206 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
30207 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
30208 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
30209 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
30210 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30211 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
30212 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30213 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
30214 { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
30215 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
30216 { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
30217 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
30218 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30219 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30220 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30221 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30222 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30223 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
30224 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30225 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
30226 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30227 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
30228 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30229 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
30230 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30231 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30232 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30233 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30234 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30235 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30236 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30237 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30238 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30239 { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30240 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30241 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30242 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30243 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30244 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30245 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30246 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30247 { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30248 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30249 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30250 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
30251 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
30252 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
30253 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
30254 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30255 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30256 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30257 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30258 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30259 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30260 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30261 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30262 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30263 { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30264 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30265 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30266 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30267 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30268 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30269 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30270 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30271 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30272 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30273 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30274 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
30275 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
30276 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
30277 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
30278 { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
30279 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
30280 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
30281 { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
30282 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30283 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30284 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30285 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30286 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30287 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30288 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
30289 { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
30290 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30291 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30292 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30293 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30294 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30295 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30296 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30297 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30298 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30299 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30300 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30301 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30302 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30303 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
30304 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30305 { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
30306
30307 /* AVX512ER */
30308 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30309 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30310 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30311 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30312 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30313 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30314 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
30315 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
30316 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
30317 { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
30318 };
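/* A rough sketch (not compiled here) of how the trailing INT operand of
   these entries carries the embedded rounding-mode immediate; for
   V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT the arguments are the two sources, the
   pass-through vector, the write mask and the rounding immediate.  It
   assumes -mavx512f and <immintrin.h>; user code normally goes through the
   _mm512_*_round_* wrappers instead of the raw builtin:

     __m512d
     add_rne (__m512d a, __m512d b)
     {
       return (__m512d) __builtin_ia32_addpd512_mask
	 ((__v8df) a, (__v8df) b, (__v8df) a, (__mmask8) -1,
	  _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
     }
*/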
30319
30320 /* FMA4 and XOP. */
30321 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
30322 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
30323 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
30324 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
30325 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
30326 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
30327 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
30328 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
30329 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
30330 #define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
30331 #define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
30332 #define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
30333 #define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
30334 #define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
30335 #define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
30336 #define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
30337 #define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
30338 #define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
30339 #define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
30340 #define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
30341 #define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
30342 #define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
30343 #define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
30344 #define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
30345 #define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
30346 #define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
30347 #define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
30348 #define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
30349 #define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
30350 #define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
30351 #define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
30352 #define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
30353 #define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
30354 #define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
30355 #define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
30356 #define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
30357 #define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
30358 #define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
30359 #define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
30360 #define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
30361 #define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
30362 #define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
30363 #define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
30364 #define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
30365 #define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
30366 #define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
30367 #define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
30368 #define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
30369 #define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
30370 #define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
30371 #define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
30372 #define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
30373
30374 static const struct builtin_description bdesc_multi_arg[] =
30375 {
30376 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
30377 "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
30378 UNKNOWN, (int)MULTI_ARG_3_SF },
30379 { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
30380 "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
30381 UNKNOWN, (int)MULTI_ARG_3_DF },
30382
30383 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
30384 "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
30385 UNKNOWN, (int)MULTI_ARG_3_SF },
30386 { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
30387 "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
30388 UNKNOWN, (int)MULTI_ARG_3_DF },
30389
30390 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
30391 "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
30392 UNKNOWN, (int)MULTI_ARG_3_SF },
30393 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
30394 "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
30395 UNKNOWN, (int)MULTI_ARG_3_DF },
30396 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
30397 "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
30398 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30399 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
30400 "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
30401 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30402
30403 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
30404 "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
30405 UNKNOWN, (int)MULTI_ARG_3_SF },
30406 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
30407 "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
30408 UNKNOWN, (int)MULTI_ARG_3_DF },
30409 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
30410 "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
30411 UNKNOWN, (int)MULTI_ARG_3_SF2 },
30412 { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
30413 "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
30414 UNKNOWN, (int)MULTI_ARG_3_DF2 },
30415
30416 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
30417 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
30418 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
30419 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
30420   { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
30421 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
30422 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
30423
30424 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30425 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
30426 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
30427 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
30428 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
30429 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
30430 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
30431
30432 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
30433
30434 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
30436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
30440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
30444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
30446
30447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
30449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
30450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
30451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
30452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
30453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
30454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
30455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
30457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
30458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
30459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
30460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
30461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
30462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
30463
30464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_1_SF },
30465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_1_DF },
30466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
30467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
30468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
30469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
30470
30471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
30479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
30480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
30482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
30484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
30485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
30486
30487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
30488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
30490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
30491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
30492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
30493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
30494
30495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
30496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
30498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
30499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
30500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
30501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
30502
30503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
30504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
30506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
30507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
30508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
30509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
30510
30511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
30514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
30515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
30516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
30517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
30518
30519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
30520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
30522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
30523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
30524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
30525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
30526
30527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
30528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
30530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
30531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
30532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
30533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
30534
30535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
30536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
30538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
30539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
30540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
30541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
30542
30543 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
30544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
30546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
30547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
30548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
30549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
30550
30551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30552 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
30556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
30557 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
30558 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
30559
30560 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30561 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30562 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30563 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30564 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
30565 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
30566 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
30567 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
30568
30569 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
30570 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
30571 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
30572 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
30573
30574 };
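/* A small sketch (not compiled here) of how the table above reuses one insn
   pattern for a whole family of comparisons: the rtx code in the fifth
   field (EQ, LT, GTU, ...) selects the condition when the pattern is
   expanded.  Assuming -mxop and <x86intrin.h>, the LT byte variant would be
   reached as:

     __v16qi
     lt_bytes (__v16qi a, __v16qi b)
     {
       return __builtin_ia32_vpcomltb (a, b);
     }
*/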
30575 \f
30576 /* TM vector builtins. */
30577
30578 /* Reuse the existing x86-specific `struct builtin_description' because
30579    we're lazy.  Add casts to make them fit.  */
30580 static const struct builtin_description bdesc_tm[] =
30581 {
30582 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30583 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30584 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
30585 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30586 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30587 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30588 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
30589
30590 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30591 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30592 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
30593 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30594 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30595 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30596 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
30597
30598 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30599 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30600 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
30601 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30602 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30603 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30604 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
30605
30606 { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
30607 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
30608 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
30609 };
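/* The names above are simply "__builtin_" glued onto the _ITM_* runtime
   entry points, so stripping the prefix recovers the transactional-memory
   runtime symbol, e.g. (a sketch of the string arithmetic used below, not
   extra code):

     "__builtin__ITM_WM64" + strlen ("__builtin_")  ==>  "_ITM_WM64"

   ix86_init_tm_builtins relies on this when it passes the suffix to
   add_builtin_function as the name used for direct calls.  */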
30610
30611 /* TM callbacks. */
30612
30613 /* Return the builtin decl needed to load a vector of TYPE. */
30614
30615 static tree
30616 ix86_builtin_tm_load (tree type)
30617 {
30618 if (TREE_CODE (type) == VECTOR_TYPE)
30619 {
30620 switch (tree_to_uhwi (TYPE_SIZE (type)))
30621 {
30622 case 64:
30623 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
30624 case 128:
30625 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
30626 case 256:
30627 return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
30628 }
30629 }
30630 return NULL_TREE;
30631 }
30632
30633 /* Return the builtin decl needed to store a vector of TYPE. */
30634
30635 static tree
30636 ix86_builtin_tm_store (tree type)
30637 {
30638 if (TREE_CODE (type) == VECTOR_TYPE)
30639 {
30640 switch (tree_to_uhwi (TYPE_SIZE (type)))
30641 {
30642 case 64:
30643 return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
30644 case 128:
30645 return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
30646 case 256:
30647 return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
30648 }
30649 }
30650 return NULL_TREE;
30651 }
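/* A sketch of how these callbacks are presumably consumed (the hook wiring
   and the caller live elsewhere in the compiler): the trans-mem lowering
   asks the target for a vector TM load or store variant,

     tree fn = targetm.vectorize.builtin_tm_load (type);

   and falls back to the scalar _ITM_ accessors when NULL_TREE comes back,
   which is why unhandled vector sizes return NULL_TREE above.  */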
30652 \f
30653 /* Initialize the transactional memory vector load/store builtins. */
30654
30655 static void
30656 ix86_init_tm_builtins (void)
30657 {
30658 enum ix86_builtin_func_type ftype;
30659 const struct builtin_description *d;
30660 size_t i;
30661 tree decl;
30662 tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
30663 tree attrs_log, attrs_type_log;
30664
30665 if (!flag_tm)
30666 return;
30667
30668 /* If there are no builtins defined, we must be compiling in a
30669 language without trans-mem support. */
30670 if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
30671 return;
30672
30673 /* Use whatever attributes a normal TM load has. */
30674 decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
30675 attrs_load = DECL_ATTRIBUTES (decl);
30676 attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30677 /* Use whatever attributes a normal TM store has. */
30678 decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
30679 attrs_store = DECL_ATTRIBUTES (decl);
30680 attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30681 /* Use whatever attributes a normal TM log has. */
30682 decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
30683 attrs_log = DECL_ATTRIBUTES (decl);
30684 attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
30685
30686 for (i = 0, d = bdesc_tm;
30687 i < ARRAY_SIZE (bdesc_tm);
30688 i++, d++)
30689 {
30690 if ((d->mask & ix86_isa_flags) != 0
30691 || (lang_hooks.builtin_function
30692 == lang_hooks.builtin_function_ext_scope))
30693 {
30694 tree type, attrs, attrs_type;
30695 enum built_in_function code = (enum built_in_function) d->code;
30696
30697 ftype = (enum ix86_builtin_func_type) d->flag;
30698 type = ix86_get_builtin_func_type (ftype);
30699
30700 if (BUILTIN_TM_LOAD_P (code))
30701 {
30702 attrs = attrs_load;
30703 attrs_type = attrs_type_load;
30704 }
30705 else if (BUILTIN_TM_STORE_P (code))
30706 {
30707 attrs = attrs_store;
30708 attrs_type = attrs_type_store;
30709 }
30710 else
30711 {
30712 attrs = attrs_log;
30713 attrs_type = attrs_type_log;
30714 }
30715 decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
30716 /* The builtin without the prefix for
30717 calling it directly. */
30718 d->name + strlen ("__builtin_"),
30719 attrs);
30720 /* add_builtin_function() will set the DECL_ATTRIBUTES, now
30721 set the TYPE_ATTRIBUTES. */
30722 decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
30723
30724 set_builtin_decl (code, decl, false);
30725 }
30726 }
30727 }
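
/* Illustrative sketch (hedged example, not part of the port): for the
   bdesc_tm entry "__builtin__ITM_LM128" above, the loop effectively does

     decl = add_builtin_function ("__builtin__ITM_LM128", type,
                                  BUILT_IN_TM_LOG_M128, BUILT_IN_NORMAL,
                                  "_ITM_LM128", attrs_log);
     decl_attributes (&TREE_TYPE (decl), attrs_type_log, ATTR_FLAG_BUILT_IN);
     set_builtin_decl (BUILT_IN_TM_LOG_M128, decl, false);

   so the SSE log entry point is visible both as a builtin and under the
   libitm name _ITM_LM128.  */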
30728
30729 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
30730 in the current target ISA, to allow the user to compile particular modules
30731 with target-specific options that differ from the command-line
30732 options. */
30733 static void
30734 ix86_init_mmx_sse_builtins (void)
30735 {
30736 const struct builtin_description * d;
30737 enum ix86_builtin_func_type ftype;
30738 size_t i;
30739
30740 /* Add all special builtins with variable number of operands. */
30741 for (i = 0, d = bdesc_special_args;
30742 i < ARRAY_SIZE (bdesc_special_args);
30743 i++, d++)
30744 {
30745 if (d->name == 0)
30746 continue;
30747
30748 ftype = (enum ix86_builtin_func_type) d->flag;
30749 def_builtin (d->mask, d->name, ftype, d->code);
30750 }
30751
30752 /* Add all builtins with variable number of operands. */
30753 for (i = 0, d = bdesc_args;
30754 i < ARRAY_SIZE (bdesc_args);
30755 i++, d++)
30756 {
30757 if (d->name == 0)
30758 continue;
30759
30760 ftype = (enum ix86_builtin_func_type) d->flag;
30761 def_builtin_const (d->mask, d->name, ftype, d->code);
30762 }
30763
30764 /* Add all builtins with rounding. */
30765 for (i = 0, d = bdesc_round_args;
30766 i < ARRAY_SIZE (bdesc_round_args);
30767 i++, d++)
30768 {
30769 if (d->name == 0)
30770 continue;
30771
30772 ftype = (enum ix86_builtin_func_type) d->flag;
30773 def_builtin_const (d->mask, d->name, ftype, d->code);
30774 }
30775
30776 /* pcmpestr[im] insns. */
30777 for (i = 0, d = bdesc_pcmpestr;
30778 i < ARRAY_SIZE (bdesc_pcmpestr);
30779 i++, d++)
30780 {
30781 if (d->code == IX86_BUILTIN_PCMPESTRM128)
30782 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
30783 else
30784 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
30785 def_builtin_const (d->mask, d->name, ftype, d->code);
30786 }
30787
30788 /* pcmpistr[im] insns. */
30789 for (i = 0, d = bdesc_pcmpistr;
30790 i < ARRAY_SIZE (bdesc_pcmpistr);
30791 i++, d++)
30792 {
30793 if (d->code == IX86_BUILTIN_PCMPISTRM128)
30794 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
30795 else
30796 ftype = INT_FTYPE_V16QI_V16QI_INT;
30797 def_builtin_const (d->mask, d->name, ftype, d->code);
30798 }
30799
30800 /* comi/ucomi insns. */
30801 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
30802 {
30803 if (d->mask == OPTION_MASK_ISA_SSE2)
30804 ftype = INT_FTYPE_V2DF_V2DF;
30805 else
30806 ftype = INT_FTYPE_V4SF_V4SF;
30807 def_builtin_const (d->mask, d->name, ftype, d->code);
30808 }
30809
30810 /* SSE */
30811 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
30812 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
30813 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
30814 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
30815
30816 /* SSE or 3DNow!A */
30817 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
30818 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
30819 IX86_BUILTIN_MASKMOVQ);
30820
30821 /* SSE2 */
30822 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
30823 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
30824
30825 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
30826 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
30827 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
30828 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
30829
30830 /* SSE3. */
30831 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
30832 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
30833 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
30834 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
30835
30836 /* AES */
30837 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
30838 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
30839 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
30840 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
30841 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
30842 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
30843 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
30844 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
30845 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
30846 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
30847 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
30848 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
30849
30850 /* PCLMUL */
30851 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
30852 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
30853
30854 /* RDRND */
30855 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
30856 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
30857 def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
30858 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
30859 def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
30860 "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
30861 IX86_BUILTIN_RDRAND64_STEP);
30862
30863 /* AVX2 */
30864 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
30865 V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
30866 IX86_BUILTIN_GATHERSIV2DF);
30867
30868 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
30869 V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
30870 IX86_BUILTIN_GATHERSIV4DF);
30871
30872 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
30873 V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
30874 IX86_BUILTIN_GATHERDIV2DF);
30875
30876 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
30877 V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
30878 IX86_BUILTIN_GATHERDIV4DF);
30879
30880 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
30881 V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
30882 IX86_BUILTIN_GATHERSIV4SF);
30883
30884 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
30885 V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
30886 IX86_BUILTIN_GATHERSIV8SF);
30887
30888 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
30889 V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
30890 IX86_BUILTIN_GATHERDIV4SF);
30891
30892 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
30893 V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
30894 IX86_BUILTIN_GATHERDIV8SF);
30895
30896 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
30897 V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
30898 IX86_BUILTIN_GATHERSIV2DI);
30899
30900 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
30901 V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
30902 IX86_BUILTIN_GATHERSIV4DI);
30903
30904 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
30905 V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
30906 IX86_BUILTIN_GATHERDIV2DI);
30907
30908 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
30909 V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
30910 IX86_BUILTIN_GATHERDIV4DI);
30911
30912 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
30913 V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
30914 IX86_BUILTIN_GATHERSIV4SI);
30915
30916 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
30917 V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
30918 IX86_BUILTIN_GATHERSIV8SI);
30919
30920 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
30921 V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
30922 IX86_BUILTIN_GATHERDIV4SI);
30923
30924 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
30925 V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
30926 IX86_BUILTIN_GATHERDIV8SI);
30927
30928 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df",
30929 V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
30930 IX86_BUILTIN_GATHERALTSIV4DF);
30931
30932 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256",
30933 V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
30934 IX86_BUILTIN_GATHERALTDIV8SF);
30935
30936 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di",
30937 V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
30938 IX86_BUILTIN_GATHERALTSIV4DI);
30939
30940 def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256",
30941 V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
30942 IX86_BUILTIN_GATHERALTDIV8SI);
30943
30944 /* AVX512F */
30945 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
30946 V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
30947 IX86_BUILTIN_GATHER3SIV16SF);
30948
30949 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
30950 V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
30951 IX86_BUILTIN_GATHER3SIV8DF);
30952
30953 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
30954 V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
30955 IX86_BUILTIN_GATHER3DIV16SF);
30956
30957 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
30958 V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
30959 IX86_BUILTIN_GATHER3DIV8DF);
30960
30961 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
30962 V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
30963 IX86_BUILTIN_GATHER3SIV16SI);
30964
30965 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
30966 V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
30967 IX86_BUILTIN_GATHER3SIV8DI);
30968
30969 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
30970 V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
30971 IX86_BUILTIN_GATHER3DIV16SI);
30972
30973 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
30974 V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
30975 IX86_BUILTIN_GATHER3DIV8DI);
30976
30977 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df",
30978 V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
30979 IX86_BUILTIN_GATHER3ALTSIV8DF);
30980
30981 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf",
30982 V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
30983 IX86_BUILTIN_GATHER3ALTDIV16SF);
30984
30985 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di",
30986 V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
30987 IX86_BUILTIN_GATHER3ALTSIV8DI);
30988
30989 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si",
30990 V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
30991 IX86_BUILTIN_GATHER3ALTDIV16SI);
30992
30993 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
30994 VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
30995 IX86_BUILTIN_SCATTERSIV16SF);
30996
30997 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
30998 VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
30999 IX86_BUILTIN_SCATTERSIV8DF);
31000
31001 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
31002 VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
31003 IX86_BUILTIN_SCATTERDIV16SF);
31004
31005 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
31006 VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
31007 IX86_BUILTIN_SCATTERDIV8DF);
31008
31009 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
31010 VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
31011 IX86_BUILTIN_SCATTERSIV16SI);
31012
31013 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
31014 VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
31015 IX86_BUILTIN_SCATTERSIV8DI);
31016
31017 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
31018 VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
31019 IX86_BUILTIN_SCATTERDIV16SI);
31020
31021 def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
31022 VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
31023 IX86_BUILTIN_SCATTERDIV8DI);
31024
31025 /* AVX512PF */
31026 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
31027 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31028 IX86_BUILTIN_GATHERPFDPD);
31029 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
31030 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31031 IX86_BUILTIN_GATHERPFDPS);
31032 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
31033 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31034 IX86_BUILTIN_GATHERPFQPD);
31035 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
31036 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31037 IX86_BUILTIN_GATHERPFQPS);
31038 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
31039 VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
31040 IX86_BUILTIN_SCATTERPFDPD);
31041 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
31042 VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
31043 IX86_BUILTIN_SCATTERPFDPS);
31044 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
31045 VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
31046 IX86_BUILTIN_SCATTERPFQPD);
31047 def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
31048 VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
31049 IX86_BUILTIN_SCATTERPFQPS);
31050
31051 /* SHA */
31052 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
31053 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
31054 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
31055 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
31056 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
31057 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
31058 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
31059 V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
31060 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
31061 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
31062 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
31063 V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
31064 def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
31065 V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
31066
31067 /* RTM. */
31068 def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
31069 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
31070
31071 /* MMX access to the vec_init patterns. */
31072 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
31073 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
31074
31075 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
31076 V4HI_FTYPE_HI_HI_HI_HI,
31077 IX86_BUILTIN_VEC_INIT_V4HI);
31078
31079 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
31080 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
31081 IX86_BUILTIN_VEC_INIT_V8QI);
31082
31083 /* Access to the vec_extract patterns. */
31084 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
31085 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
31086 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
31087 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
31088 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
31089 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
31090 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
31091 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
31092 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
31093 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
31094
31095 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31096 "__builtin_ia32_vec_ext_v4hi",
31097 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
31098
31099 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
31100 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
31101
31102 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
31103 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
31104
31105 /* Access to the vec_set patterns. */
31106 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
31107 "__builtin_ia32_vec_set_v2di",
31108 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
31109
31110 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
31111 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
31112
31113 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
31114 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
31115
31116 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
31117 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
31118
31119 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
31120 "__builtin_ia32_vec_set_v4hi",
31121 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
31122
31123 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
31124 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
31125
31126 /* RDSEED */
31127 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
31128 INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
31129 def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
31130 INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
31131 def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
31132 "__builtin_ia32_rdseed_di_step",
31133 INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
31134
31135 /* ADCX */
31136 def_builtin (0, "__builtin_ia32_addcarryx_u32",
31137 UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
31138 def_builtin (OPTION_MASK_ISA_64BIT,
31139 "__builtin_ia32_addcarryx_u64",
31140 UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
31141 IX86_BUILTIN_ADDCARRYX64);
31142
31143 /* Read/write FLAGS. */
31144 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
31145 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31146 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
31147 UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
31148 def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
31149 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
31150 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
31151 VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
31152
31153
31154 /* Add FMA4 multi-arg instructions. */
31155 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
31156 {
31157 if (d->name == 0)
31158 continue;
31159
31160 ftype = (enum ix86_builtin_func_type) d->flag;
31161 def_builtin_const (d->mask, d->name, ftype, d->code);
31162 }
31163 }
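
/* Illustrative user-level sketch (hypothetical caller, for exposition):
   once defined above, a builtin such as __builtin_ia32_rdrand32_step is
   callable directly from user code, e.g.

     unsigned int
     get_random (void)
     {
       unsigned int val;
       while (!__builtin_ia32_rdrand32_step (&val))
         continue;
       return val;
     }

   The loop retries until RDRAND reports success (non-zero return), which
   is essentially what the _rdrand32_step intrinsic wrapper does.  */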
31164
31165 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
31166 to return a pointer to VERSION_DECL if the outcome of the expression
31167 formed by PREDICATE_CHAIN is true. This function will be called during
31168 version dispatch to decide which function version to execute. It returns
31169 the basic block at the end, to which more conditions can be added. */
31170
31171 static basic_block
31172 add_condition_to_bb (tree function_decl, tree version_decl,
31173 tree predicate_chain, basic_block new_bb)
31174 {
31175 gimple return_stmt;
31176 tree convert_expr, result_var;
31177 gimple convert_stmt;
31178 gimple call_cond_stmt;
31179 gimple if_else_stmt;
31180
31181 basic_block bb1, bb2, bb3;
31182 edge e12, e23;
31183
31184 tree cond_var, and_expr_var = NULL_TREE;
31185 gimple_seq gseq;
31186
31187 tree predicate_decl, predicate_arg;
31188
31189 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
31190
31191 gcc_assert (new_bb != NULL);
31192 gseq = bb_seq (new_bb);
31193
31194
31195 convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
31196 build_fold_addr_expr (version_decl));
31197 result_var = create_tmp_var (ptr_type_node, NULL);
31198 convert_stmt = gimple_build_assign (result_var, convert_expr);
31199 return_stmt = gimple_build_return (result_var);
31200
31201 if (predicate_chain == NULL_TREE)
31202 {
31203 gimple_seq_add_stmt (&gseq, convert_stmt);
31204 gimple_seq_add_stmt (&gseq, return_stmt);
31205 set_bb_seq (new_bb, gseq);
31206 gimple_set_bb (convert_stmt, new_bb);
31207 gimple_set_bb (return_stmt, new_bb);
31208 pop_cfun ();
31209 return new_bb;
31210 }
31211
31212 while (predicate_chain != NULL)
31213 {
31214 cond_var = create_tmp_var (integer_type_node, NULL);
31215 predicate_decl = TREE_PURPOSE (predicate_chain);
31216 predicate_arg = TREE_VALUE (predicate_chain);
31217 call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
31218 gimple_call_set_lhs (call_cond_stmt, cond_var);
31219
31220 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
31221 gimple_set_bb (call_cond_stmt, new_bb);
31222 gimple_seq_add_stmt (&gseq, call_cond_stmt);
31223
31224 predicate_chain = TREE_CHAIN (predicate_chain);
31225
31226 if (and_expr_var == NULL)
31227 and_expr_var = cond_var;
31228 else
31229 {
31230 gimple assign_stmt;
31231 /* Use MIN_EXPR to check whether any predicate result is zero:
31232 and_expr_var = MIN_EXPR <cond_var, and_expr_var>.  */
31233 assign_stmt = gimple_build_assign (and_expr_var,
31234 build2 (MIN_EXPR, integer_type_node,
31235 cond_var, and_expr_var));
31236
31237 gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
31238 gimple_set_bb (assign_stmt, new_bb);
31239 gimple_seq_add_stmt (&gseq, assign_stmt);
31240 }
31241 }
31242
31243 if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
31244 integer_zero_node,
31245 NULL_TREE, NULL_TREE);
31246 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
31247 gimple_set_bb (if_else_stmt, new_bb);
31248 gimple_seq_add_stmt (&gseq, if_else_stmt);
31249
31250 gimple_seq_add_stmt (&gseq, convert_stmt);
31251 gimple_seq_add_stmt (&gseq, return_stmt);
31252 set_bb_seq (new_bb, gseq);
31253
31254 bb1 = new_bb;
31255 e12 = split_block (bb1, if_else_stmt);
31256 bb2 = e12->dest;
31257 e12->flags &= ~EDGE_FALLTHRU;
31258 e12->flags |= EDGE_TRUE_VALUE;
31259
31260 e23 = split_block (bb2, return_stmt);
31261
31262 gimple_set_bb (convert_stmt, bb2);
31263 gimple_set_bb (return_stmt, bb2);
31264
31265 bb3 = e23->dest;
31266 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
31267
31268 remove_edge (e23);
31269 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
31270
31271 pop_cfun ();
31272
31273 return bb3;
31274 }
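
/* Illustrative sketch of the code added to NEW_BB for one version (names
   hypothetical, for exposition only).  In C-like form the block behaves as

     cond = __builtin_cpu_is ("corei7");      (one predicate_chain entry)
     if (cond > 0)
       return (void *) foo_sse42;             (version_decl)

   with control falling through to the returned basic block otherwise.
   When several predicates are chained, the MIN_EXPR accumulation above
   makes the branch taken only if every predicate returned non-zero.  */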
31275
31276 /* This parses the attribute arguments to target in DECL and determines
31277 the right builtin to use to match the platform specification.
31278 It returns the priority value for this version decl. If PREDICATE_LIST
31279 is not NULL, it stores the list of cpu features that need to be checked
31280 before dispatching this function. */
31281
31282 static unsigned int
31283 get_builtin_code_for_version (tree decl, tree *predicate_list)
31284 {
31285 tree attrs;
31286 struct cl_target_option cur_target;
31287 tree target_node;
31288 struct cl_target_option *new_target;
31289 const char *arg_str = NULL;
31290 const char *attrs_str = NULL;
31291 char *tok_str = NULL;
31292 char *token;
31293
31294 /* Priority of i386 features, greater value is higher priority. This is
31295 used to decide the order in which function dispatch must happen. For
31296 instance, a version specialized for SSE4.2 should be checked for dispatch
31297 before a version for SSE3, as SSE4.2 implies SSE3. */
31298 enum feature_priority
31299 {
31300 P_ZERO = 0,
31301 P_MMX,
31302 P_SSE,
31303 P_SSE2,
31304 P_SSE3,
31305 P_SSSE3,
31306 P_PROC_SSSE3,
31307 P_SSE4_A,
31308 P_PROC_SSE4_A,
31309 P_SSE4_1,
31310 P_SSE4_2,
31311 P_PROC_SSE4_2,
31312 P_POPCNT,
31313 P_AVX,
31314 P_PROC_AVX,
31315 P_FMA4,
31316 P_XOP,
31317 P_PROC_XOP,
31318 P_FMA,
31319 P_PROC_FMA,
31320 P_AVX2,
31321 P_PROC_AVX2
31322 };
31323
31324 enum feature_priority priority = P_ZERO;
31325
31326 /* These are the target attribute strings for which a dispatcher is
31327 available, from fold_builtin_cpu. */
31328
31329 static struct _feature_list
31330 {
31331 const char *const name;
31332 const enum feature_priority priority;
31333 }
31334 const feature_list[] =
31335 {
31336 {"mmx", P_MMX},
31337 {"sse", P_SSE},
31338 {"sse2", P_SSE2},
31339 {"sse3", P_SSE3},
31340 {"sse4a", P_SSE4_A},
31341 {"ssse3", P_SSSE3},
31342 {"sse4.1", P_SSE4_1},
31343 {"sse4.2", P_SSE4_2},
31344 {"popcnt", P_POPCNT},
31345 {"avx", P_AVX},
31346 {"fma4", P_FMA4},
31347 {"xop", P_XOP},
31348 {"fma", P_FMA},
31349 {"avx2", P_AVX2}
31350 };
31351
31352
31353 static unsigned int NUM_FEATURES
31354 = sizeof (feature_list) / sizeof (struct _feature_list);
31355
31356 unsigned int i;
31357
31358 tree predicate_chain = NULL_TREE;
31359 tree predicate_decl, predicate_arg;
31360
31361 attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31362 gcc_assert (attrs != NULL);
31363
31364 attrs = TREE_VALUE (TREE_VALUE (attrs));
31365
31366 gcc_assert (TREE_CODE (attrs) == STRING_CST);
31367 attrs_str = TREE_STRING_POINTER (attrs);
31368
31369 /* Return priority zero for default function. */
31370 if (strcmp (attrs_str, "default") == 0)
31371 return 0;
31372
31373 /* Handle arch= if specified. For priority, set it to be 1 more than
31374 the best instruction set the processor can handle. For instance, if
31375 there is a version for atom and a version for ssse3 (the highest ISA
31376 priority for atom), the atom version must be checked for dispatch
31377 before the ssse3 version. */
31378 if (strstr (attrs_str, "arch=") != NULL)
31379 {
31380 cl_target_option_save (&cur_target, &global_options);
31381 target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
31382 &global_options_set);
31383
31384 gcc_assert (target_node);
31385 new_target = TREE_TARGET_OPTION (target_node);
31386 gcc_assert (new_target);
31387
31388 if (new_target->arch_specified && new_target->arch > 0)
31389 {
31390 switch (new_target->arch)
31391 {
31392 case PROCESSOR_CORE2:
31393 arg_str = "core2";
31394 priority = P_PROC_SSSE3;
31395 break;
31396 case PROCESSOR_NEHALEM:
31397 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
31398 arg_str = "westmere";
31399 else
31400 /* We translate "arch=corei7" and "arch=nehalem" to
31401 "corei7" so that it will be mapped to M_INTEL_COREI7
31402 as cpu type to cover all M_INTEL_COREI7_XXXs. */
31403 arg_str = "corei7";
31404 priority = P_PROC_SSE4_2;
31405 break;
31406 case PROCESSOR_SANDYBRIDGE:
31407 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
31408 arg_str = "ivybridge";
31409 else
31410 arg_str = "sandybridge";
31411 priority = P_PROC_AVX;
31412 break;
31413 case PROCESSOR_HASWELL:
31414 if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
31415 arg_str = "broadwell";
31416 else
31417 arg_str = "haswell";
31418 priority = P_PROC_AVX2;
31419 break;
31420 case PROCESSOR_BONNELL:
31421 arg_str = "bonnell";
31422 priority = P_PROC_SSSE3;
31423 break;
31424 case PROCESSOR_SILVERMONT:
31425 arg_str = "silvermont";
31426 priority = P_PROC_SSE4_2;
31427 break;
31428 case PROCESSOR_AMDFAM10:
31429 arg_str = "amdfam10h";
31430 priority = P_PROC_SSE4_A;
31431 break;
31432 case PROCESSOR_BTVER1:
31433 arg_str = "btver1";
31434 priority = P_PROC_SSE4_A;
31435 break;
31436 case PROCESSOR_BTVER2:
31437 arg_str = "btver2";
31438 priority = P_PROC_AVX;
31439 break;
31440 case PROCESSOR_BDVER1:
31441 arg_str = "bdver1";
31442 priority = P_PROC_XOP;
31443 break;
31444 case PROCESSOR_BDVER2:
31445 arg_str = "bdver2";
31446 priority = P_PROC_FMA;
31447 break;
31448 case PROCESSOR_BDVER3:
31449 arg_str = "bdver3";
31450 priority = P_PROC_FMA;
31451 break;
31452 case PROCESSOR_BDVER4:
31453 arg_str = "bdver4";
31454 priority = P_PROC_AVX2;
31455 break;
31456 }
31457 }
31458
31459 cl_target_option_restore (&global_options, &cur_target);
31460
31461 if (predicate_list && arg_str == NULL)
31462 {
31463 error_at (DECL_SOURCE_LOCATION (decl),
31464 "No dispatcher found for the versioning attributes");
31465 return 0;
31466 }
31467
31468 if (predicate_list)
31469 {
31470 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
31471 /* For a C string literal the length includes the trailing NULL. */
31472 predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
31473 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31474 predicate_chain);
31475 }
31476 }
31477
31478 /* Process feature name. */
31479 tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
31480 strcpy (tok_str, attrs_str);
31481 token = strtok (tok_str, ",");
31482 predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
31483
31484 while (token != NULL)
31485 {
31486 /* Do not process "arch=" */
31487 if (strncmp (token, "arch=", 5) == 0)
31488 {
31489 token = strtok (NULL, ",");
31490 continue;
31491 }
31492 for (i = 0; i < NUM_FEATURES; ++i)
31493 {
31494 if (strcmp (token, feature_list[i].name) == 0)
31495 {
31496 if (predicate_list)
31497 {
31498 predicate_arg = build_string_literal (
31499 strlen (feature_list[i].name) + 1,
31500 feature_list[i].name);
31501 predicate_chain = tree_cons (predicate_decl, predicate_arg,
31502 predicate_chain);
31503 }
31504 /* Find the maximum priority feature. */
31505 if (feature_list[i].priority > priority)
31506 priority = feature_list[i].priority;
31507
31508 break;
31509 }
31510 }
31511 if (predicate_list && i == NUM_FEATURES)
31512 {
31513 error_at (DECL_SOURCE_LOCATION (decl),
31514 "No dispatcher found for %s", token);
31515 return 0;
31516 }
31517 token = strtok (NULL, ",");
31518 }
31519 free (tok_str);
31520
31521 if (predicate_list && predicate_chain == NULL_TREE)
31522 {
31523 error_at (DECL_SOURCE_LOCATION (decl),
31524 "No dispatcher found for the versioning attributes : %s",
31525 attrs_str);
31526 return 0;
31527 }
31528 else if (predicate_list)
31529 {
31530 predicate_chain = nreverse (predicate_chain);
31531 *predicate_list = predicate_chain;
31532 }
31533
31534 return priority;
31535 }
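
/* Illustrative example (hypothetical attribute string): for a version
   declared in C++ as

     __attribute__ ((target ("sse4.2,popcnt"))) int foo (void);

   this function records __builtin_cpu_supports ("sse4.2") and
   __builtin_cpu_supports ("popcnt") in *PREDICATE_LIST and returns
   P_POPCNT, the larger of the two feature priorities.  */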
31536
31537 /* This compares the priority of target features in function DECL1
31538 and DECL2. It returns positive value if DECL1 is higher priority,
31539 negative value if DECL2 is higher priority and 0 if they are the
31540 same. */
31541
31542 static int
31543 ix86_compare_version_priority (tree decl1, tree decl2)
31544 {
31545 unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
31546 unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
31547
31548 return (int)priority1 - (int)priority2;
31549 }
31550
31551 /* V1 and V2 point to function versions with different priorities
31552 based on the target ISA. This function compares their priorities. */
31553
31554 static int
31555 feature_compare (const void *v1, const void *v2)
31556 {
31557 typedef struct _function_version_info
31558 {
31559 tree version_decl;
31560 tree predicate_chain;
31561 unsigned int dispatch_priority;
31562 } function_version_info;
31563
31564 const function_version_info c1 = *(const function_version_info *)v1;
31565 const function_version_info c2 = *(const function_version_info *)v2;
31566 return (c2.dispatch_priority - c1.dispatch_priority);
31567 }
31568
31569 /* This function generates the dispatch function for
31570 multi-versioned functions. DISPATCH_DECL is the function which will
31571 contain the dispatch logic. FNDECLS is a vector of the function
31572 versions to dispatch among. EMPTY_BB is the basic block pointer
31573 in DISPATCH_DECL in which the dispatch code is generated. */
31574
31575 static int
31576 dispatch_function_versions (tree dispatch_decl,
31577 void *fndecls_p,
31578 basic_block *empty_bb)
31579 {
31580 tree default_decl;
31581 gimple ifunc_cpu_init_stmt;
31582 gimple_seq gseq;
31583 int ix;
31584 tree ele;
31585 vec<tree> *fndecls;
31586 unsigned int num_versions = 0;
31587 unsigned int actual_versions = 0;
31588 unsigned int i;
31589
31590 struct _function_version_info
31591 {
31592 tree version_decl;
31593 tree predicate_chain;
31594 unsigned int dispatch_priority;
31595 }*function_version_info;
31596
31597 gcc_assert (dispatch_decl != NULL
31598 && fndecls_p != NULL
31599 && empty_bb != NULL);
31600
31601 /* fndecls_p is actually a vector. */
31602 fndecls = static_cast<vec<tree> *> (fndecls_p);
31603
31604 /* At least one more version other than the default. */
31605 num_versions = fndecls->length ();
31606 gcc_assert (num_versions >= 2);
31607
31608 function_version_info = (struct _function_version_info *)
31609 XNEWVEC (struct _function_version_info, (num_versions - 1));
31610
31611 /* The first version in the vector is the default decl. */
31612 default_decl = (*fndecls)[0];
31613
31614 push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
31615
31616 gseq = bb_seq (*empty_bb);
31617 /* Function version dispatch is via IFUNC. IFUNC resolvers fire before
31618 constructors, so explicitly call __builtin_cpu_init here. */
31619 ifunc_cpu_init_stmt = gimple_build_call_vec (
31620 ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
31621 gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
31622 gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
31623 set_bb_seq (*empty_bb, gseq);
31624
31625 pop_cfun ();
31626
31627
31628 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
31629 {
31630 tree version_decl = ele;
31631 tree predicate_chain = NULL_TREE;
31632 unsigned int priority;
31633 /* Get attribute string, parse it and find the right predicate decl.
31634 The predicate function could be a lengthy combination of many
31635 features, like arch-type and various isa-variants. */
31636 priority = get_builtin_code_for_version (version_decl,
31637 &predicate_chain);
31638
31639 if (predicate_chain == NULL_TREE)
31640 continue;
31641
31642 function_version_info [actual_versions].version_decl = version_decl;
31643 function_version_info [actual_versions].predicate_chain
31644 = predicate_chain;
31645 function_version_info [actual_versions].dispatch_priority = priority;
31646 actual_versions++;
31647 }
31648
31649 /* Sort the versions according to descending order of dispatch priority. The
31650 priority is based on the ISA. This is not a perfect solution. There
31651 could still be ambiguity. If more than one function version is suitable
31652 to execute, which one should be dispatched? In the future, allow the user
31653 to specify a dispatch priority next to the version. */
31654 qsort (function_version_info, actual_versions,
31655 sizeof (struct _function_version_info), feature_compare);
31656
31657 for (i = 0; i < actual_versions; ++i)
31658 *empty_bb = add_condition_to_bb (dispatch_decl,
31659 function_version_info[i].version_decl,
31660 function_version_info[i].predicate_chain,
31661 *empty_bb);
31662
31663 /* dispatch default version at the end. */
31664 *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
31665 NULL, *empty_bb);
31666
31667 free (function_version_info);
31668 return 0;
31669 }
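
/* Illustrative user-level sketch (hypothetical C++ example, where function
   multiversioning is supported):

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("sse4.2")))  int foo (void) { return 1; }
     __attribute__ ((target ("avx2")))    int foo (void) { return 2; }

   The resolver body built here first calls __builtin_cpu_init, then tests
   the versions in descending priority order (avx2 before sse4.2), and
   finally falls back to the "default" definition, which needs no
   predicate.  */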
31670
31671 /* Comparator function to be used in qsort routine to sort attribute
31672 specification strings to "target". */
31673
31674 static int
31675 attr_strcmp (const void *v1, const void *v2)
31676 {
31677 const char *c1 = *(char *const*)v1;
31678 const char *c2 = *(char *const*)v2;
31679 return strcmp (c1, c2);
31680 }
31681
31682 /* ARGLIST is the argument to target attribute. This function tokenizes
31683 the comma separated arguments, sorts them and returns a string which
31684 is a unique identifier for the comma separated arguments. It also
31685 replaces non-identifier characters "=,-" with "_". */
31686
31687 static char *
31688 sorted_attr_string (tree arglist)
31689 {
31690 tree arg;
31691 size_t str_len_sum = 0;
31692 char **args = NULL;
31693 char *attr_str, *ret_str;
31694 char *attr = NULL;
31695 unsigned int argnum = 1;
31696 unsigned int i;
31697
31698 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31699 {
31700 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31701 size_t len = strlen (str);
31702 str_len_sum += len + 1;
31703 if (arg != arglist)
31704 argnum++;
31705 for (i = 0; i < strlen (str); i++)
31706 if (str[i] == ',')
31707 argnum++;
31708 }
31709
31710 attr_str = XNEWVEC (char, str_len_sum);
31711 str_len_sum = 0;
31712 for (arg = arglist; arg; arg = TREE_CHAIN (arg))
31713 {
31714 const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
31715 size_t len = strlen (str);
31716 memcpy (attr_str + str_len_sum, str, len);
31717 attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
31718 str_len_sum += len + 1;
31719 }
31720
31721 /* Replace "=,-" with "_". */
31722 for (i = 0; i < strlen (attr_str); i++)
31723 if (attr_str[i] == '=' || attr_str[i]== '-')
31724 attr_str[i] = '_';
31725
31726 if (argnum == 1)
31727 return attr_str;
31728
31729 args = XNEWVEC (char *, argnum);
31730
31731 i = 0;
31732 attr = strtok (attr_str, ",");
31733 while (attr != NULL)
31734 {
31735 args[i] = attr;
31736 i++;
31737 attr = strtok (NULL, ",");
31738 }
31739
31740 qsort (args, argnum, sizeof (char *), attr_strcmp);
31741
31742 ret_str = XNEWVEC (char, str_len_sum);
31743 str_len_sum = 0;
31744 for (i = 0; i < argnum; i++)
31745 {
31746 size_t len = strlen (args[i]);
31747 memcpy (ret_str + str_len_sum, args[i], len);
31748 ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
31749 str_len_sum += len + 1;
31750 }
31751
31752 XDELETEVEC (args);
31753 XDELETEVEC (attr_str);
31754 return ret_str;
31755 }
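
/* Illustrative example (hypothetical input): for a target attribute whose
   argument list holds the single string "avx,arch=core2",

     tree args = tree_cons (NULL_TREE,
                            build_string (14, "avx,arch=core2"), NULL_TREE);
     char *id = sorted_attr_string (args);

   would yield "arch_core2_avx": the '=' is rewritten to '_', the tokens
   are sorted, and they are rejoined with '_'.  A single-token argument
   such as "sse4.2" comes back unchanged apart from that rewriting.  */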
31756
31757 /* This function changes the assembler name for functions that are
31758 versions. If DECL is a function version and has a "target"
31759 attribute, it appends the attribute string to its assembler name. */
31760
31761 static tree
31762 ix86_mangle_function_version_assembler_name (tree decl, tree id)
31763 {
31764 tree version_attr;
31765 const char *orig_name, *version_string;
31766 char *attr_str, *assembler_name;
31767
31768 if (DECL_DECLARED_INLINE_P (decl)
31769 && lookup_attribute ("gnu_inline",
31770 DECL_ATTRIBUTES (decl)))
31771 error_at (DECL_SOURCE_LOCATION (decl),
31772 "Function versions cannot be marked as gnu_inline,"
31773 " bodies have to be generated");
31774
31775 if (DECL_VIRTUAL_P (decl)
31776 || DECL_VINDEX (decl))
31777 sorry ("Virtual function multiversioning not supported");
31778
31779 version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31780
31781 /* target attribute string cannot be NULL. */
31782 gcc_assert (version_attr != NULL_TREE);
31783
31784 orig_name = IDENTIFIER_POINTER (id);
31785 version_string
31786 = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
31787
31788 if (strcmp (version_string, "default") == 0)
31789 return id;
31790
31791 attr_str = sorted_attr_string (TREE_VALUE (version_attr));
31792 assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
31793
31794 sprintf (assembler_name, "%s.%s", orig_name, attr_str);
31795
31796 /* Allow assembler name to be modified if already set. */
31797 if (DECL_ASSEMBLER_NAME_SET_P (decl))
31798 SET_DECL_RTL (decl, NULL);
31799
31800 tree ret = get_identifier (assembler_name);
31801 XDELETEVEC (attr_str);
31802 XDELETEVEC (assembler_name);
31803 return ret;
31804 }
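
/* Illustrative example (hypothetical decl): a version of foo declared with
   __attribute__ ((target ("sse4.2"))) has its assembler name rewritten
   from, say, "_Z3foov" to "_Z3foov.sse4.2", while the "default" version
   keeps the original name so plain references still resolve to it.  */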
31805
31806 /* This function returns true if FN1 and FN2 are versions of the same function,
31807 that is, the target strings of the function decls are different. This assumes
31808 that FN1 and FN2 have the same signature. */
31809
31810 static bool
31811 ix86_function_versions (tree fn1, tree fn2)
31812 {
31813 tree attr1, attr2;
31814 char *target1, *target2;
31815 bool result;
31816
31817 if (TREE_CODE (fn1) != FUNCTION_DECL
31818 || TREE_CODE (fn2) != FUNCTION_DECL)
31819 return false;
31820
31821 attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
31822 attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
31823
31824 /* At least one function decl should have the target attribute specified. */
31825 if (attr1 == NULL_TREE && attr2 == NULL_TREE)
31826 return false;
31827
31828 /* Diagnose missing target attribute if one of the decls is already
31829 multi-versioned. */
31830 if (attr1 == NULL_TREE || attr2 == NULL_TREE)
31831 {
31832 if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
31833 {
31834 if (attr2 != NULL_TREE)
31835 {
31836 tree tem = fn1;
31837 fn1 = fn2;
31838 fn2 = tem;
31839 attr1 = attr2;
31840 }
31841 error_at (DECL_SOURCE_LOCATION (fn2),
31842 "missing %<target%> attribute for multi-versioned %D",
31843 fn2);
31844 inform (DECL_SOURCE_LOCATION (fn1),
31845 "previous declaration of %D", fn1);
31846 /* Prevent diagnosing of the same error multiple times. */
31847 DECL_ATTRIBUTES (fn2)
31848 = tree_cons (get_identifier ("target"),
31849 copy_node (TREE_VALUE (attr1)),
31850 DECL_ATTRIBUTES (fn2));
31851 }
31852 return false;
31853 }
31854
31855 target1 = sorted_attr_string (TREE_VALUE (attr1));
31856 target2 = sorted_attr_string (TREE_VALUE (attr2));
31857
31858 /* The sorted target strings must be different for fn1 and fn2
31859 to be versions. */
31860 if (strcmp (target1, target2) == 0)
31861 result = false;
31862 else
31863 result = true;
31864
31865 XDELETEVEC (target1);
31866 XDELETEVEC (target2);
31867
31868 return result;
31869 }
31870
31871 static tree
31872 ix86_mangle_decl_assembler_name (tree decl, tree id)
31873 {
31874 /* For function version, add the target suffix to the assembler name. */
31875 if (TREE_CODE (decl) == FUNCTION_DECL
31876 && DECL_FUNCTION_VERSIONED (decl))
31877 id = ix86_mangle_function_version_assembler_name (decl, id);
31878 #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
31879 id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
31880 #endif
31881
31882 return id;
31883 }
31884
31885 /* Return a new name by appending SUFFIX to the DECL name. If make_unique
31886 is true, append the full path name of the source file. */
31887
31888 static char *
31889 make_name (tree decl, const char *suffix, bool make_unique)
31890 {
31891 char *global_var_name;
31892 int name_len;
31893 const char *name;
31894 const char *unique_name = NULL;
31895
31896 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
31897
31898 /* Get a unique name that can be used globally without any chances
31899 of collision at link time. */
31900 if (make_unique)
31901 unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
31902
31903 name_len = strlen (name) + strlen (suffix) + 2;
31904
31905 if (make_unique)
31906 name_len += strlen (unique_name) + 1;
31907 global_var_name = XNEWVEC (char, name_len);
31908
31909 /* Use '.' to concatenate names as it is demangler friendly. */
31910 if (make_unique)
31911 snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
31912 suffix);
31913 else
31914 snprintf (global_var_name, name_len, "%s.%s", name, suffix);
31915
31916 return global_var_name;
31917 }
31918
31919 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
31920
31921 /* Make a dispatcher declaration for the multi-versioned function DECL.
31922 Calls to DECL function will be replaced with calls to the dispatcher
31923 by the front-end. Return the decl created. */
31924
31925 static tree
31926 make_dispatcher_decl (const tree decl)
31927 {
31928 tree func_decl;
31929 char *func_name;
31930 tree fn_type, func_type;
31931 bool is_uniq = false;
31932
31933 if (TREE_PUBLIC (decl) == 0)
31934 is_uniq = true;
31935
31936 func_name = make_name (decl, "ifunc", is_uniq);
31937
31938 fn_type = TREE_TYPE (decl);
31939 func_type = build_function_type (TREE_TYPE (fn_type),
31940 TYPE_ARG_TYPES (fn_type));
31941
31942 func_decl = build_fn_decl (func_name, func_type);
31943 XDELETEVEC (func_name);
31944 TREE_USED (func_decl) = 1;
31945 DECL_CONTEXT (func_decl) = NULL_TREE;
31946 DECL_INITIAL (func_decl) = error_mark_node;
31947 DECL_ARTIFICIAL (func_decl) = 1;
31948 /* Mark this function as external; the resolver will flip it again if
31949 it gets generated. */
31950 DECL_EXTERNAL (func_decl) = 1;
31951 /* IFUNCs have to be externally visible, so mark this decl public. */
31952 TREE_PUBLIC (func_decl) = 1;
31953
31954 return func_decl;
31955 }
31956
31957 #endif
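
/* Illustrative sketch (hypothetical names, C-like syntax for what is
   really an assembler-level construct): for a multiversioned foo, the decl
   built above plays the role of

     extern int foo.ifunc (void);

   The front end redirects every direct call to foo to this dispatcher,
   and make_resolver_func below attaches the ifunc resolver that picks the
   version at load time.  */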
31958
31959 /* Returns true if DECL is multi-versioned and is the default function,
31960 that is, it is not tagged with a target-specific optimization. */
31961
31962 static bool
31963 is_function_default_version (const tree decl)
31964 {
31965 if (TREE_CODE (decl) != FUNCTION_DECL
31966 || !DECL_FUNCTION_VERSIONED (decl))
31967 return false;
31968 tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
31969 gcc_assert (attr);
31970 attr = TREE_VALUE (TREE_VALUE (attr));
31971 return (TREE_CODE (attr) == STRING_CST
31972 && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
31973 }
31974
31975 /* Make a dispatcher declaration for the multi-versioned function DECL.
31976 Calls to DECL function will be replaced with calls to the dispatcher
31977 by the front-end. Returns the decl of the dispatcher function. */
31978
31979 static tree
31980 ix86_get_function_versions_dispatcher (void *decl)
31981 {
31982 tree fn = (tree) decl;
31983 struct cgraph_node *node = NULL;
31984 struct cgraph_node *default_node = NULL;
31985 struct cgraph_function_version_info *node_v = NULL;
31986 struct cgraph_function_version_info *first_v = NULL;
31987
31988 tree dispatch_decl = NULL;
31989
31990 struct cgraph_function_version_info *default_version_info = NULL;
31991
31992 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
31993
31994 node = cgraph_get_node (fn);
31995 gcc_assert (node != NULL);
31996
31997 node_v = get_cgraph_node_version (node);
31998 gcc_assert (node_v != NULL);
31999
32000 if (node_v->dispatcher_resolver != NULL)
32001 return node_v->dispatcher_resolver;
32002
32003 /* Find the default version and make it the first node. */
32004 first_v = node_v;
32005 /* Go to the beginning of the chain. */
32006 while (first_v->prev != NULL)
32007 first_v = first_v->prev;
32008 default_version_info = first_v;
32009 while (default_version_info != NULL)
32010 {
32011 if (is_function_default_version
32012 (default_version_info->this_node->decl))
32013 break;
32014 default_version_info = default_version_info->next;
32015 }
32016
32017 /* If there is no default node, just return NULL. */
32018 if (default_version_info == NULL)
32019 return NULL;
32020
32021 /* Make default info the first node. */
32022 if (first_v != default_version_info)
32023 {
32024 default_version_info->prev->next = default_version_info->next;
32025 if (default_version_info->next)
32026 default_version_info->next->prev = default_version_info->prev;
32027 first_v->prev = default_version_info;
32028 default_version_info->next = first_v;
32029 default_version_info->prev = NULL;
32030 }
32031
32032 default_node = default_version_info->this_node;
32033
32034 #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
32035 if (targetm.has_ifunc_p ())
32036 {
32037 struct cgraph_function_version_info *it_v = NULL;
32038 struct cgraph_node *dispatcher_node = NULL;
32039 struct cgraph_function_version_info *dispatcher_version_info = NULL;
32040
32041 /* Right now, the dispatching is done via ifunc. */
32042 dispatch_decl = make_dispatcher_decl (default_node->decl);
32043
32044 dispatcher_node = cgraph_get_create_node (dispatch_decl);
32045 gcc_assert (dispatcher_node != NULL);
32046 dispatcher_node->dispatcher_function = 1;
32047 dispatcher_version_info
32048 = insert_new_cgraph_node_version (dispatcher_node);
32049 dispatcher_version_info->next = default_version_info;
32050 dispatcher_node->definition = 1;
32051
32052 /* Set the dispatcher for all the versions. */
32053 it_v = default_version_info;
32054 while (it_v != NULL)
32055 {
32056 it_v->dispatcher_resolver = dispatch_decl;
32057 it_v = it_v->next;
32058 }
32059 }
32060 else
32061 #endif
32062 {
32063 error_at (DECL_SOURCE_LOCATION (default_node->decl),
32064 "multiversioning needs ifunc which is not supported "
32065 "on this target");
32066 }
32067
32068 return dispatch_decl;
32069 }
32070
32071 /* Makes a function attribute of the form NAME(ARG_NAME) and chains
32072 it to CHAIN. */
32073
32074 static tree
32075 make_attribute (const char *name, const char *arg_name, tree chain)
32076 {
32077 tree attr_name;
32078 tree attr_arg_name;
32079 tree attr_args;
32080 tree attr;
32081
32082 attr_name = get_identifier (name);
32083 attr_arg_name = build_string (strlen (arg_name), arg_name);
32084 attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
32085 attr = tree_cons (attr_name, attr_args, chain);
32086 return attr;
32087 }
32088
32089 /* Make the resolver function decl to dispatch the versions of
32090 a multi-versioned function, DEFAULT_DECL. Create an
32091 empty basic block in the resolver and store the pointer in
32092 EMPTY_BB. Return the decl of the resolver function. */
32093
32094 static tree
32095 make_resolver_func (const tree default_decl,
32096 const tree dispatch_decl,
32097 basic_block *empty_bb)
32098 {
32099 char *resolver_name;
32100 tree decl, type, decl_name, t;
32101 bool is_uniq = false;
32102
32103 /* IFUNCs have to be globally visible. So, if the default_decl is
32104 not, then the name of the IFUNC should be made unique. */
32105 if (TREE_PUBLIC (default_decl) == 0)
32106 is_uniq = true;
32107
32108 /* Append the filename to the resolver function if the versions are
32109 not externally visible. This is because the resolver function has
32110 to be externally visible for the loader to find it. So, appending
32111 the filename will prevent conflicts with a resolver function from
32112 another module which is based on the same version name. */
32113 resolver_name = make_name (default_decl, "resolver", is_uniq);
32114
32115 /* The resolver function should return a (void *). */
32116 type = build_function_type_list (ptr_type_node, NULL_TREE);
32117
32118 decl = build_fn_decl (resolver_name, type);
32119 decl_name = get_identifier (resolver_name);
32120 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
32121
32122 DECL_NAME (decl) = decl_name;
32123 TREE_USED (decl) = 1;
32124 DECL_ARTIFICIAL (decl) = 1;
32125 DECL_IGNORED_P (decl) = 0;
32126 /* IFUNC resolvers have to be externally visible. */
32127 TREE_PUBLIC (decl) = 1;
32128 DECL_UNINLINABLE (decl) = 1;
32129
32130 /* Resolver is not external, body is generated. */
32131 DECL_EXTERNAL (decl) = 0;
32132 DECL_EXTERNAL (dispatch_decl) = 0;
32133
32134 DECL_CONTEXT (decl) = NULL_TREE;
32135 DECL_INITIAL (decl) = make_node (BLOCK);
32136 DECL_STATIC_CONSTRUCTOR (decl) = 0;
32137
32138 if (DECL_COMDAT_GROUP (default_decl)
32139 || TREE_PUBLIC (default_decl))
32140 {
32141 /* In this case, each translation unit with a call to this
32142 versioned function will put out a resolver. Ensure it
32143 is comdat to keep just one copy. */
32144 DECL_COMDAT (decl) = 1;
32145 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
32146 }
32147 /* Build result decl and add to function_decl. */
32148 t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
32149 DECL_ARTIFICIAL (t) = 1;
32150 DECL_IGNORED_P (t) = 1;
32151 DECL_RESULT (decl) = t;
32152
32153 gimplify_function_tree (decl);
32154 push_cfun (DECL_STRUCT_FUNCTION (decl));
32155 *empty_bb = init_lowered_empty_function (decl, false);
32156
32157 cgraph_add_new_function (decl, true);
32158 cgraph_call_function_insertion_hooks (cgraph_get_create_node (decl));
32159
32160 pop_cfun ();
32161
32162 gcc_assert (dispatch_decl != NULL);
32163 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
32164 DECL_ATTRIBUTES (dispatch_decl)
32165 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
32166
32167 /* Create the alias for dispatch to resolver here. */
32168 /*cgraph_create_function_alias (dispatch_decl, decl);*/
32169 cgraph_same_body_alias (NULL, dispatch_decl, decl);
32170 XDELETEVEC (resolver_name);
32171 return decl;
32172 }
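
/* Illustrative sketch of the decls wired up above (hypothetical names,
   shown in C-like syntax even though the dots only exist at the assembler
   level):

     void *foo.resolver (void);                      (body built later)
     int foo.ifunc (void) __attribute__ ((ifunc ("foo.resolver")));

   plus a same-body alias from the dispatcher to the resolver, so the
   dynamic loader runs foo.resolver once and binds foo.ifunc to the
   function pointer it returns.  */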
32173
32174 /* Generate the dispatching code body to dispatch multi-versioned function
32175 DECL. The target hook is called to process the "target" attributes and
32176 provide the code to dispatch the right function at run-time. NODE points
32177 to the dispatcher decl whose body will be created. */
32178
32179 static tree
32180 ix86_generate_version_dispatcher_body (void *node_p)
32181 {
32182 tree resolver_decl;
32183 basic_block empty_bb;
32184 tree default_ver_decl;
32185 struct cgraph_node *versn;
32186 struct cgraph_node *node;
32187
32188 struct cgraph_function_version_info *node_version_info = NULL;
32189 struct cgraph_function_version_info *versn_info = NULL;
32190
32191 node = (cgraph_node *)node_p;
32192
32193 node_version_info = get_cgraph_node_version (node);
32194 gcc_assert (node->dispatcher_function
32195 && node_version_info != NULL);
32196
32197 if (node_version_info->dispatcher_resolver)
32198 return node_version_info->dispatcher_resolver;
32199
32200 /* The first version in the chain corresponds to the default version. */
32201 default_ver_decl = node_version_info->next->this_node->decl;
32202
32203 /* node is going to be an alias, so remove the finalized bit. */
32204 node->definition = false;
32205
32206 resolver_decl = make_resolver_func (default_ver_decl,
32207 node->decl, &empty_bb);
32208
32209 node_version_info->dispatcher_resolver = resolver_decl;
32210
32211 push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
32212
32213 auto_vec<tree, 2> fn_ver_vec;
32214
32215 for (versn_info = node_version_info->next; versn_info;
32216 versn_info = versn_info->next)
32217 {
32218 versn = versn_info->this_node;
32219 /* Check for virtual functions here again, as by this time it should
32220 have been determined if this function needs a vtable index or
32221 not. This happens for methods in derived classes that override
32222 virtual methods in base classes but are not explicitly marked as
32223 virtual. */
32224 if (DECL_VINDEX (versn->decl))
32225 sorry ("Virtual function multiversioning not supported");
32226
32227 fn_ver_vec.safe_push (versn->decl);
32228 }
32229
32230 dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
32231 rebuild_cgraph_edges ();
32232 pop_cfun ();
32233 return resolver_decl;
32234 }
32235 /* This builds the processor_model struct type defined in
32236 libgcc/config/i386/cpuinfo.c */
32237
32238 static tree
32239 build_processor_model_struct (void)
32240 {
32241 const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
32242 "__cpu_features"};
32243 tree field = NULL_TREE, field_chain = NULL_TREE;
32244 int i;
32245 tree type = make_node (RECORD_TYPE);
32246
32247 /* The first 3 fields are unsigned int. */
32248 for (i = 0; i < 3; ++i)
32249 {
32250 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32251 get_identifier (field_name[i]), unsigned_type_node);
32252 if (field_chain != NULL_TREE)
32253 DECL_CHAIN (field) = field_chain;
32254 field_chain = field;
32255 }
32256
32257 /* The last field is an array of unsigned integers of size one. */
32258 field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
32259 get_identifier (field_name[3]),
32260 build_array_type (unsigned_type_node,
32261 build_index_type (size_one_node)));
32262 if (field_chain != NULL_TREE)
32263 DECL_CHAIN (field) = field_chain;
32264 field_chain = field;
32265
32266 finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
32267 return type;
32268 }
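
/* The record built above is meant to match the C declaration in
   libgcc/config/i386/cpuinfo.c, roughly (reproduced here as a sketch):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   fold_builtin_cpu below reads the fields of the single global
   __cpu_model object of this type.  */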
32269
32270 /* Returns an extern, comdat VAR_DECL of type TYPE and name NAME. */
32271
32272 static tree
32273 make_var_decl (tree type, const char *name)
32274 {
32275 tree new_decl;
32276
32277 new_decl = build_decl (UNKNOWN_LOCATION,
32278 VAR_DECL,
32279 get_identifier(name),
32280 type);
32281
32282 DECL_EXTERNAL (new_decl) = 1;
32283 TREE_STATIC (new_decl) = 1;
32284 TREE_PUBLIC (new_decl) = 1;
32285 DECL_INITIAL (new_decl) = 0;
32286 DECL_ARTIFICIAL (new_decl) = 0;
32287 DECL_PRESERVE_P (new_decl) = 1;
32288
32289 make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
32290 assemble_variable (new_decl, 0, 0, 0);
32291
32292 return new_decl;
32293 }
32294
32295 /* FNDECL is a __builtin_cpu_is or a __builtin_cpu_supports call that is folded
32296 into an integer defined in libgcc/config/i386/cpuinfo.c */
32297
32298 static tree
32299 fold_builtin_cpu (tree fndecl, tree *args)
32300 {
32301 unsigned int i;
32302 enum ix86_builtins fn_code = (enum ix86_builtins)
32303 DECL_FUNCTION_CODE (fndecl);
32304 tree param_string_cst = NULL;
32305
32306 /* This is the order of bit-fields in __processor_features in cpuinfo.c */
32307 enum processor_features
32308 {
32309 F_CMOV = 0,
32310 F_MMX,
32311 F_POPCNT,
32312 F_SSE,
32313 F_SSE2,
32314 F_SSE3,
32315 F_SSSE3,
32316 F_SSE4_1,
32317 F_SSE4_2,
32318 F_AVX,
32319 F_AVX2,
32320 F_SSE4_A,
32321 F_FMA4,
32322 F_XOP,
32323 F_FMA,
32324 F_MAX
32325 };
32326
32327 /* These are the values for vendor types and cpu types and subtypes
32328 in cpuinfo.c. CPU types and subtypes should have the corresponding
32329 start value subtracted before use. */
32330 enum processor_model
32331 {
32332 M_INTEL = 1,
32333 M_AMD,
32334 M_CPU_TYPE_START,
32335 M_INTEL_BONNELL,
32336 M_INTEL_CORE2,
32337 M_INTEL_COREI7,
32338 M_AMDFAM10H,
32339 M_AMDFAM15H,
32340 M_INTEL_SILVERMONT,
32341 M_AMD_BTVER1,
32342 M_AMD_BTVER2,
32343 M_CPU_SUBTYPE_START,
32344 M_INTEL_COREI7_NEHALEM,
32345 M_INTEL_COREI7_WESTMERE,
32346 M_INTEL_COREI7_SANDYBRIDGE,
32347 M_AMDFAM10H_BARCELONA,
32348 M_AMDFAM10H_SHANGHAI,
32349 M_AMDFAM10H_ISTANBUL,
32350 M_AMDFAM15H_BDVER1,
32351 M_AMDFAM15H_BDVER2,
32352 M_AMDFAM15H_BDVER3,
32353 M_AMDFAM15H_BDVER4,
32354 M_INTEL_COREI7_IVYBRIDGE,
32355 M_INTEL_COREI7_HASWELL
32356 };
32357
32358 static struct _arch_names_table
32359 {
32360 const char *const name;
32361 const enum processor_model model;
32362 }
32363 const arch_names_table[] =
32364 {
32365 {"amd", M_AMD},
32366 {"intel", M_INTEL},
32367 {"atom", M_INTEL_BONNELL},
32368 {"slm", M_INTEL_SILVERMONT},
32369 {"core2", M_INTEL_CORE2},
32370 {"corei7", M_INTEL_COREI7},
32371 {"nehalem", M_INTEL_COREI7_NEHALEM},
32372 {"westmere", M_INTEL_COREI7_WESTMERE},
32373 {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
32374 {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
32375 {"haswell", M_INTEL_COREI7_HASWELL},
32376 {"bonnell", M_INTEL_BONNELL},
32377 {"silvermont", M_INTEL_SILVERMONT},
32378 {"amdfam10h", M_AMDFAM10H},
32379 {"barcelona", M_AMDFAM10H_BARCELONA},
32380 {"shanghai", M_AMDFAM10H_SHANGHAI},
32381 {"istanbul", M_AMDFAM10H_ISTANBUL},
32382 {"btver1", M_AMD_BTVER1},
32383 {"amdfam15h", M_AMDFAM15H},
32384 {"bdver1", M_AMDFAM15H_BDVER1},
32385 {"bdver2", M_AMDFAM15H_BDVER2},
32386 {"bdver3", M_AMDFAM15H_BDVER3},
32387 {"bdver4", M_AMDFAM15H_BDVER4},
32388 {"btver2", M_AMD_BTVER2},
32389 };
32390
32391 static struct _isa_names_table
32392 {
32393 const char *const name;
32394 const enum processor_features feature;
32395 }
32396 const isa_names_table[] =
32397 {
32398 {"cmov", F_CMOV},
32399 {"mmx", F_MMX},
32400 {"popcnt", F_POPCNT},
32401 {"sse", F_SSE},
32402 {"sse2", F_SSE2},
32403 {"sse3", F_SSE3},
32404 {"ssse3", F_SSSE3},
32405 {"sse4a", F_SSE4_A},
32406 {"sse4.1", F_SSE4_1},
32407 {"sse4.2", F_SSE4_2},
32408 {"avx", F_AVX},
32409 {"fma4", F_FMA4},
32410 {"xop", F_XOP},
32411 {"fma", F_FMA},
32412 {"avx2", F_AVX2}
32413 };
32414
32415 tree __processor_model_type = build_processor_model_struct ();
32416 tree __cpu_model_var = make_var_decl (__processor_model_type,
32417 "__cpu_model");
32418
32419
32420 varpool_add_new_variable (__cpu_model_var);
32421
32422 gcc_assert ((args != NULL) && (*args != NULL));
32423
32424 param_string_cst = *args;
32425 while (param_string_cst
32426 && TREE_CODE (param_string_cst) != STRING_CST)
32427 {
32428       /* *args must be an expr that can contain other EXPRs leading to a
32429 STRING_CST. */
32430 if (!EXPR_P (param_string_cst))
32431 {
32432 error ("Parameter to builtin must be a string constant or literal");
32433 return integer_zero_node;
32434 }
32435 param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
32436 }
32437
32438 gcc_assert (param_string_cst);
32439
32440 if (fn_code == IX86_BUILTIN_CPU_IS)
32441 {
32442 tree ref;
32443 tree field;
32444 tree final;
32445
32446 unsigned int field_val = 0;
32447 unsigned int NUM_ARCH_NAMES
32448 = sizeof (arch_names_table) / sizeof (struct _arch_names_table);
32449
32450 for (i = 0; i < NUM_ARCH_NAMES; i++)
32451 if (strcmp (arch_names_table[i].name,
32452 TREE_STRING_POINTER (param_string_cst)) == 0)
32453 break;
32454
32455 if (i == NUM_ARCH_NAMES)
32456 {
32457 error ("Parameter to builtin not valid: %s",
32458 TREE_STRING_POINTER (param_string_cst));
32459 return integer_zero_node;
32460 }
32461
32462 field = TYPE_FIELDS (__processor_model_type);
32463 field_val = arch_names_table[i].model;
32464
32465 /* CPU types are stored in the next field. */
32466 if (field_val > M_CPU_TYPE_START
32467 && field_val < M_CPU_SUBTYPE_START)
32468 {
32469 field = DECL_CHAIN (field);
32470 field_val -= M_CPU_TYPE_START;
32471 }
32472
32473 /* CPU subtypes are stored in the next field. */
32474 if (field_val > M_CPU_SUBTYPE_START)
32475 {
32476 field = DECL_CHAIN ( DECL_CHAIN (field));
32477 field_val -= M_CPU_SUBTYPE_START;
32478 }
32479
32480 /* Get the appropriate field in __cpu_model. */
32481 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32482 field, NULL_TREE);
32483
32484 /* Check the value. */
32485 final = build2 (EQ_EXPR, unsigned_type_node, ref,
32486 build_int_cstu (unsigned_type_node, field_val));
32487 return build1 (CONVERT_EXPR, integer_type_node, final);
32488 }
32489 else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32490 {
32491 tree ref;
32492 tree array_elt;
32493 tree field;
32494 tree final;
32495
32496 unsigned int field_val = 0;
32497 unsigned int NUM_ISA_NAMES
32498 = sizeof (isa_names_table) / sizeof (struct _isa_names_table);
32499
32500 for (i = 0; i < NUM_ISA_NAMES; i++)
32501 if (strcmp (isa_names_table[i].name,
32502 TREE_STRING_POINTER (param_string_cst)) == 0)
32503 break;
32504
32505 if (i == NUM_ISA_NAMES)
32506 {
32507 error ("Parameter to builtin not valid: %s",
32508 TREE_STRING_POINTER (param_string_cst));
32509 return integer_zero_node;
32510 }
32511
32512 field = TYPE_FIELDS (__processor_model_type);
32513 /* Get the last field, which is __cpu_features. */
32514 while (DECL_CHAIN (field))
32515 field = DECL_CHAIN (field);
32516
32517 /* Get the appropriate field: __cpu_model.__cpu_features */
32518 ref = build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
32519 field, NULL_TREE);
32520
32521 /* Access the 0th element of __cpu_features array. */
32522 array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
32523 integer_zero_node, NULL_TREE, NULL_TREE);
32524
32525 field_val = (1 << isa_names_table[i].feature);
32526 /* Return __cpu_model.__cpu_features[0] & field_val */
32527 final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
32528 build_int_cstu (unsigned_type_node, field_val));
32529 return build1 (CONVERT_EXPR, integer_type_node, final);
32530 }
32531 gcc_unreachable ();
32532 }
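
/* Illustrative sketch only (not part of GCC): roughly the C equivalent of
   what fold_builtin_cpu produces, written against the __cpu_model variable
   exported by libgcc.  The struct tag and example_* names are hypothetical;
   the layout follows libgcc/config/i386/cpuinfo.c.  Guarded out.  */
#if 0
extern struct example_processor_model
{
  unsigned int __cpu_vendor;
  unsigned int __cpu_type;
  unsigned int __cpu_subtype;
  unsigned int __cpu_features[1];
} __cpu_model;

static int
example_folded_queries (void)
{
  /* __builtin_cpu_is ("corei7"): M_INTEL_COREI7 (6) lies between
     M_CPU_TYPE_START (3) and M_CPU_SUBTYPE_START, so the start value is
     subtracted and the result compared against the __cpu_type field.  */
  int is_corei7 = (__cpu_model.__cpu_type == 6 - 3);

  /* __builtin_cpu_supports ("avx2"): test bit F_AVX2 (10) of the first
     feature word.  */
  int has_avx2 = (int) (__cpu_model.__cpu_features[0] & (1u << 10));

  return is_corei7 && has_avx2;
}
#endif
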
32533
32534 static tree
32535 ix86_fold_builtin (tree fndecl, int n_args,
32536 tree *args, bool ignore ATTRIBUTE_UNUSED)
32537 {
32538 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
32539 {
32540 enum ix86_builtins fn_code = (enum ix86_builtins)
32541 DECL_FUNCTION_CODE (fndecl);
32542 if (fn_code == IX86_BUILTIN_CPU_IS
32543 || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
32544 {
32545 gcc_assert (n_args == 1);
32546 return fold_builtin_cpu (fndecl, args);
32547 }
32548 }
32549
32550 #ifdef SUBTARGET_FOLD_BUILTIN
32551 return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
32552 #endif
32553
32554 return NULL_TREE;
32555 }
32556
32557 /* Make builtins to detect cpu type and features supported. NAME is
32558 the builtin name, CODE is the builtin code, and FTYPE is the function
32559 type of the builtin. */
32560
32561 static void
32562 make_cpu_type_builtin (const char* name, int code,
32563 enum ix86_builtin_func_type ftype, bool is_const)
32564 {
32565 tree decl;
32566 tree type;
32567
32568 type = ix86_get_builtin_func_type (ftype);
32569 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
32570 NULL, NULL_TREE);
32571 gcc_assert (decl != NULL_TREE);
32572 ix86_builtins[(int) code] = decl;
32573 TREE_READONLY (decl) = is_const;
32574 }
32575
32576 /* Make builtins to get CPU type and features supported. The created
32577    builtins are:
32578
32579 __builtin_cpu_init (), to detect cpu type and features,
32580 __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
32581 __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
32582 */
32583
32584 static void
32585 ix86_init_platform_type_builtins (void)
32586 {
32587 make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
32588 INT_FTYPE_VOID, false);
32589 make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
32590 INT_FTYPE_PCCHAR, true);
32591 make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
32592 INT_FTYPE_PCCHAR, true);
32593 }
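
/* Illustrative sketch only (not part of GCC): how the three builtins
   created above are typically used from user code.  example_* is a
   hypothetical name; the builtins themselves are the documented ones.
   Guarded out.  */
#if 0
extern int printf (const char *, ...);

static void
example_cpu_builtins (void)
{
  /* __builtin_cpu_init must run before the two query builtins.  */
  __builtin_cpu_init ();

  if (__builtin_cpu_is ("corei7"))
    printf ("Core i7 class CPU\n");

  if (__builtin_cpu_supports ("avx2"))
    printf ("AVX2 available\n");
}
#endif
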
32594
32595 /* Internal method for ix86_init_builtins. */
32596
32597 static void
32598 ix86_init_builtins_va_builtins_abi (void)
32599 {
32600 tree ms_va_ref, sysv_va_ref;
32601 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
32602 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
32603 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
32604 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
32605
32606 if (!TARGET_64BIT)
32607 return;
32608 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
32609 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
32610 ms_va_ref = build_reference_type (ms_va_list_type_node);
32611 sysv_va_ref =
32612 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
32613
32614 fnvoid_va_end_ms =
32615 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32616 fnvoid_va_start_ms =
32617 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
32618 fnvoid_va_end_sysv =
32619 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
32620 fnvoid_va_start_sysv =
32621 build_varargs_function_type_list (void_type_node, sysv_va_ref,
32622 NULL_TREE);
32623 fnvoid_va_copy_ms =
32624 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
32625 NULL_TREE);
32626 fnvoid_va_copy_sysv =
32627 build_function_type_list (void_type_node, sysv_va_ref,
32628 sysv_va_ref, NULL_TREE);
32629
32630 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
32631 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
32632 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
32633 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
32634 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
32635 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
32636 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
32637 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32638 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
32639 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32640 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
32641 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
32642 }
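
/* Illustrative sketch only (not part of GCC): on x86-64, a function that
   carries the ms_abi attribute expands va_start/va_arg/va_end through the
   __builtin_ms_va_* builtins registered above (and sysv_abi functions use
   the __builtin_sysv_va_* set).  example_* is a hypothetical name.
   Guarded out.  */
#if 0
#include <stdarg.h>

static int __attribute__ ((ms_abi))
example_ms_abi_sum (int n, ...)
{
  va_list ap;
  int i, sum = 0;

  va_start (ap, n);
  for (i = 0; i < n; i++)
    sum += va_arg (ap, int);
  va_end (ap);

  return sum;
}
#endif
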
32643
32644 static void
32645 ix86_init_builtin_types (void)
32646 {
32647 tree float128_type_node, float80_type_node;
32648
32649 /* The __float80 type. */
32650 float80_type_node = long_double_type_node;
32651 if (TYPE_MODE (float80_type_node) != XFmode)
32652 {
32653 /* The __float80 type. */
32654 float80_type_node = make_node (REAL_TYPE);
32655
32656 TYPE_PRECISION (float80_type_node) = 80;
32657 layout_type (float80_type_node);
32658 }
32659 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
32660
32661 /* The __float128 type. */
32662 float128_type_node = make_node (REAL_TYPE);
32663 TYPE_PRECISION (float128_type_node) = 128;
32664 layout_type (float128_type_node);
32665 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
32666
32667 /* This macro is built by i386-builtin-types.awk. */
32668 DEFINE_BUILTIN_PRIMITIVE_TYPES;
32669 }
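
/* Illustrative sketch only (not part of GCC): the extended float types
   registered above, as seen from user code on x86.  example_* is a
   hypothetical name.  Guarded out.  */
#if 0
static __float128
example_extended_floats (double x)
{
  __float80 e = x;		/* 80-bit x87 extended precision.  */
  return (__float128) e + 1;	/* 128-bit quad precision.  */
}
#endif
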
32670
32671 static void
32672 ix86_init_builtins (void)
32673 {
32674 tree t;
32675
32676 ix86_init_builtin_types ();
32677
32678 /* Builtins to get CPU type and features. */
32679 ix86_init_platform_type_builtins ();
32680
32681 /* TFmode support builtins. */
32682 def_builtin_const (0, "__builtin_infq",
32683 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
32684 def_builtin_const (0, "__builtin_huge_valq",
32685 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
32686
32687   /* We will expand them to a normal call if SSE isn't available, since
32688      they are used by libgcc.  */
32689 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
32690 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
32691 BUILT_IN_MD, "__fabstf2", NULL_TREE);
32692 TREE_READONLY (t) = 1;
32693 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
32694
32695 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
32696 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
32697 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
32698 TREE_READONLY (t) = 1;
32699 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
32700
32701 ix86_init_tm_builtins ();
32702 ix86_init_mmx_sse_builtins ();
32703
32704 if (TARGET_LP64)
32705 ix86_init_builtins_va_builtins_abi ();
32706
32707 #ifdef SUBTARGET_INIT_BUILTINS
32708 SUBTARGET_INIT_BUILTINS;
32709 #endif
32710 }
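
/* Illustrative sketch only (not part of GCC): the TFmode helper builtins
   registered above, as called from user code.  example_* is a hypothetical
   name.  Guarded out.  */
#if 0
static __float128
example_tf_builtins (__float128 x)
{
  if (__builtin_fabsq (x) == __builtin_infq ())
    return __builtin_huge_valq ();

  return __builtin_copysignq (x, (__float128) -1.0);
}
#endif
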
32711
32712 /* Return the ix86 builtin for CODE. */
32713
32714 static tree
32715 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
32716 {
32717 if (code >= IX86_BUILTIN_MAX)
32718 return error_mark_node;
32719
32720 return ix86_builtins[code];
32721 }
32722
32723 /* Errors in the source file can cause expand_expr to return const0_rtx
32724 where we expect a vector. To avoid crashing, use one of the vector
32725 clear instructions. */
32726 static rtx
32727 safe_vector_operand (rtx x, enum machine_mode mode)
32728 {
32729 if (x == const0_rtx)
32730 x = CONST0_RTX (mode);
32731 return x;
32732 }
32733
32734 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
32735
32736 static rtx
32737 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
32738 {
32739 rtx pat;
32740 tree arg0 = CALL_EXPR_ARG (exp, 0);
32741 tree arg1 = CALL_EXPR_ARG (exp, 1);
32742 rtx op0 = expand_normal (arg0);
32743 rtx op1 = expand_normal (arg1);
32744 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32745 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
32746 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
32747
32748 if (VECTOR_MODE_P (mode0))
32749 op0 = safe_vector_operand (op0, mode0);
32750 if (VECTOR_MODE_P (mode1))
32751 op1 = safe_vector_operand (op1, mode1);
32752
32753 if (optimize || !target
32754 || GET_MODE (target) != tmode
32755 || !insn_data[icode].operand[0].predicate (target, tmode))
32756 target = gen_reg_rtx (tmode);
32757
32758 if (GET_MODE (op1) == SImode && mode1 == TImode)
32759 {
32760 rtx x = gen_reg_rtx (V4SImode);
32761 emit_insn (gen_sse2_loadd (x, op1));
32762 op1 = gen_lowpart (TImode, x);
32763 }
32764
32765 if (!insn_data[icode].operand[1].predicate (op0, mode0))
32766 op0 = copy_to_mode_reg (mode0, op0);
32767 if (!insn_data[icode].operand[2].predicate (op1, mode1))
32768 op1 = copy_to_mode_reg (mode1, op1);
32769
32770 pat = GEN_FCN (icode) (target, op0, op1);
32771 if (! pat)
32772 return 0;
32773
32774 emit_insn (pat);
32775
32776 return target;
32777 }
32778
32779 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
32780
32781 static rtx
32782 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
32783 enum ix86_builtin_func_type m_type,
32784 enum rtx_code sub_code)
32785 {
32786 rtx pat;
32787 int i;
32788 int nargs;
32789 bool comparison_p = false;
32790 bool tf_p = false;
32791 bool last_arg_constant = false;
32792 int num_memory = 0;
32793 struct {
32794 rtx op;
32795 enum machine_mode mode;
32796 } args[4];
32797
32798 enum machine_mode tmode = insn_data[icode].operand[0].mode;
32799
32800 switch (m_type)
32801 {
32802 case MULTI_ARG_4_DF2_DI_I:
32803 case MULTI_ARG_4_DF2_DI_I1:
32804 case MULTI_ARG_4_SF2_SI_I:
32805 case MULTI_ARG_4_SF2_SI_I1:
32806 nargs = 4;
32807 last_arg_constant = true;
32808 break;
32809
32810 case MULTI_ARG_3_SF:
32811 case MULTI_ARG_3_DF:
32812 case MULTI_ARG_3_SF2:
32813 case MULTI_ARG_3_DF2:
32814 case MULTI_ARG_3_DI:
32815 case MULTI_ARG_3_SI:
32816 case MULTI_ARG_3_SI_DI:
32817 case MULTI_ARG_3_HI:
32818 case MULTI_ARG_3_HI_SI:
32819 case MULTI_ARG_3_QI:
32820 case MULTI_ARG_3_DI2:
32821 case MULTI_ARG_3_SI2:
32822 case MULTI_ARG_3_HI2:
32823 case MULTI_ARG_3_QI2:
32824 nargs = 3;
32825 break;
32826
32827 case MULTI_ARG_2_SF:
32828 case MULTI_ARG_2_DF:
32829 case MULTI_ARG_2_DI:
32830 case MULTI_ARG_2_SI:
32831 case MULTI_ARG_2_HI:
32832 case MULTI_ARG_2_QI:
32833 nargs = 2;
32834 break;
32835
32836 case MULTI_ARG_2_DI_IMM:
32837 case MULTI_ARG_2_SI_IMM:
32838 case MULTI_ARG_2_HI_IMM:
32839 case MULTI_ARG_2_QI_IMM:
32840 nargs = 2;
32841 last_arg_constant = true;
32842 break;
32843
32844 case MULTI_ARG_1_SF:
32845 case MULTI_ARG_1_DF:
32846 case MULTI_ARG_1_SF2:
32847 case MULTI_ARG_1_DF2:
32848 case MULTI_ARG_1_DI:
32849 case MULTI_ARG_1_SI:
32850 case MULTI_ARG_1_HI:
32851 case MULTI_ARG_1_QI:
32852 case MULTI_ARG_1_SI_DI:
32853 case MULTI_ARG_1_HI_DI:
32854 case MULTI_ARG_1_HI_SI:
32855 case MULTI_ARG_1_QI_DI:
32856 case MULTI_ARG_1_QI_SI:
32857 case MULTI_ARG_1_QI_HI:
32858 nargs = 1;
32859 break;
32860
32861 case MULTI_ARG_2_DI_CMP:
32862 case MULTI_ARG_2_SI_CMP:
32863 case MULTI_ARG_2_HI_CMP:
32864 case MULTI_ARG_2_QI_CMP:
32865 nargs = 2;
32866 comparison_p = true;
32867 break;
32868
32869 case MULTI_ARG_2_SF_TF:
32870 case MULTI_ARG_2_DF_TF:
32871 case MULTI_ARG_2_DI_TF:
32872 case MULTI_ARG_2_SI_TF:
32873 case MULTI_ARG_2_HI_TF:
32874 case MULTI_ARG_2_QI_TF:
32875 nargs = 2;
32876 tf_p = true;
32877 break;
32878
32879 default:
32880 gcc_unreachable ();
32881 }
32882
32883 if (optimize || !target
32884 || GET_MODE (target) != tmode
32885 || !insn_data[icode].operand[0].predicate (target, tmode))
32886 target = gen_reg_rtx (tmode);
32887
32888 gcc_assert (nargs <= 4);
32889
32890 for (i = 0; i < nargs; i++)
32891 {
32892 tree arg = CALL_EXPR_ARG (exp, i);
32893 rtx op = expand_normal (arg);
32894 int adjust = (comparison_p) ? 1 : 0;
32895 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
32896
32897 if (last_arg_constant && i == nargs - 1)
32898 {
32899 if (!insn_data[icode].operand[i + 1].predicate (op, mode))
32900 {
32901 enum insn_code new_icode = icode;
32902 switch (icode)
32903 {
32904 case CODE_FOR_xop_vpermil2v2df3:
32905 case CODE_FOR_xop_vpermil2v4sf3:
32906 case CODE_FOR_xop_vpermil2v4df3:
32907 case CODE_FOR_xop_vpermil2v8sf3:
32908 error ("the last argument must be a 2-bit immediate");
32909 return gen_reg_rtx (tmode);
32910 case CODE_FOR_xop_rotlv2di3:
32911 new_icode = CODE_FOR_rotlv2di3;
32912 goto xop_rotl;
32913 case CODE_FOR_xop_rotlv4si3:
32914 new_icode = CODE_FOR_rotlv4si3;
32915 goto xop_rotl;
32916 case CODE_FOR_xop_rotlv8hi3:
32917 new_icode = CODE_FOR_rotlv8hi3;
32918 goto xop_rotl;
32919 case CODE_FOR_xop_rotlv16qi3:
32920 new_icode = CODE_FOR_rotlv16qi3;
32921 xop_rotl:
32922 if (CONST_INT_P (op))
32923 {
32924 int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
32925 op = GEN_INT (INTVAL (op) & mask);
32926 gcc_checking_assert
32927 (insn_data[icode].operand[i + 1].predicate (op, mode));
32928 }
32929 else
32930 {
32931 gcc_checking_assert
32932 (nargs == 2
32933 && insn_data[new_icode].operand[0].mode == tmode
32934 && insn_data[new_icode].operand[1].mode == tmode
32935 && insn_data[new_icode].operand[2].mode == mode
32936 && insn_data[new_icode].operand[0].predicate
32937 == insn_data[icode].operand[0].predicate
32938 && insn_data[new_icode].operand[1].predicate
32939 == insn_data[icode].operand[1].predicate);
32940 icode = new_icode;
32941 goto non_constant;
32942 }
32943 break;
32944 default:
32945 gcc_unreachable ();
32946 }
32947 }
32948 }
32949 else
32950 {
32951 non_constant:
32952 if (VECTOR_MODE_P (mode))
32953 op = safe_vector_operand (op, mode);
32954
32955 /* If we aren't optimizing, only allow one memory operand to be
32956 generated. */
32957 if (memory_operand (op, mode))
32958 num_memory++;
32959
32960 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
32961
32962 if (optimize
32963 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
32964 || num_memory > 1)
32965 op = force_reg (mode, op);
32966 }
32967
32968 args[i].op = op;
32969 args[i].mode = mode;
32970 }
32971
32972 switch (nargs)
32973 {
32974 case 1:
32975 pat = GEN_FCN (icode) (target, args[0].op);
32976 break;
32977
32978 case 2:
32979 if (tf_p)
32980 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
32981 GEN_INT ((int)sub_code));
32982 else if (! comparison_p)
32983 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
32984 else
32985 {
32986 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
32987 args[0].op,
32988 args[1].op);
32989
32990 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
32991 }
32992 break;
32993
32994 case 3:
32995 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
32996 break;
32997
32998 case 4:
32999 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
33000 break;
33001
33002 default:
33003 gcc_unreachable ();
33004 }
33005
33006 if (! pat)
33007 return 0;
33008
33009 emit_insn (pat);
33010 return target;
33011 }
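
/* Illustrative sketch only (not part of GCC): one of the XOP/FMA4
   intrinsics whose builtin is expanded by the multi-arg routine above
   (assumed mapping; see x86intrin.h).  example_* is a hypothetical name.
   Requires -mfma4.  Guarded out.  */
#if 0
#include <x86intrin.h>

static __m128
example_fma4 (__m128 a, __m128 b, __m128 c)
{
  /* a * b + c, a three-operand (MULTI_ARG_3_SF style) builtin.  */
  return _mm_macc_ps (a, b, c);
}
#endif
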
33012
33013 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
33014 insns with vec_merge. */
33015
33016 static rtx
33017 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
33018 rtx target)
33019 {
33020 rtx pat;
33021 tree arg0 = CALL_EXPR_ARG (exp, 0);
33022 rtx op1, op0 = expand_normal (arg0);
33023 enum machine_mode tmode = insn_data[icode].operand[0].mode;
33024 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
33025
33026 if (optimize || !target
33027 || GET_MODE (target) != tmode
33028 || !insn_data[icode].operand[0].predicate (target, tmode))
33029 target = gen_reg_rtx (tmode);
33030
33031 if (VECTOR_MODE_P (mode0))
33032 op0 = safe_vector_operand (op0, mode0);
33033
33034 if ((optimize && !register_operand (op0, mode0))
33035 || !insn_data[icode].operand[1].predicate (op0, mode0))
33036 op0 = copy_to_mode_reg (mode0, op0);
33037
33038 op1 = op0;
33039 if (!insn_data[icode].operand[2].predicate (op1, mode0))
33040 op1 = copy_to_mode_reg (mode0, op1);
33041
33042 pat = GEN_FCN (icode) (target, op0, op1);
33043 if (! pat)
33044 return 0;
33045 emit_insn (pat);
33046 return target;
33047 }
33048
33049 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
33050
33051 static rtx
33052 ix86_expand_sse_compare (const struct builtin_description *d,
33053 tree exp, rtx target, bool swap)
33054 {
33055 rtx pat;
33056 tree arg0 = CALL_EXPR_ARG (exp, 0);
33057 tree arg1 = CALL_EXPR_ARG (exp, 1);
33058 rtx op0 = expand_normal (arg0);
33059 rtx op1 = expand_normal (arg1);
33060 rtx op2;
33061 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33062 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33063 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33064 enum rtx_code comparison = d->comparison;
33065
33066 if (VECTOR_MODE_P (mode0))
33067 op0 = safe_vector_operand (op0, mode0);
33068 if (VECTOR_MODE_P (mode1))
33069 op1 = safe_vector_operand (op1, mode1);
33070
33071 /* Swap operands if we have a comparison that isn't available in
33072 hardware. */
33073 if (swap)
33074 {
33075 rtx tmp = gen_reg_rtx (mode1);
33076 emit_move_insn (tmp, op1);
33077 op1 = op0;
33078 op0 = tmp;
33079 }
33080
33081 if (optimize || !target
33082 || GET_MODE (target) != tmode
33083 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33084 target = gen_reg_rtx (tmode);
33085
33086 if ((optimize && !register_operand (op0, mode0))
33087 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
33088 op0 = copy_to_mode_reg (mode0, op0);
33089 if ((optimize && !register_operand (op1, mode1))
33090 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
33091 op1 = copy_to_mode_reg (mode1, op1);
33092
33093 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
33094 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33095 if (! pat)
33096 return 0;
33097 emit_insn (pat);
33098 return target;
33099 }
33100
33101 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
33102
33103 static rtx
33104 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
33105 rtx target)
33106 {
33107 rtx pat;
33108 tree arg0 = CALL_EXPR_ARG (exp, 0);
33109 tree arg1 = CALL_EXPR_ARG (exp, 1);
33110 rtx op0 = expand_normal (arg0);
33111 rtx op1 = expand_normal (arg1);
33112 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33113 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33114 enum rtx_code comparison = d->comparison;
33115
33116 if (VECTOR_MODE_P (mode0))
33117 op0 = safe_vector_operand (op0, mode0);
33118 if (VECTOR_MODE_P (mode1))
33119 op1 = safe_vector_operand (op1, mode1);
33120
33121 /* Swap operands if we have a comparison that isn't available in
33122 hardware. */
33123 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
33124 {
33125 rtx tmp = op1;
33126 op1 = op0;
33127 op0 = tmp;
33128 }
33129
33130 target = gen_reg_rtx (SImode);
33131 emit_move_insn (target, const0_rtx);
33132 target = gen_rtx_SUBREG (QImode, target, 0);
33133
33134 if ((optimize && !register_operand (op0, mode0))
33135 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33136 op0 = copy_to_mode_reg (mode0, op0);
33137 if ((optimize && !register_operand (op1, mode1))
33138 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33139 op1 = copy_to_mode_reg (mode1, op1);
33140
33141 pat = GEN_FCN (d->icode) (op0, op1);
33142 if (! pat)
33143 return 0;
33144 emit_insn (pat);
33145 emit_insn (gen_rtx_SET (VOIDmode,
33146 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33147 gen_rtx_fmt_ee (comparison, QImode,
33148 SET_DEST (pat),
33149 const0_rtx)));
33150
33151 return SUBREG_REG (target);
33152 }
33153
33154 /* Subroutines of ix86_expand_args_builtin to take care of round insns. */
33155
33156 static rtx
33157 ix86_expand_sse_round (const struct builtin_description *d, tree exp,
33158 rtx target)
33159 {
33160 rtx pat;
33161 tree arg0 = CALL_EXPR_ARG (exp, 0);
33162 rtx op1, op0 = expand_normal (arg0);
33163 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33164 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33165
33166 if (optimize || target == 0
33167 || GET_MODE (target) != tmode
33168 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33169 target = gen_reg_rtx (tmode);
33170
33171 if (VECTOR_MODE_P (mode0))
33172 op0 = safe_vector_operand (op0, mode0);
33173
33174 if ((optimize && !register_operand (op0, mode0))
33175 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33176 op0 = copy_to_mode_reg (mode0, op0);
33177
33178 op1 = GEN_INT (d->comparison);
33179
33180 pat = GEN_FCN (d->icode) (target, op0, op1);
33181 if (! pat)
33182 return 0;
33183 emit_insn (pat);
33184 return target;
33185 }
33186
33187 static rtx
33188 ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
33189 tree exp, rtx target)
33190 {
33191 rtx pat;
33192 tree arg0 = CALL_EXPR_ARG (exp, 0);
33193 tree arg1 = CALL_EXPR_ARG (exp, 1);
33194 rtx op0 = expand_normal (arg0);
33195 rtx op1 = expand_normal (arg1);
33196 rtx op2;
33197 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
33198 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
33199 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
33200
33201 if (optimize || target == 0
33202 || GET_MODE (target) != tmode
33203 || !insn_data[d->icode].operand[0].predicate (target, tmode))
33204 target = gen_reg_rtx (tmode);
33205
33206 op0 = safe_vector_operand (op0, mode0);
33207 op1 = safe_vector_operand (op1, mode1);
33208
33209 if ((optimize && !register_operand (op0, mode0))
33210 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33211 op0 = copy_to_mode_reg (mode0, op0);
33212 if ((optimize && !register_operand (op1, mode1))
33213 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33214 op1 = copy_to_mode_reg (mode1, op1);
33215
33216 op2 = GEN_INT (d->comparison);
33217
33218 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
33219 if (! pat)
33220 return 0;
33221 emit_insn (pat);
33222 return target;
33223 }
33224
33225 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
33226
33227 static rtx
33228 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
33229 rtx target)
33230 {
33231 rtx pat;
33232 tree arg0 = CALL_EXPR_ARG (exp, 0);
33233 tree arg1 = CALL_EXPR_ARG (exp, 1);
33234 rtx op0 = expand_normal (arg0);
33235 rtx op1 = expand_normal (arg1);
33236 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
33237 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
33238 enum rtx_code comparison = d->comparison;
33239
33240 if (VECTOR_MODE_P (mode0))
33241 op0 = safe_vector_operand (op0, mode0);
33242 if (VECTOR_MODE_P (mode1))
33243 op1 = safe_vector_operand (op1, mode1);
33244
33245 target = gen_reg_rtx (SImode);
33246 emit_move_insn (target, const0_rtx);
33247 target = gen_rtx_SUBREG (QImode, target, 0);
33248
33249 if ((optimize && !register_operand (op0, mode0))
33250 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
33251 op0 = copy_to_mode_reg (mode0, op0);
33252 if ((optimize && !register_operand (op1, mode1))
33253 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
33254 op1 = copy_to_mode_reg (mode1, op1);
33255
33256 pat = GEN_FCN (d->icode) (op0, op1);
33257 if (! pat)
33258 return 0;
33259 emit_insn (pat);
33260 emit_insn (gen_rtx_SET (VOIDmode,
33261 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33262 gen_rtx_fmt_ee (comparison, QImode,
33263 SET_DEST (pat),
33264 const0_rtx)));
33265
33266 return SUBREG_REG (target);
33267 }
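
/* Illustrative sketch only (not part of GCC): a ptest-style intrinsic whose
   builtin is expanded above, with the result read back from the flags
   register as coded there (assumed mapping; see smmintrin.h).  example_* is
   a hypothetical name.  Requires -msse4.1.  Guarded out.  */
#if 0
#include <smmintrin.h>

static int
example_ptest (__m128i a, __m128i b)
{
  /* Returns 1 when (a & b) is all zeroes, i.e. the ZF flag of PTEST.  */
  return _mm_testz_si128 (a, b);
}
#endif
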
33268
33269 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
33270
33271 static rtx
33272 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
33273 tree exp, rtx target)
33274 {
33275 rtx pat;
33276 tree arg0 = CALL_EXPR_ARG (exp, 0);
33277 tree arg1 = CALL_EXPR_ARG (exp, 1);
33278 tree arg2 = CALL_EXPR_ARG (exp, 2);
33279 tree arg3 = CALL_EXPR_ARG (exp, 3);
33280 tree arg4 = CALL_EXPR_ARG (exp, 4);
33281 rtx scratch0, scratch1;
33282 rtx op0 = expand_normal (arg0);
33283 rtx op1 = expand_normal (arg1);
33284 rtx op2 = expand_normal (arg2);
33285 rtx op3 = expand_normal (arg3);
33286 rtx op4 = expand_normal (arg4);
33287 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
33288
33289 tmode0 = insn_data[d->icode].operand[0].mode;
33290 tmode1 = insn_data[d->icode].operand[1].mode;
33291 modev2 = insn_data[d->icode].operand[2].mode;
33292 modei3 = insn_data[d->icode].operand[3].mode;
33293 modev4 = insn_data[d->icode].operand[4].mode;
33294 modei5 = insn_data[d->icode].operand[5].mode;
33295 modeimm = insn_data[d->icode].operand[6].mode;
33296
33297 if (VECTOR_MODE_P (modev2))
33298 op0 = safe_vector_operand (op0, modev2);
33299 if (VECTOR_MODE_P (modev4))
33300 op2 = safe_vector_operand (op2, modev4);
33301
33302 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33303 op0 = copy_to_mode_reg (modev2, op0);
33304 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
33305 op1 = copy_to_mode_reg (modei3, op1);
33306 if ((optimize && !register_operand (op2, modev4))
33307 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
33308 op2 = copy_to_mode_reg (modev4, op2);
33309 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
33310 op3 = copy_to_mode_reg (modei5, op3);
33311
33312 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
33313 {
33314 error ("the fifth argument must be an 8-bit immediate");
33315 return const0_rtx;
33316 }
33317
33318 if (d->code == IX86_BUILTIN_PCMPESTRI128)
33319 {
33320 if (optimize || !target
33321 || GET_MODE (target) != tmode0
33322 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33323 target = gen_reg_rtx (tmode0);
33324
33325 scratch1 = gen_reg_rtx (tmode1);
33326
33327 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
33328 }
33329 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
33330 {
33331 if (optimize || !target
33332 || GET_MODE (target) != tmode1
33333 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33334 target = gen_reg_rtx (tmode1);
33335
33336 scratch0 = gen_reg_rtx (tmode0);
33337
33338 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
33339 }
33340 else
33341 {
33342 gcc_assert (d->flag);
33343
33344 scratch0 = gen_reg_rtx (tmode0);
33345 scratch1 = gen_reg_rtx (tmode1);
33346
33347 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
33348 }
33349
33350 if (! pat)
33351 return 0;
33352
33353 emit_insn (pat);
33354
33355 if (d->flag)
33356 {
33357 target = gen_reg_rtx (SImode);
33358 emit_move_insn (target, const0_rtx);
33359 target = gen_rtx_SUBREG (QImode, target, 0);
33360
33361 emit_insn
33362 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33363 gen_rtx_fmt_ee (EQ, QImode,
33364 gen_rtx_REG ((enum machine_mode) d->flag,
33365 FLAGS_REG),
33366 const0_rtx)));
33367 return SUBREG_REG (target);
33368 }
33369 else
33370 return target;
33371 }
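
/* Illustrative sketch only (not part of GCC): a pcmpestri intrinsic that
   reaches the expander above, with the 8-bit immediate as its fifth
   argument (assumed mapping; see nmmintrin.h).  example_* is a hypothetical
   name.  Requires -msse4.2.  Guarded out.  */
#if 0
#include <nmmintrin.h>

static int
example_pcmpestri (__m128i a, int la, __m128i b, int lb)
{
  return _mm_cmpestri (a, la, b, lb, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
}
#endif
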
33372
33373
33374 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
33375
33376 static rtx
33377 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
33378 tree exp, rtx target)
33379 {
33380 rtx pat;
33381 tree arg0 = CALL_EXPR_ARG (exp, 0);
33382 tree arg1 = CALL_EXPR_ARG (exp, 1);
33383 tree arg2 = CALL_EXPR_ARG (exp, 2);
33384 rtx scratch0, scratch1;
33385 rtx op0 = expand_normal (arg0);
33386 rtx op1 = expand_normal (arg1);
33387 rtx op2 = expand_normal (arg2);
33388 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
33389
33390 tmode0 = insn_data[d->icode].operand[0].mode;
33391 tmode1 = insn_data[d->icode].operand[1].mode;
33392 modev2 = insn_data[d->icode].operand[2].mode;
33393 modev3 = insn_data[d->icode].operand[3].mode;
33394 modeimm = insn_data[d->icode].operand[4].mode;
33395
33396 if (VECTOR_MODE_P (modev2))
33397 op0 = safe_vector_operand (op0, modev2);
33398 if (VECTOR_MODE_P (modev3))
33399 op1 = safe_vector_operand (op1, modev3);
33400
33401 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
33402 op0 = copy_to_mode_reg (modev2, op0);
33403 if ((optimize && !register_operand (op1, modev3))
33404 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
33405 op1 = copy_to_mode_reg (modev3, op1);
33406
33407 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
33408 {
33409 error ("the third argument must be an 8-bit immediate");
33410 return const0_rtx;
33411 }
33412
33413 if (d->code == IX86_BUILTIN_PCMPISTRI128)
33414 {
33415 if (optimize || !target
33416 || GET_MODE (target) != tmode0
33417 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
33418 target = gen_reg_rtx (tmode0);
33419
33420 scratch1 = gen_reg_rtx (tmode1);
33421
33422 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
33423 }
33424 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
33425 {
33426 if (optimize || !target
33427 || GET_MODE (target) != tmode1
33428 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
33429 target = gen_reg_rtx (tmode1);
33430
33431 scratch0 = gen_reg_rtx (tmode0);
33432
33433 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
33434 }
33435 else
33436 {
33437 gcc_assert (d->flag);
33438
33439 scratch0 = gen_reg_rtx (tmode0);
33440 scratch1 = gen_reg_rtx (tmode1);
33441
33442 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
33443 }
33444
33445 if (! pat)
33446 return 0;
33447
33448 emit_insn (pat);
33449
33450 if (d->flag)
33451 {
33452 target = gen_reg_rtx (SImode);
33453 emit_move_insn (target, const0_rtx);
33454 target = gen_rtx_SUBREG (QImode, target, 0);
33455
33456 emit_insn
33457 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
33458 gen_rtx_fmt_ee (EQ, QImode,
33459 gen_rtx_REG ((enum machine_mode) d->flag,
33460 FLAGS_REG),
33461 const0_rtx)));
33462 return SUBREG_REG (target);
33463 }
33464 else
33465 return target;
33466 }
33467
33468 /* Subroutine of ix86_expand_builtin to take care of insns with
33469 variable number of operands. */
33470
33471 static rtx
33472 ix86_expand_args_builtin (const struct builtin_description *d,
33473 tree exp, rtx target)
33474 {
33475 rtx pat, real_target;
33476 unsigned int i, nargs;
33477 unsigned int nargs_constant = 0;
33478 unsigned int mask_pos = 0;
33479 int num_memory = 0;
33480 struct
33481 {
33482 rtx op;
33483 enum machine_mode mode;
33484 } args[6];
33485 bool last_arg_count = false;
33486 enum insn_code icode = d->icode;
33487 const struct insn_data_d *insn_p = &insn_data[icode];
33488 enum machine_mode tmode = insn_p->operand[0].mode;
33489 enum machine_mode rmode = VOIDmode;
33490 bool swap = false;
33491 enum rtx_code comparison = d->comparison;
33492
33493 switch ((enum ix86_builtin_func_type) d->flag)
33494 {
33495 case V2DF_FTYPE_V2DF_ROUND:
33496 case V4DF_FTYPE_V4DF_ROUND:
33497 case V4SF_FTYPE_V4SF_ROUND:
33498 case V8SF_FTYPE_V8SF_ROUND:
33499 case V4SI_FTYPE_V4SF_ROUND:
33500 case V8SI_FTYPE_V8SF_ROUND:
33501 return ix86_expand_sse_round (d, exp, target);
33502 case V4SI_FTYPE_V2DF_V2DF_ROUND:
33503 case V8SI_FTYPE_V4DF_V4DF_ROUND:
33504 case V16SI_FTYPE_V8DF_V8DF_ROUND:
33505 return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
33506 case INT_FTYPE_V8SF_V8SF_PTEST:
33507 case INT_FTYPE_V4DI_V4DI_PTEST:
33508 case INT_FTYPE_V4DF_V4DF_PTEST:
33509 case INT_FTYPE_V4SF_V4SF_PTEST:
33510 case INT_FTYPE_V2DI_V2DI_PTEST:
33511 case INT_FTYPE_V2DF_V2DF_PTEST:
33512 return ix86_expand_sse_ptest (d, exp, target);
33513 case FLOAT128_FTYPE_FLOAT128:
33514 case FLOAT_FTYPE_FLOAT:
33515 case INT_FTYPE_INT:
33516 case UINT64_FTYPE_INT:
33517 case UINT16_FTYPE_UINT16:
33518 case INT64_FTYPE_INT64:
33519 case INT64_FTYPE_V4SF:
33520 case INT64_FTYPE_V2DF:
33521 case INT_FTYPE_V16QI:
33522 case INT_FTYPE_V8QI:
33523 case INT_FTYPE_V8SF:
33524 case INT_FTYPE_V4DF:
33525 case INT_FTYPE_V4SF:
33526 case INT_FTYPE_V2DF:
33527 case INT_FTYPE_V32QI:
33528 case V16QI_FTYPE_V16QI:
33529 case V8SI_FTYPE_V8SF:
33530 case V8SI_FTYPE_V4SI:
33531 case V8HI_FTYPE_V8HI:
33532 case V8HI_FTYPE_V16QI:
33533 case V8QI_FTYPE_V8QI:
33534 case V8SF_FTYPE_V8SF:
33535 case V8SF_FTYPE_V8SI:
33536 case V8SF_FTYPE_V4SF:
33537 case V8SF_FTYPE_V8HI:
33538 case V4SI_FTYPE_V4SI:
33539 case V4SI_FTYPE_V16QI:
33540 case V4SI_FTYPE_V4SF:
33541 case V4SI_FTYPE_V8SI:
33542 case V4SI_FTYPE_V8HI:
33543 case V4SI_FTYPE_V4DF:
33544 case V4SI_FTYPE_V2DF:
33545 case V4HI_FTYPE_V4HI:
33546 case V4DF_FTYPE_V4DF:
33547 case V4DF_FTYPE_V4SI:
33548 case V4DF_FTYPE_V4SF:
33549 case V4DF_FTYPE_V2DF:
33550 case V4SF_FTYPE_V4SF:
33551 case V4SF_FTYPE_V4SI:
33552 case V4SF_FTYPE_V8SF:
33553 case V4SF_FTYPE_V4DF:
33554 case V4SF_FTYPE_V8HI:
33555 case V4SF_FTYPE_V2DF:
33556 case V2DI_FTYPE_V2DI:
33557 case V2DI_FTYPE_V16QI:
33558 case V2DI_FTYPE_V8HI:
33559 case V2DI_FTYPE_V4SI:
33560 case V2DF_FTYPE_V2DF:
33561 case V2DF_FTYPE_V4SI:
33562 case V2DF_FTYPE_V4DF:
33563 case V2DF_FTYPE_V4SF:
33564 case V2DF_FTYPE_V2SI:
33565 case V2SI_FTYPE_V2SI:
33566 case V2SI_FTYPE_V4SF:
33567 case V2SI_FTYPE_V2SF:
33568 case V2SI_FTYPE_V2DF:
33569 case V2SF_FTYPE_V2SF:
33570 case V2SF_FTYPE_V2SI:
33571 case V32QI_FTYPE_V32QI:
33572 case V32QI_FTYPE_V16QI:
33573 case V16HI_FTYPE_V16HI:
33574 case V16HI_FTYPE_V8HI:
33575 case V8SI_FTYPE_V8SI:
33576 case V16HI_FTYPE_V16QI:
33577 case V8SI_FTYPE_V16QI:
33578 case V4DI_FTYPE_V16QI:
33579 case V8SI_FTYPE_V8HI:
33580 case V4DI_FTYPE_V8HI:
33581 case V4DI_FTYPE_V4SI:
33582 case V4DI_FTYPE_V2DI:
33583 case HI_FTYPE_HI:
33584 case UINT_FTYPE_V2DF:
33585 case UINT_FTYPE_V4SF:
33586 case UINT64_FTYPE_V2DF:
33587 case UINT64_FTYPE_V4SF:
33588 case V16QI_FTYPE_V8DI:
33589 case V16HI_FTYPE_V16SI:
33590 case V16SI_FTYPE_HI:
33591 case V16SI_FTYPE_V16SI:
33592 case V16SI_FTYPE_INT:
33593 case V16SF_FTYPE_FLOAT:
33594 case V16SF_FTYPE_V4SF:
33595 case V16SF_FTYPE_V16SF:
33596 case V8HI_FTYPE_V8DI:
33597 case V8UHI_FTYPE_V8UHI:
33598 case V8SI_FTYPE_V8DI:
33599 case V8USI_FTYPE_V8USI:
33600 case V8SF_FTYPE_V8DF:
33601 case V8DI_FTYPE_QI:
33602 case V8DI_FTYPE_INT64:
33603 case V8DI_FTYPE_V4DI:
33604 case V8DI_FTYPE_V8DI:
33605 case V8DF_FTYPE_DOUBLE:
33606 case V8DF_FTYPE_V4DF:
33607 case V8DF_FTYPE_V8DF:
33608 case V8DF_FTYPE_V8SI:
33609 nargs = 1;
33610 break;
33611 case V4SF_FTYPE_V4SF_VEC_MERGE:
33612 case V2DF_FTYPE_V2DF_VEC_MERGE:
33613 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
33614 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
33615 case V16QI_FTYPE_V16QI_V16QI:
33616 case V16QI_FTYPE_V8HI_V8HI:
33617 case V16SI_FTYPE_V16SI_V16SI:
33618 case V16SF_FTYPE_V16SF_V16SF:
33619 case V16SF_FTYPE_V16SF_V16SI:
33620 case V8QI_FTYPE_V8QI_V8QI:
33621 case V8QI_FTYPE_V4HI_V4HI:
33622 case V8HI_FTYPE_V8HI_V8HI:
33623 case V8HI_FTYPE_V16QI_V16QI:
33624 case V8HI_FTYPE_V4SI_V4SI:
33625 case V8SF_FTYPE_V8SF_V8SF:
33626 case V8SF_FTYPE_V8SF_V8SI:
33627 case V8DI_FTYPE_V8DI_V8DI:
33628 case V8DF_FTYPE_V8DF_V8DF:
33629 case V8DF_FTYPE_V8DF_V8DI:
33630 case V4SI_FTYPE_V4SI_V4SI:
33631 case V4SI_FTYPE_V8HI_V8HI:
33632 case V4SI_FTYPE_V4SF_V4SF:
33633 case V4SI_FTYPE_V2DF_V2DF:
33634 case V4HI_FTYPE_V4HI_V4HI:
33635 case V4HI_FTYPE_V8QI_V8QI:
33636 case V4HI_FTYPE_V2SI_V2SI:
33637 case V4DF_FTYPE_V4DF_V4DF:
33638 case V4DF_FTYPE_V4DF_V4DI:
33639 case V4SF_FTYPE_V4SF_V4SF:
33640 case V4SF_FTYPE_V4SF_V4SI:
33641 case V4SF_FTYPE_V4SF_V2SI:
33642 case V4SF_FTYPE_V4SF_V2DF:
33643 case V4SF_FTYPE_V4SF_UINT:
33644 case V4SF_FTYPE_V4SF_UINT64:
33645 case V4SF_FTYPE_V4SF_DI:
33646 case V4SF_FTYPE_V4SF_SI:
33647 case V2DI_FTYPE_V2DI_V2DI:
33648 case V2DI_FTYPE_V16QI_V16QI:
33649 case V2DI_FTYPE_V4SI_V4SI:
33650 case V2UDI_FTYPE_V4USI_V4USI:
33651 case V2DI_FTYPE_V2DI_V16QI:
33652 case V2DI_FTYPE_V2DF_V2DF:
33653 case V2SI_FTYPE_V2SI_V2SI:
33654 case V2SI_FTYPE_V4HI_V4HI:
33655 case V2SI_FTYPE_V2SF_V2SF:
33656 case V2DF_FTYPE_V2DF_V2DF:
33657 case V2DF_FTYPE_V2DF_V4SF:
33658 case V2DF_FTYPE_V2DF_V2DI:
33659 case V2DF_FTYPE_V2DF_DI:
33660 case V2DF_FTYPE_V2DF_SI:
33661 case V2DF_FTYPE_V2DF_UINT:
33662 case V2DF_FTYPE_V2DF_UINT64:
33663 case V2SF_FTYPE_V2SF_V2SF:
33664 case V1DI_FTYPE_V1DI_V1DI:
33665 case V1DI_FTYPE_V8QI_V8QI:
33666 case V1DI_FTYPE_V2SI_V2SI:
33667 case V32QI_FTYPE_V16HI_V16HI:
33668 case V16HI_FTYPE_V8SI_V8SI:
33669 case V32QI_FTYPE_V32QI_V32QI:
33670 case V16HI_FTYPE_V32QI_V32QI:
33671 case V16HI_FTYPE_V16HI_V16HI:
33672 case V8SI_FTYPE_V4DF_V4DF:
33673 case V8SI_FTYPE_V8SI_V8SI:
33674 case V8SI_FTYPE_V16HI_V16HI:
33675 case V4DI_FTYPE_V4DI_V4DI:
33676 case V4DI_FTYPE_V8SI_V8SI:
33677 case V4UDI_FTYPE_V8USI_V8USI:
33678 case QI_FTYPE_V8DI_V8DI:
33679 case HI_FTYPE_V16SI_V16SI:
33680 if (comparison == UNKNOWN)
33681 return ix86_expand_binop_builtin (icode, exp, target);
33682 nargs = 2;
33683 break;
33684 case V4SF_FTYPE_V4SF_V4SF_SWAP:
33685 case V2DF_FTYPE_V2DF_V2DF_SWAP:
33686 gcc_assert (comparison != UNKNOWN);
33687 nargs = 2;
33688 swap = true;
33689 break;
33690 case V16HI_FTYPE_V16HI_V8HI_COUNT:
33691 case V16HI_FTYPE_V16HI_SI_COUNT:
33692 case V8SI_FTYPE_V8SI_V4SI_COUNT:
33693 case V8SI_FTYPE_V8SI_SI_COUNT:
33694 case V4DI_FTYPE_V4DI_V2DI_COUNT:
33695 case V4DI_FTYPE_V4DI_INT_COUNT:
33696 case V8HI_FTYPE_V8HI_V8HI_COUNT:
33697 case V8HI_FTYPE_V8HI_SI_COUNT:
33698 case V4SI_FTYPE_V4SI_V4SI_COUNT:
33699 case V4SI_FTYPE_V4SI_SI_COUNT:
33700 case V4HI_FTYPE_V4HI_V4HI_COUNT:
33701 case V4HI_FTYPE_V4HI_SI_COUNT:
33702 case V2DI_FTYPE_V2DI_V2DI_COUNT:
33703 case V2DI_FTYPE_V2DI_SI_COUNT:
33704 case V2SI_FTYPE_V2SI_V2SI_COUNT:
33705 case V2SI_FTYPE_V2SI_SI_COUNT:
33706 case V1DI_FTYPE_V1DI_V1DI_COUNT:
33707 case V1DI_FTYPE_V1DI_SI_COUNT:
33708 nargs = 2;
33709 last_arg_count = true;
33710 break;
33711 case UINT64_FTYPE_UINT64_UINT64:
33712 case UINT_FTYPE_UINT_UINT:
33713 case UINT_FTYPE_UINT_USHORT:
33714 case UINT_FTYPE_UINT_UCHAR:
33715 case UINT16_FTYPE_UINT16_INT:
33716 case UINT8_FTYPE_UINT8_INT:
33717 case HI_FTYPE_HI_HI:
33718 case V16SI_FTYPE_V8DF_V8DF:
33719 nargs = 2;
33720 break;
33721 case V2DI_FTYPE_V2DI_INT_CONVERT:
33722 nargs = 2;
33723 rmode = V1TImode;
33724 nargs_constant = 1;
33725 break;
33726 case V4DI_FTYPE_V4DI_INT_CONVERT:
33727 nargs = 2;
33728 rmode = V2TImode;
33729 nargs_constant = 1;
33730 break;
33731 case V8HI_FTYPE_V8HI_INT:
33732 case V8HI_FTYPE_V8SF_INT:
33733 case V16HI_FTYPE_V16SF_INT:
33734 case V8HI_FTYPE_V4SF_INT:
33735 case V8SF_FTYPE_V8SF_INT:
33736 case V4SF_FTYPE_V16SF_INT:
33737 case V16SF_FTYPE_V16SF_INT:
33738 case V4SI_FTYPE_V4SI_INT:
33739 case V4SI_FTYPE_V8SI_INT:
33740 case V4HI_FTYPE_V4HI_INT:
33741 case V4DF_FTYPE_V4DF_INT:
33742 case V4DF_FTYPE_V8DF_INT:
33743 case V4SF_FTYPE_V4SF_INT:
33744 case V4SF_FTYPE_V8SF_INT:
33745 case V2DI_FTYPE_V2DI_INT:
33746 case V2DF_FTYPE_V2DF_INT:
33747 case V2DF_FTYPE_V4DF_INT:
33748 case V16HI_FTYPE_V16HI_INT:
33749 case V8SI_FTYPE_V8SI_INT:
33750 case V16SI_FTYPE_V16SI_INT:
33751 case V4SI_FTYPE_V16SI_INT:
33752 case V4DI_FTYPE_V4DI_INT:
33753 case V2DI_FTYPE_V4DI_INT:
33754 case V4DI_FTYPE_V8DI_INT:
33755 case HI_FTYPE_HI_INT:
33756 nargs = 2;
33757 nargs_constant = 1;
33758 break;
33759 case V16QI_FTYPE_V16QI_V16QI_V16QI:
33760 case V8SF_FTYPE_V8SF_V8SF_V8SF:
33761 case V4DF_FTYPE_V4DF_V4DF_V4DF:
33762 case V4SF_FTYPE_V4SF_V4SF_V4SF:
33763 case V2DF_FTYPE_V2DF_V2DF_V2DF:
33764 case V32QI_FTYPE_V32QI_V32QI_V32QI:
33765 case HI_FTYPE_V16SI_V16SI_HI:
33766 case QI_FTYPE_V8DI_V8DI_QI:
33767 case V16HI_FTYPE_V16SI_V16HI_HI:
33768 case V16QI_FTYPE_V16SI_V16QI_HI:
33769 case V16QI_FTYPE_V8DI_V16QI_QI:
33770 case V16SF_FTYPE_V16SF_V16SF_HI:
33771 case V16SF_FTYPE_V16SF_V16SF_V16SF:
33772 case V16SF_FTYPE_V16SF_V16SI_V16SF:
33773 case V16SF_FTYPE_V16SI_V16SF_HI:
33774 case V16SF_FTYPE_V16SI_V16SF_V16SF:
33775 case V16SF_FTYPE_V4SF_V16SF_HI:
33776 case V16SI_FTYPE_SI_V16SI_HI:
33777 case V16SI_FTYPE_V16HI_V16SI_HI:
33778 case V16SI_FTYPE_V16QI_V16SI_HI:
33779 case V16SI_FTYPE_V16SF_V16SI_HI:
33780 case V16SI_FTYPE_V16SI_V16SI_HI:
33781 case V16SI_FTYPE_V16SI_V16SI_V16SI:
33782 case V16SI_FTYPE_V4SI_V16SI_HI:
33783 case V2DI_FTYPE_V2DI_V2DI_V2DI:
33784 case V4DI_FTYPE_V4DI_V4DI_V4DI:
33785 case V8DF_FTYPE_V2DF_V8DF_QI:
33786 case V8DF_FTYPE_V4DF_V8DF_QI:
33787 case V8DF_FTYPE_V8DF_V8DF_QI:
33788 case V8DF_FTYPE_V8DF_V8DF_V8DF:
33789 case V8DF_FTYPE_V8DF_V8DI_V8DF:
33790 case V8DF_FTYPE_V8DI_V8DF_V8DF:
33791 case V8DF_FTYPE_V8SF_V8DF_QI:
33792 case V8DF_FTYPE_V8SI_V8DF_QI:
33793 case V8DI_FTYPE_DI_V8DI_QI:
33794 case V8DI_FTYPE_V16QI_V8DI_QI:
33795 case V8DI_FTYPE_V2DI_V8DI_QI:
33796 case V8DI_FTYPE_V4DI_V8DI_QI:
33797 case V8DI_FTYPE_V8DI_V8DI_QI:
33798 case V8DI_FTYPE_V8DI_V8DI_V8DI:
33799 case V8DI_FTYPE_V8HI_V8DI_QI:
33800 case V8DI_FTYPE_V8SI_V8DI_QI:
33801 case V8HI_FTYPE_V8DI_V8HI_QI:
33802 case V8SF_FTYPE_V8DF_V8SF_QI:
33803 case V8SI_FTYPE_V8DF_V8SI_QI:
33804 case V8SI_FTYPE_V8DI_V8SI_QI:
33805 case V4SI_FTYPE_V4SI_V4SI_V4SI:
33806 nargs = 3;
33807 break;
33808 case V32QI_FTYPE_V32QI_V32QI_INT:
33809 case V16HI_FTYPE_V16HI_V16HI_INT:
33810 case V16QI_FTYPE_V16QI_V16QI_INT:
33811 case V4DI_FTYPE_V4DI_V4DI_INT:
33812 case V8HI_FTYPE_V8HI_V8HI_INT:
33813 case V8SI_FTYPE_V8SI_V8SI_INT:
33814 case V8SI_FTYPE_V8SI_V4SI_INT:
33815 case V8SF_FTYPE_V8SF_V8SF_INT:
33816 case V8SF_FTYPE_V8SF_V4SF_INT:
33817 case V4SI_FTYPE_V4SI_V4SI_INT:
33818 case V4DF_FTYPE_V4DF_V4DF_INT:
33819 case V16SF_FTYPE_V16SF_V16SF_INT:
33820 case V16SF_FTYPE_V16SF_V4SF_INT:
33821 case V16SI_FTYPE_V16SI_V4SI_INT:
33822 case V4DF_FTYPE_V4DF_V2DF_INT:
33823 case V4SF_FTYPE_V4SF_V4SF_INT:
33824 case V2DI_FTYPE_V2DI_V2DI_INT:
33825 case V4DI_FTYPE_V4DI_V2DI_INT:
33826 case V2DF_FTYPE_V2DF_V2DF_INT:
33827 case QI_FTYPE_V8DI_V8DI_INT:
33828 case QI_FTYPE_V8DF_V8DF_INT:
33829 case QI_FTYPE_V2DF_V2DF_INT:
33830 case QI_FTYPE_V4SF_V4SF_INT:
33831 case HI_FTYPE_V16SI_V16SI_INT:
33832 case HI_FTYPE_V16SF_V16SF_INT:
33833 nargs = 3;
33834 nargs_constant = 1;
33835 break;
33836 case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
33837 nargs = 3;
33838 rmode = V4DImode;
33839 nargs_constant = 1;
33840 break;
33841 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
33842 nargs = 3;
33843 rmode = V2DImode;
33844 nargs_constant = 1;
33845 break;
33846 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
33847 nargs = 3;
33848 rmode = DImode;
33849 nargs_constant = 1;
33850 break;
33851 case V2DI_FTYPE_V2DI_UINT_UINT:
33852 nargs = 3;
33853 nargs_constant = 2;
33854 break;
33855 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
33856 case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
33857 case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
33858 case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
33859 case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
33860 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
33861 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
33862 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
33863 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
33864 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
33865 case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
33866 case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
33867 case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
33868 case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
33869 case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
33870 case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
33871 nargs = 4;
33872 break;
33873 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
33874 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
33875 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
33876 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
33877 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
33878 nargs = 4;
33879 nargs_constant = 1;
33880 break;
33881 case QI_FTYPE_V2DF_V2DF_INT_QI:
33882 case QI_FTYPE_V4SF_V4SF_INT_QI:
33883 nargs = 4;
33884 mask_pos = 1;
33885 nargs_constant = 1;
33886 break;
33887 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
33888 nargs = 4;
33889 nargs_constant = 2;
33890 break;
33891 case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
33892 case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
33893 nargs = 4;
33894 break;
33895 case QI_FTYPE_V8DI_V8DI_INT_QI:
33896 case HI_FTYPE_V16SI_V16SI_INT_HI:
33897 case QI_FTYPE_V8DF_V8DF_INT_QI:
33898 case HI_FTYPE_V16SF_V16SF_INT_HI:
33899 mask_pos = 1;
33900 nargs = 4;
33901 nargs_constant = 1;
33902 break;
33903 case V8DF_FTYPE_V8DF_INT_V8DF_QI:
33904 case V16SF_FTYPE_V16SF_INT_V16SF_HI:
33905 case V16HI_FTYPE_V16SF_INT_V16HI_HI:
33906 case V16SI_FTYPE_V16SI_INT_V16SI_HI:
33907 case V4SI_FTYPE_V16SI_INT_V4SI_QI:
33908 case V4DI_FTYPE_V8DI_INT_V4DI_QI:
33909 case V4DF_FTYPE_V8DF_INT_V4DF_QI:
33910 case V4SF_FTYPE_V16SF_INT_V4SF_QI:
33911 case V8DI_FTYPE_V8DI_INT_V8DI_QI:
33912 nargs = 4;
33913 mask_pos = 2;
33914 nargs_constant = 1;
33915 break;
33916 case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
33917 case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
33918 case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
33919 case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
33920 case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
33921 case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
33922 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
33923 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
33924 case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
33925 case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
33926 nargs = 5;
33927 mask_pos = 2;
33928 nargs_constant = 1;
33929 break;
33930 case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
33931 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
33932 case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
33933 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
33934 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
33935 nargs = 5;
33936 mask_pos = 1;
33937 nargs_constant = 1;
33938 break;
33939
33940 default:
33941 gcc_unreachable ();
33942 }
33943
33944 gcc_assert (nargs <= ARRAY_SIZE (args));
33945
33946 if (comparison != UNKNOWN)
33947 {
33948 gcc_assert (nargs == 2);
33949 return ix86_expand_sse_compare (d, exp, target, swap);
33950 }
33951
33952 if (rmode == VOIDmode || rmode == tmode)
33953 {
33954 if (optimize
33955 || target == 0
33956 || GET_MODE (target) != tmode
33957 || !insn_p->operand[0].predicate (target, tmode))
33958 target = gen_reg_rtx (tmode);
33959 real_target = target;
33960 }
33961 else
33962 {
33963 real_target = gen_reg_rtx (tmode);
33964 target = simplify_gen_subreg (rmode, real_target, tmode, 0);
33965 }
33966
33967 for (i = 0; i < nargs; i++)
33968 {
33969 tree arg = CALL_EXPR_ARG (exp, i);
33970 rtx op = expand_normal (arg);
33971 enum machine_mode mode = insn_p->operand[i + 1].mode;
33972 bool match = insn_p->operand[i + 1].predicate (op, mode);
33973
33974 if (last_arg_count && (i + 1) == nargs)
33975 {
33976 	  /* SIMD shift insns take either an 8-bit immediate or a
33977 	     register as count, but the builtin functions take an int.
33978 	     If the count doesn't match, put it in a register.  */
33979 if (!match)
33980 {
33981 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
33982 if (!insn_p->operand[i + 1].predicate (op, mode))
33983 op = copy_to_reg (op);
33984 }
33985 }
33986 else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
33987 (!mask_pos && (nargs - i) <= nargs_constant))
33988 {
33989 if (!match)
33990 switch (icode)
33991 {
33992 case CODE_FOR_avx2_inserti128:
33993 case CODE_FOR_avx2_extracti128:
33994 		  error ("the last argument must be a 1-bit immediate");
33995 return const0_rtx;
33996
33997 case CODE_FOR_avx512f_cmpv8di3_mask:
33998 case CODE_FOR_avx512f_cmpv16si3_mask:
33999 case CODE_FOR_avx512f_ucmpv8di3_mask:
34000 case CODE_FOR_avx512f_ucmpv16si3_mask:
34001 error ("the last argument must be a 3-bit immediate");
34002 return const0_rtx;
34003
34004 case CODE_FOR_sse4_1_roundsd:
34005 case CODE_FOR_sse4_1_roundss:
34006
34007 case CODE_FOR_sse4_1_roundpd:
34008 case CODE_FOR_sse4_1_roundps:
34009 case CODE_FOR_avx_roundpd256:
34010 case CODE_FOR_avx_roundps256:
34011
34012 case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
34013 case CODE_FOR_sse4_1_roundps_sfix:
34014 case CODE_FOR_avx_roundpd_vec_pack_sfix256:
34015 case CODE_FOR_avx_roundps_sfix256:
34016
34017 case CODE_FOR_sse4_1_blendps:
34018 case CODE_FOR_avx_blendpd256:
34019 case CODE_FOR_avx_vpermilv4df:
34020 case CODE_FOR_avx512f_getmantv8df_mask:
34021 case CODE_FOR_avx512f_getmantv16sf_mask:
34022 error ("the last argument must be a 4-bit immediate");
34023 return const0_rtx;
34024
34025 case CODE_FOR_sha1rnds4:
34026 case CODE_FOR_sse4_1_blendpd:
34027 case CODE_FOR_avx_vpermilv2df:
34028 case CODE_FOR_xop_vpermil2v2df3:
34029 case CODE_FOR_xop_vpermil2v4sf3:
34030 case CODE_FOR_xop_vpermil2v4df3:
34031 case CODE_FOR_xop_vpermil2v8sf3:
34032 case CODE_FOR_avx512f_vinsertf32x4_mask:
34033 case CODE_FOR_avx512f_vinserti32x4_mask:
34034 case CODE_FOR_avx512f_vextractf32x4_mask:
34035 case CODE_FOR_avx512f_vextracti32x4_mask:
34036 error ("the last argument must be a 2-bit immediate");
34037 return const0_rtx;
34038
34039 case CODE_FOR_avx_vextractf128v4df:
34040 case CODE_FOR_avx_vextractf128v8sf:
34041 case CODE_FOR_avx_vextractf128v8si:
34042 case CODE_FOR_avx_vinsertf128v4df:
34043 case CODE_FOR_avx_vinsertf128v8sf:
34044 case CODE_FOR_avx_vinsertf128v8si:
34045 case CODE_FOR_avx512f_vinsertf64x4_mask:
34046 case CODE_FOR_avx512f_vinserti64x4_mask:
34047 case CODE_FOR_avx512f_vextractf64x4_mask:
34048 case CODE_FOR_avx512f_vextracti64x4_mask:
34049 error ("the last argument must be a 1-bit immediate");
34050 return const0_rtx;
34051
34052 case CODE_FOR_avx_vmcmpv2df3:
34053 case CODE_FOR_avx_vmcmpv4sf3:
34054 case CODE_FOR_avx_cmpv2df3:
34055 case CODE_FOR_avx_cmpv4sf3:
34056 case CODE_FOR_avx_cmpv4df3:
34057 case CODE_FOR_avx_cmpv8sf3:
34058 case CODE_FOR_avx512f_cmpv8df3_mask:
34059 case CODE_FOR_avx512f_cmpv16sf3_mask:
34060 case CODE_FOR_avx512f_vmcmpv2df3_mask:
34061 case CODE_FOR_avx512f_vmcmpv4sf3_mask:
34062 error ("the last argument must be a 5-bit immediate");
34063 return const0_rtx;
34064
34065 default:
34066 switch (nargs_constant)
34067 {
34068 case 2:
34069 if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
34070 (!mask_pos && (nargs - i) == nargs_constant))
34071 {
34072 error ("the next to last argument must be an 8-bit immediate");
34073 break;
34074 }
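		  /* FALLTHRU */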
34075 case 1:
34076 error ("the last argument must be an 8-bit immediate");
34077 break;
34078 default:
34079 gcc_unreachable ();
34080 }
34081 return const0_rtx;
34082 }
34083 }
34084 else
34085 {
34086 if (VECTOR_MODE_P (mode))
34087 op = safe_vector_operand (op, mode);
34088
34089 /* If we aren't optimizing, only allow one memory operand to
34090 be generated. */
34091 if (memory_operand (op, mode))
34092 num_memory++;
34093
34094 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34095 {
34096 if (optimize || !match || num_memory > 1)
34097 op = copy_to_mode_reg (mode, op);
34098 }
34099 else
34100 {
34101 op = copy_to_reg (op);
34102 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34103 }
34104 }
34105
34106 args[i].op = op;
34107 args[i].mode = mode;
34108 }
34109
34110 switch (nargs)
34111 {
34112 case 1:
34113 pat = GEN_FCN (icode) (real_target, args[0].op);
34114 break;
34115 case 2:
34116 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
34117 break;
34118 case 3:
34119 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34120 args[2].op);
34121 break;
34122 case 4:
34123 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34124 args[2].op, args[3].op);
34125 break;
34126 case 5:
34127 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34128 				     args[2].op, args[3].op, args[4].op);
      break;
34129 case 6:
34130 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
34131 args[2].op, args[3].op, args[4].op,
34132 args[5].op);
34133 break;
34134 default:
34135 gcc_unreachable ();
34136 }
34137
34138 if (! pat)
34139 return 0;
34140
34141 emit_insn (pat);
34142 return target;
34143 }
34144
34145 /* Transform a pattern of the following layout:
34146      (parallel [
34147        (set (A) (B))
34148        (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34149      ])
34150    into:
34151      (set (A) (B))
34152
34153    Or:
34154      (parallel [ A B
34155        ...
34156        (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
34157        ...
34158      ])
34159    into:
34160      (parallel [ A B ... ]) */
34161
34162 static rtx
34163 ix86_erase_embedded_rounding (rtx pat)
34164 {
34165 if (GET_CODE (pat) == INSN)
34166 pat = PATTERN (pat);
34167
34168 gcc_assert (GET_CODE (pat) == PARALLEL);
34169
34170 if (XVECLEN (pat, 0) == 2)
34171 {
34172 rtx p0 = XVECEXP (pat, 0, 0);
34173 rtx p1 = XVECEXP (pat, 0, 1);
34174
34175 gcc_assert (GET_CODE (p0) == SET
34176 && GET_CODE (p1) == UNSPEC
34177 && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
34178
34179 return p0;
34180 }
34181 else
34182 {
34183 rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
34184 int i = 0;
34185 int j = 0;
34186
34187 for (; i < XVECLEN (pat, 0); ++i)
34188 {
34189 rtx elem = XVECEXP (pat, 0, i);
34190 if (GET_CODE (elem) != UNSPEC
34191 || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
34192 res [j++] = elem;
34193 }
34194
34195 /* No more than 1 occurrence was removed. */
34196 gcc_assert (j >= XVECLEN (pat, 0) - 1);
34197
34198 return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
34199 }
34200 }
34201
34202 /* Subroutine of ix86_expand_round_builtin to take care of comi insns
34203 with rounding. */
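/* Illustrative only: an intrinsic call along the lines of
   _mm_comi_round_ss (a, b, _CMP_GT_OS, _MM_FROUND_NO_EXC) is expected
   to reach this expander; the comparison predicate indexes
   comi_comparisons[] below and the last argument is passed through as
   the rounding/SAE operand.  Names follow the AVX-512 intrinsic headers
   and are given here only as an example.  */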
34204 static rtx
34205 ix86_expand_sse_comi_round (const struct builtin_description *d,
34206 tree exp, rtx target)
34207 {
34208 rtx pat, set_dst;
34209 tree arg0 = CALL_EXPR_ARG (exp, 0);
34210 tree arg1 = CALL_EXPR_ARG (exp, 1);
34211 tree arg2 = CALL_EXPR_ARG (exp, 2);
34212 tree arg3 = CALL_EXPR_ARG (exp, 3);
34213 rtx op0 = expand_normal (arg0);
34214 rtx op1 = expand_normal (arg1);
34215 rtx op2 = expand_normal (arg2);
34216 rtx op3 = expand_normal (arg3);
34217 enum insn_code icode = d->icode;
34218 const struct insn_data_d *insn_p = &insn_data[icode];
34219 enum machine_mode mode0 = insn_p->operand[0].mode;
34220 enum machine_mode mode1 = insn_p->operand[1].mode;
34221 enum rtx_code comparison = UNEQ;
34222 bool need_ucomi = false;
34223
34224 /* See avxintrin.h for values. */
34225 enum rtx_code comi_comparisons[32] =
34226 {
34227 UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
34228 UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
34229 UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
34230 };
34231 bool need_ucomi_values[32] =
34232 {
34233 true, false, false, true, true, false, false, true,
34234 true, false, false, true, true, false, false, true,
34235 false, true, true, false, false, true, true, false,
34236 false, true, true, false, false, true, true, false
34237 };
34238
34239 if (!CONST_INT_P (op2))
34240 {
34241 error ("the third argument must be comparison constant");
34242 return const0_rtx;
34243 }
34244 if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
34245 {
34246 error ("incorect comparison mode");
34247 return const0_rtx;
34248 }
34249
34250 if (!insn_p->operand[2].predicate (op3, SImode))
34251 {
34252 error ("incorrect rounding operand");
34253 return const0_rtx;
34254 }
34255
34256 comparison = comi_comparisons[INTVAL (op2)];
34257 need_ucomi = need_ucomi_values[INTVAL (op2)];
34258
34259 if (VECTOR_MODE_P (mode0))
34260 op0 = safe_vector_operand (op0, mode0);
34261 if (VECTOR_MODE_P (mode1))
34262 op1 = safe_vector_operand (op1, mode1);
34263
34264 target = gen_reg_rtx (SImode);
34265 emit_move_insn (target, const0_rtx);
34266 target = gen_rtx_SUBREG (QImode, target, 0);
34267
34268 if ((optimize && !register_operand (op0, mode0))
34269 || !insn_p->operand[0].predicate (op0, mode0))
34270 op0 = copy_to_mode_reg (mode0, op0);
34271 if ((optimize && !register_operand (op1, mode1))
34272 || !insn_p->operand[1].predicate (op1, mode1))
34273 op1 = copy_to_mode_reg (mode1, op1);
34274
34275 if (need_ucomi)
34276 icode = icode == CODE_FOR_sse_comi_round
34277 ? CODE_FOR_sse_ucomi_round
34278 : CODE_FOR_sse2_ucomi_round;
34279
34280 pat = GEN_FCN (icode) (op0, op1, op3);
34281 if (! pat)
34282 return 0;
34283
34284 /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point. */
34285 if (INTVAL (op3) == NO_ROUND)
34286 {
34287 pat = ix86_erase_embedded_rounding (pat);
34288 if (! pat)
34289 return 0;
34290
34291 set_dst = SET_DEST (pat);
34292 }
34293 else
34294 {
34295 gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
34296 set_dst = SET_DEST (XVECEXP (pat, 0, 0));
34297 }
34298
34299 emit_insn (pat);
34300 emit_insn (gen_rtx_SET (VOIDmode,
34301 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
34302 gen_rtx_fmt_ee (comparison, QImode,
34303 set_dst,
34304 const0_rtx)));
34305
34306 return SUBREG_REG (target);
34307 }
34308
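/* Subroutine of ix86_expand_builtin to take care of insns that carry an
   explicit rounding/SAE operand.  If that operand is NO_ROUND (assumed
   to be the value the intrinsic headers pass for
   _MM_FROUND_CUR_DIRECTION), the embedded-rounding unspec is redundant
   and is stripped again with ix86_erase_embedded_rounding.  */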
34309 static rtx
34310 ix86_expand_round_builtin (const struct builtin_description *d,
34311 tree exp, rtx target)
34312 {
34313 rtx pat;
34314 unsigned int i, nargs;
34315 struct
34316 {
34317 rtx op;
34318 enum machine_mode mode;
34319 } args[6];
34320 enum insn_code icode = d->icode;
34321 const struct insn_data_d *insn_p = &insn_data[icode];
34322 enum machine_mode tmode = insn_p->operand[0].mode;
34323 unsigned int nargs_constant = 0;
34324 unsigned int redundant_embed_rnd = 0;
34325
34326 switch ((enum ix86_builtin_func_type) d->flag)
34327 {
34328 case UINT64_FTYPE_V2DF_INT:
34329 case UINT64_FTYPE_V4SF_INT:
34330 case UINT_FTYPE_V2DF_INT:
34331 case UINT_FTYPE_V4SF_INT:
34332 case INT64_FTYPE_V2DF_INT:
34333 case INT64_FTYPE_V4SF_INT:
34334 case INT_FTYPE_V2DF_INT:
34335 case INT_FTYPE_V4SF_INT:
34336 nargs = 2;
34337 break;
34338 case V4SF_FTYPE_V4SF_UINT_INT:
34339 case V4SF_FTYPE_V4SF_UINT64_INT:
34340 case V2DF_FTYPE_V2DF_UINT64_INT:
34341 case V4SF_FTYPE_V4SF_INT_INT:
34342 case V4SF_FTYPE_V4SF_INT64_INT:
34343 case V2DF_FTYPE_V2DF_INT64_INT:
34344 case V4SF_FTYPE_V4SF_V4SF_INT:
34345 case V2DF_FTYPE_V2DF_V2DF_INT:
34346 case V4SF_FTYPE_V4SF_V2DF_INT:
34347 case V2DF_FTYPE_V2DF_V4SF_INT:
34348 nargs = 3;
34349 break;
34350 case V8SF_FTYPE_V8DF_V8SF_QI_INT:
34351 case V8DF_FTYPE_V8DF_V8DF_QI_INT:
34352 case V8SI_FTYPE_V8DF_V8SI_QI_INT:
34353 case V16SF_FTYPE_V16SF_V16SF_HI_INT:
34354 case V16SF_FTYPE_V16SI_V16SF_HI_INT:
34355 case V16SI_FTYPE_V16SF_V16SI_HI_INT:
34356 case V8DF_FTYPE_V8SF_V8DF_QI_INT:
34357 case V16SF_FTYPE_V16HI_V16SF_HI_INT:
34358 case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
34359 case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
34360 nargs = 4;
34361 break;
34362 case V4SF_FTYPE_V4SF_V4SF_INT_INT:
34363 case V2DF_FTYPE_V2DF_V2DF_INT_INT:
34364 nargs_constant = 2;
34365 nargs = 4;
34366 break;
34367 case INT_FTYPE_V4SF_V4SF_INT_INT:
34368 case INT_FTYPE_V2DF_V2DF_INT_INT:
34369 return ix86_expand_sse_comi_round (d, exp, target);
34370 case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
34371 case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
34372 case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
34373 case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
34374 case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
34375 case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
34376 nargs = 5;
34377 break;
34378 case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
34379 case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
34380 nargs_constant = 4;
34381 nargs = 5;
34382 break;
34383 case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
34384 case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
34385 case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
34386 case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
34387 nargs_constant = 3;
34388 nargs = 5;
34389 break;
34390 case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
34391 case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
34392 nargs = 6;
34393 nargs_constant = 4;
34394 break;
34395 case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
34396 case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
34397 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
34398 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
34399 nargs = 6;
34400 nargs_constant = 3;
34401 break;
34402 default:
34403 gcc_unreachable ();
34404 }
34405 gcc_assert (nargs <= ARRAY_SIZE (args));
34406
34407 if (optimize
34408 || target == 0
34409 || GET_MODE (target) != tmode
34410 || !insn_p->operand[0].predicate (target, tmode))
34411 target = gen_reg_rtx (tmode);
34412
34413 for (i = 0; i < nargs; i++)
34414 {
34415 tree arg = CALL_EXPR_ARG (exp, i);
34416 rtx op = expand_normal (arg);
34417 enum machine_mode mode = insn_p->operand[i + 1].mode;
34418 bool match = insn_p->operand[i + 1].predicate (op, mode);
34419
34420 if (i == nargs - nargs_constant)
34421 {
34422 if (!match)
34423 {
34424 switch (icode)
34425 {
34426 case CODE_FOR_avx512f_getmantv8df_mask_round:
34427 case CODE_FOR_avx512f_getmantv16sf_mask_round:
34428 case CODE_FOR_avx512f_getmantv2df_round:
34429 case CODE_FOR_avx512f_getmantv4sf_round:
34430 error ("the immediate argument must be a 4-bit immediate");
34431 return const0_rtx;
34432 case CODE_FOR_avx512f_cmpv8df3_mask_round:
34433 case CODE_FOR_avx512f_cmpv16sf3_mask_round:
34434 case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
34435 case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
34436 error ("the immediate argument must be a 5-bit immediate");
34437 return const0_rtx;
34438 default:
34439 error ("the immediate argument must be an 8-bit immediate");
34440 return const0_rtx;
34441 }
34442 }
34443 }
34444 else if (i == nargs-1)
34445 {
34446 if (!insn_p->operand[nargs].predicate (op, SImode))
34447 {
34448 error ("incorrect rounding operand");
34449 return const0_rtx;
34450 }
34451
34452 /* If there is no rounding, use the normal version of the pattern. */
34453 if (INTVAL (op) == NO_ROUND)
34454 redundant_embed_rnd = 1;
34455 }
34456 else
34457 {
34458 if (VECTOR_MODE_P (mode))
34459 op = safe_vector_operand (op, mode);
34460
34461 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34462 {
34463 if (optimize || !match)
34464 op = copy_to_mode_reg (mode, op);
34465 }
34466 else
34467 {
34468 op = copy_to_reg (op);
34469 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34470 }
34471 }
34472
34473 args[i].op = op;
34474 args[i].mode = mode;
34475 }
34476
34477 switch (nargs)
34478 {
34479 case 1:
34480 pat = GEN_FCN (icode) (target, args[0].op);
34481 break;
34482 case 2:
34483 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34484 break;
34485 case 3:
34486 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34487 args[2].op);
34488 break;
34489 case 4:
34490 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34491 args[2].op, args[3].op);
34492 break;
34493 case 5:
34494 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34495 args[2].op, args[3].op, args[4].op);
	break;
34496 case 6:
34497 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
34498 args[2].op, args[3].op, args[4].op,
34499 args[5].op);
34500 break;
34501 default:
34502 gcc_unreachable ();
34503 }
34504
34505 if (!pat)
34506 return 0;
34507
34508 if (redundant_embed_rnd)
34509 pat = ix86_erase_embedded_rounding (pat);
34510
34511 emit_insn (pat);
34512 return target;
34513 }
34514
34515 /* Subroutine of ix86_expand_builtin to take care of special insns
34516 with variable number of operands. */
34517
34518 static rtx
34519 ix86_expand_special_args_builtin (const struct builtin_description *d,
34520 tree exp, rtx target)
34521 {
34522 tree arg;
34523 rtx pat, op;
34524 unsigned int i, nargs, arg_adjust, memory;
34525 bool aligned_mem = false;
34526 struct
34527 {
34528 rtx op;
34529 enum machine_mode mode;
34530 } args[3];
34531 enum insn_code icode = d->icode;
34532 bool last_arg_constant = false;
34533 const struct insn_data_d *insn_p = &insn_data[icode];
34534 enum machine_mode tmode = insn_p->operand[0].mode;
34535 enum { load, store } klass;
34536
34537 switch ((enum ix86_builtin_func_type) d->flag)
34538 {
34539 case VOID_FTYPE_VOID:
34540 emit_insn (GEN_FCN (icode) (target));
34541 return 0;
34542 case VOID_FTYPE_UINT64:
34543 case VOID_FTYPE_UNSIGNED:
34544 nargs = 0;
34545 klass = store;
34546 memory = 0;
34547 break;
34548
34549 case INT_FTYPE_VOID:
34550 case UINT64_FTYPE_VOID:
34551 case UNSIGNED_FTYPE_VOID:
34552 nargs = 0;
34553 klass = load;
34554 memory = 0;
34555 break;
34556 case UINT64_FTYPE_PUNSIGNED:
34557 case V2DI_FTYPE_PV2DI:
34558 case V4DI_FTYPE_PV4DI:
34559 case V32QI_FTYPE_PCCHAR:
34560 case V16QI_FTYPE_PCCHAR:
34561 case V8SF_FTYPE_PCV4SF:
34562 case V8SF_FTYPE_PCFLOAT:
34563 case V4SF_FTYPE_PCFLOAT:
34564 case V4DF_FTYPE_PCV2DF:
34565 case V4DF_FTYPE_PCDOUBLE:
34566 case V2DF_FTYPE_PCDOUBLE:
34567 case VOID_FTYPE_PVOID:
34568 case V16SI_FTYPE_PV4SI:
34569 case V16SF_FTYPE_PV4SF:
34570 case V8DI_FTYPE_PV4DI:
34571 case V8DI_FTYPE_PV8DI:
34572 case V8DF_FTYPE_PV4DF:
34573 nargs = 1;
34574 klass = load;
34575 memory = 0;
34576 switch (icode)
34577 {
34578 case CODE_FOR_sse4_1_movntdqa:
34579 case CODE_FOR_avx2_movntdqa:
34580 case CODE_FOR_avx512f_movntdqa:
34581 aligned_mem = true;
34582 break;
34583 default:
34584 break;
34585 }
34586 break;
34587 case VOID_FTYPE_PV2SF_V4SF:
34588 case VOID_FTYPE_PV8DI_V8DI:
34589 case VOID_FTYPE_PV4DI_V4DI:
34590 case VOID_FTYPE_PV2DI_V2DI:
34591 case VOID_FTYPE_PCHAR_V32QI:
34592 case VOID_FTYPE_PCHAR_V16QI:
34593 case VOID_FTYPE_PFLOAT_V16SF:
34594 case VOID_FTYPE_PFLOAT_V8SF:
34595 case VOID_FTYPE_PFLOAT_V4SF:
34596 case VOID_FTYPE_PDOUBLE_V8DF:
34597 case VOID_FTYPE_PDOUBLE_V4DF:
34598 case VOID_FTYPE_PDOUBLE_V2DF:
34599 case VOID_FTYPE_PLONGLONG_LONGLONG:
34600 case VOID_FTYPE_PULONGLONG_ULONGLONG:
34601 case VOID_FTYPE_PINT_INT:
34602 nargs = 1;
34603 klass = store;
34604 /* Reserve memory operand for target. */
34605 memory = ARRAY_SIZE (args);
34606 switch (icode)
34607 {
34608 /* These builtins and instructions require the memory
34609 to be properly aligned. */
34610 case CODE_FOR_avx_movntv4di:
34611 case CODE_FOR_sse2_movntv2di:
34612 case CODE_FOR_avx_movntv8sf:
34613 case CODE_FOR_sse_movntv4sf:
34614 case CODE_FOR_sse4a_vmmovntv4sf:
34615 case CODE_FOR_avx_movntv4df:
34616 case CODE_FOR_sse2_movntv2df:
34617 case CODE_FOR_sse4a_vmmovntv2df:
34618 case CODE_FOR_sse2_movntidi:
34619 case CODE_FOR_sse_movntq:
34620 case CODE_FOR_sse2_movntisi:
34621 case CODE_FOR_avx512f_movntv16sf:
34622 case CODE_FOR_avx512f_movntv8df:
34623 case CODE_FOR_avx512f_movntv8di:
34624 aligned_mem = true;
34625 break;
34626 default:
34627 break;
34628 }
34629 break;
34630 case V4SF_FTYPE_V4SF_PCV2SF:
34631 case V2DF_FTYPE_V2DF_PCDOUBLE:
34632 nargs = 2;
34633 klass = load;
34634 memory = 1;
34635 break;
34636 case V8SF_FTYPE_PCV8SF_V8SI:
34637 case V4DF_FTYPE_PCV4DF_V4DI:
34638 case V4SF_FTYPE_PCV4SF_V4SI:
34639 case V2DF_FTYPE_PCV2DF_V2DI:
34640 case V8SI_FTYPE_PCV8SI_V8SI:
34641 case V4DI_FTYPE_PCV4DI_V4DI:
34642 case V4SI_FTYPE_PCV4SI_V4SI:
34643 case V2DI_FTYPE_PCV2DI_V2DI:
34644 nargs = 2;
34645 klass = load;
34646 memory = 0;
34647 break;
34648 case VOID_FTYPE_PV8DF_V8DF_QI:
34649 case VOID_FTYPE_PV16SF_V16SF_HI:
34650 case VOID_FTYPE_PV8DI_V8DI_QI:
34651 case VOID_FTYPE_PV16SI_V16SI_HI:
34652 switch (icode)
34653 {
34654 /* These builtins and instructions require the memory
34655 to be properly aligned. */
34656 case CODE_FOR_avx512f_storev16sf_mask:
34657 case CODE_FOR_avx512f_storev16si_mask:
34658 case CODE_FOR_avx512f_storev8df_mask:
34659 case CODE_FOR_avx512f_storev8di_mask:
34660 aligned_mem = true;
34661 break;
34662 default:
34663 break;
34664 }
34665 /* FALLTHRU */
34666 case VOID_FTYPE_PV8SF_V8SI_V8SF:
34667 case VOID_FTYPE_PV4DF_V4DI_V4DF:
34668 case VOID_FTYPE_PV4SF_V4SI_V4SF:
34669 case VOID_FTYPE_PV2DF_V2DI_V2DF:
34670 case VOID_FTYPE_PV8SI_V8SI_V8SI:
34671 case VOID_FTYPE_PV4DI_V4DI_V4DI:
34672 case VOID_FTYPE_PV4SI_V4SI_V4SI:
34673 case VOID_FTYPE_PV2DI_V2DI_V2DI:
34674 case VOID_FTYPE_PDOUBLE_V2DF_QI:
34675 case VOID_FTYPE_PFLOAT_V4SF_QI:
34676 case VOID_FTYPE_PV8SI_V8DI_QI:
34677 case VOID_FTYPE_PV8HI_V8DI_QI:
34678 case VOID_FTYPE_PV16HI_V16SI_HI:
34679 case VOID_FTYPE_PV16QI_V8DI_QI:
34680 case VOID_FTYPE_PV16QI_V16SI_HI:
34681 nargs = 2;
34682 klass = store;
34683 /* Reserve memory operand for target. */
34684 memory = ARRAY_SIZE (args);
34685 break;
34686 case V16SF_FTYPE_PCV16SF_V16SF_HI:
34687 case V16SI_FTYPE_PCV16SI_V16SI_HI:
34688 case V8DF_FTYPE_PCV8DF_V8DF_QI:
34689 case V8DI_FTYPE_PCV8DI_V8DI_QI:
34690 case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
34691 case V4SF_FTYPE_PCFLOAT_V4SF_QI:
34692 nargs = 3;
34693 klass = load;
34694 memory = 0;
34695 switch (icode)
34696 {
34697 /* These builtins and instructions require the memory
34698 to be properly aligned. */
34699 case CODE_FOR_avx512f_loadv16sf_mask:
34700 case CODE_FOR_avx512f_loadv16si_mask:
34701 case CODE_FOR_avx512f_loadv8df_mask:
34702 case CODE_FOR_avx512f_loadv8di_mask:
34703 aligned_mem = true;
34704 break;
34705 default:
34706 break;
34707 }
34708 break;
34709 case VOID_FTYPE_UINT_UINT_UINT:
34710 case VOID_FTYPE_UINT64_UINT_UINT:
34711 case UCHAR_FTYPE_UINT_UINT_UINT:
34712 case UCHAR_FTYPE_UINT64_UINT_UINT:
34713 nargs = 3;
34714 klass = load;
34715 memory = ARRAY_SIZE (args);
34716 last_arg_constant = true;
34717 break;
34718 default:
34719 gcc_unreachable ();
34720 }
34721
34722 gcc_assert (nargs <= ARRAY_SIZE (args));
34723
34724 if (klass == store)
34725 {
34726 arg = CALL_EXPR_ARG (exp, 0);
34727 op = expand_normal (arg);
34728 gcc_assert (target == 0);
34729 if (memory)
34730 {
34731 op = ix86_zero_extend_to_Pmode (op);
34732 target = gen_rtx_MEM (tmode, op);
34733 /* target at this point has just BITS_PER_UNIT MEM_ALIGN
34734 on it. Try to improve it using get_pointer_alignment,
34735 and if the special builtin is one that requires strict
34736 mode alignment, also from its GET_MODE_ALIGNMENT.
34737 Failure to do so could lead to ix86_legitimate_combined_insn
34738 rejecting all changes to such insns. */
34739 unsigned int align = get_pointer_alignment (arg);
34740 if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
34741 align = GET_MODE_ALIGNMENT (tmode);
34742 if (MEM_ALIGN (target) < align)
34743 set_mem_align (target, align);
34744 }
34745 else
34746 target = force_reg (tmode, op);
34747 arg_adjust = 1;
34748 }
34749 else
34750 {
34751 arg_adjust = 0;
34752 if (optimize
34753 || target == 0
34754 || !register_operand (target, tmode)
34755 || GET_MODE (target) != tmode)
34756 target = gen_reg_rtx (tmode);
34757 }
34758
34759 for (i = 0; i < nargs; i++)
34760 {
34761 enum machine_mode mode = insn_p->operand[i + 1].mode;
34762 bool match;
34763
34764 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
34765 op = expand_normal (arg);
34766 match = insn_p->operand[i + 1].predicate (op, mode);
34767
34768 if (last_arg_constant && (i + 1) == nargs)
34769 {
34770 if (!match)
34771 {
34772 if (icode == CODE_FOR_lwp_lwpvalsi3
34773 || icode == CODE_FOR_lwp_lwpinssi3
34774 || icode == CODE_FOR_lwp_lwpvaldi3
34775 || icode == CODE_FOR_lwp_lwpinsdi3)
34776 error ("the last argument must be a 32-bit immediate");
34777 else
34778 error ("the last argument must be an 8-bit immediate");
34779 return const0_rtx;
34780 }
34781 }
34782 else
34783 {
34784 if (i == memory)
34785 {
34786 /* This must be the memory operand. */
34787 op = ix86_zero_extend_to_Pmode (op);
34788 op = gen_rtx_MEM (mode, op);
34789 /* op at this point has just BITS_PER_UNIT MEM_ALIGN
34790 on it. Try to improve it using get_pointer_alignment,
34791 and if the special builtin is one that requires strict
34792 mode alignment, also from its GET_MODE_ALIGNMENT.
34793 Failure to do so could lead to ix86_legitimate_combined_insn
34794 rejecting all changes to such insns. */
34795 unsigned int align = get_pointer_alignment (arg);
34796 if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
34797 align = GET_MODE_ALIGNMENT (mode);
34798 if (MEM_ALIGN (op) < align)
34799 set_mem_align (op, align);
34800 }
34801 else
34802 {
34803 /* This must be a register. */
34804 if (VECTOR_MODE_P (mode))
34805 op = safe_vector_operand (op, mode);
34806
34807 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
34808 op = copy_to_mode_reg (mode, op);
34809 else
34810 {
34811 op = copy_to_reg (op);
34812 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
34813 }
34814 }
34815 }
34816
34817 args[i].op = op;
34818 args[i].mode = mode;
34819 }
34820
34821 switch (nargs)
34822 {
34823 case 0:
34824 pat = GEN_FCN (icode) (target);
34825 break;
34826 case 1:
34827 pat = GEN_FCN (icode) (target, args[0].op);
34828 break;
34829 case 2:
34830 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
34831 break;
34832 case 3:
34833 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
34834 break;
34835 default:
34836 gcc_unreachable ();
34837 }
34838
34839 if (! pat)
34840 return 0;
34841 emit_insn (pat);
34842 return klass == store ? 0 : target;
34843 }
34844
34845 /* Return the integer constant in ARG. Constrain it to be in the range
34846 of the subparts of VEC_TYPE; issue an error if not. */
34847
34848 static int
34849 get_element_number (tree vec_type, tree arg)
34850 {
34851 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
34852
34853 if (!tree_fits_uhwi_p (arg)
34854 || (elt = tree_to_uhwi (arg), elt > max))
34855 {
34856 error ("selector must be an integer constant in the range 0..%wi", max);
34857 return 0;
34858 }
34859
34860 return elt;
34861 }
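
/* For example, with a 4-element vector type such as __v4sf the valid
   selectors are 0..3, so a call like __builtin_ia32_vec_ext_v4sf (x, 4)
   is diagnosed here rather than silently truncated.  (Builtin name used
   purely for illustration.)  */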
34862
34863 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34864 ix86_expand_vector_init. We DO have language-level syntax for this, in
34865 the form of (type){ init-list }. Except that since we can't place emms
34866 instructions from inside the compiler, we can't allow the use of MMX
34867 registers unless the user explicitly asks for it. So we do *not* define
34868 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
34869 we have builtins invoked by mmintrin.h that give us license to emit
34870 these sorts of instructions. */
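/* As a concrete, purely illustrative example, mmintrin.h is expected to
   implement _mm_set_pi32 on top of __builtin_ia32_vec_init_v2si, which
   is expanded by this routine.  */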
34871
34872 static rtx
34873 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
34874 {
34875 enum machine_mode tmode = TYPE_MODE (type);
34876 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
34877 int i, n_elt = GET_MODE_NUNITS (tmode);
34878 rtvec v = rtvec_alloc (n_elt);
34879
34880 gcc_assert (VECTOR_MODE_P (tmode));
34881 gcc_assert (call_expr_nargs (exp) == n_elt);
34882
34883 for (i = 0; i < n_elt; ++i)
34884 {
34885 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
34886 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
34887 }
34888
34889 if (!target || !register_operand (target, tmode))
34890 target = gen_reg_rtx (tmode);
34891
34892 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
34893 return target;
34894 }
34895
34896 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34897 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
34898 had a language-level syntax for referencing vector elements. */
34899
34900 static rtx
34901 ix86_expand_vec_ext_builtin (tree exp, rtx target)
34902 {
34903 enum machine_mode tmode, mode0;
34904 tree arg0, arg1;
34905 int elt;
34906 rtx op0;
34907
34908 arg0 = CALL_EXPR_ARG (exp, 0);
34909 arg1 = CALL_EXPR_ARG (exp, 1);
34910
34911 op0 = expand_normal (arg0);
34912 elt = get_element_number (TREE_TYPE (arg0), arg1);
34913
34914 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34915 mode0 = TYPE_MODE (TREE_TYPE (arg0));
34916 gcc_assert (VECTOR_MODE_P (mode0));
34917
34918 op0 = force_reg (mode0, op0);
34919
34920 if (optimize || !target || !register_operand (target, tmode))
34921 target = gen_reg_rtx (tmode);
34922
34923 ix86_expand_vector_extract (true, target, op0, elt);
34924
34925 return target;
34926 }
34927
34928 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
34929 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
34930 a language-level syntax for referencing vector elements. */
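/* For instance, _mm_insert_epi16 from emmintrin.h is assumed to map to
   __builtin_ia32_vec_set_v8hi and end up here; the element index is
   validated by get_element_number above.  (Illustrative mapping only.)  */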
34931
34932 static rtx
34933 ix86_expand_vec_set_builtin (tree exp)
34934 {
34935 enum machine_mode tmode, mode1;
34936 tree arg0, arg1, arg2;
34937 int elt;
34938 rtx op0, op1, target;
34939
34940 arg0 = CALL_EXPR_ARG (exp, 0);
34941 arg1 = CALL_EXPR_ARG (exp, 1);
34942 arg2 = CALL_EXPR_ARG (exp, 2);
34943
34944 tmode = TYPE_MODE (TREE_TYPE (arg0));
34945 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
34946 gcc_assert (VECTOR_MODE_P (tmode));
34947
34948 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
34949 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
34950 elt = get_element_number (TREE_TYPE (arg0), arg2);
34951
34952 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
34953 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
34954
34955 op0 = force_reg (tmode, op0);
34956 op1 = force_reg (mode1, op1);
34957
34958 /* OP0 is the source of these builtin functions and shouldn't be
34959 modified. Create a copy, use it and return it as target. */
34960 target = gen_reg_rtx (tmode);
34961 emit_move_insn (target, op0);
34962 ix86_expand_vector_set (true, target, op1, elt);
34963
34964 return target;
34965 }
34966
34967 /* Expand an expression EXP that calls a built-in function,
34968 with result going to TARGET if that's convenient
34969 (and in mode MODE if that's convenient).
34970 SUBTARGET may be used as the target for computing one of EXP's operands.
34971 IGNORE is nonzero if the value is to be ignored. */
34972
34973 static rtx
34974 ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
34975 enum machine_mode mode, int ignore)
34976 {
34977 const struct builtin_description *d;
34978 size_t i;
34979 enum insn_code icode;
34980 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
34981 tree arg0, arg1, arg2, arg3, arg4;
34982 rtx op0, op1, op2, op3, op4, pat, insn;
34983 enum machine_mode mode0, mode1, mode2, mode3, mode4;
34984 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
34985
34986 /* For CPU builtins that can be folded, fold first and expand the fold. */
34987 switch (fcode)
34988 {
34989 case IX86_BUILTIN_CPU_INIT:
34990 {
34991 /* Make it call __cpu_indicator_init in libgcc. */
34992 tree call_expr, fndecl, type;
34993 type = build_function_type_list (integer_type_node, NULL_TREE);
34994 fndecl = build_fn_decl ("__cpu_indicator_init", type);
34995 call_expr = build_call_expr (fndecl, 0);
34996 return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
34997 }
34998 case IX86_BUILTIN_CPU_IS:
34999 case IX86_BUILTIN_CPU_SUPPORTS:
35000 {
35001 tree arg0 = CALL_EXPR_ARG (exp, 0);
35002 tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
35003 gcc_assert (fold_expr != NULL_TREE);
35004 return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
35005 }
35006 }
35007
35008 /* Determine whether the builtin function is available under the current ISA.
35009 Originally the builtin was not created if it wasn't applicable to the
35010 current ISA based on the command line switches. With function specific
35011 options, we need to check in the context of the function making the call
35012 whether it is supported. */
35013 if (ix86_builtins_isa[fcode].isa
35014 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
35015 {
35016 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
35017 NULL, (enum fpmath_unit) 0, false);
35018
35019 if (!opts)
35020 error ("%qE needs unknown isa option", fndecl);
35021 else
35022 {
35023 gcc_assert (opts != NULL);
35024 error ("%qE needs isa option %s", fndecl, opts);
35025 free (opts);
35026 }
35027 return const0_rtx;
35028 }
35029
35030 switch (fcode)
35031 {
35032 case IX86_BUILTIN_MASKMOVQ:
35033 case IX86_BUILTIN_MASKMOVDQU:
35034 icode = (fcode == IX86_BUILTIN_MASKMOVQ
35035 ? CODE_FOR_mmx_maskmovq
35036 : CODE_FOR_sse2_maskmovdqu);
35037 /* Note the arg order is different from the operand order. */
35038 arg1 = CALL_EXPR_ARG (exp, 0);
35039 arg2 = CALL_EXPR_ARG (exp, 1);
35040 arg0 = CALL_EXPR_ARG (exp, 2);
35041 op0 = expand_normal (arg0);
35042 op1 = expand_normal (arg1);
35043 op2 = expand_normal (arg2);
35044 mode0 = insn_data[icode].operand[0].mode;
35045 mode1 = insn_data[icode].operand[1].mode;
35046 mode2 = insn_data[icode].operand[2].mode;
35047
35048 op0 = ix86_zero_extend_to_Pmode (op0);
35049 op0 = gen_rtx_MEM (mode1, op0);
35050
35051 if (!insn_data[icode].operand[0].predicate (op0, mode0))
35052 op0 = copy_to_mode_reg (mode0, op0);
35053 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35054 op1 = copy_to_mode_reg (mode1, op1);
35055 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35056 op2 = copy_to_mode_reg (mode2, op2);
35057 pat = GEN_FCN (icode) (op0, op1, op2);
35058 if (! pat)
35059 return 0;
35060 emit_insn (pat);
35061 return 0;
35062
35063 case IX86_BUILTIN_LDMXCSR:
35064 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
35065 target = assign_386_stack_local (SImode, SLOT_TEMP);
35066 emit_move_insn (target, op0);
35067 emit_insn (gen_sse_ldmxcsr (target));
35068 return 0;
35069
35070 case IX86_BUILTIN_STMXCSR:
35071 target = assign_386_stack_local (SImode, SLOT_TEMP);
35072 emit_insn (gen_sse_stmxcsr (target));
35073 return copy_to_mode_reg (SImode, target);
35074
35075 case IX86_BUILTIN_CLFLUSH:
35076 arg0 = CALL_EXPR_ARG (exp, 0);
35077 op0 = expand_normal (arg0);
35078 icode = CODE_FOR_sse2_clflush;
35079 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35080 op0 = ix86_zero_extend_to_Pmode (op0);
35081
35082 emit_insn (gen_sse2_clflush (op0));
35083 return 0;
35084
35085 case IX86_BUILTIN_MONITOR:
35086 arg0 = CALL_EXPR_ARG (exp, 0);
35087 arg1 = CALL_EXPR_ARG (exp, 1);
35088 arg2 = CALL_EXPR_ARG (exp, 2);
35089 op0 = expand_normal (arg0);
35090 op1 = expand_normal (arg1);
35091 op2 = expand_normal (arg2);
35092 if (!REG_P (op0))
35093 op0 = ix86_zero_extend_to_Pmode (op0);
35094 if (!REG_P (op1))
35095 op1 = copy_to_mode_reg (SImode, op1);
35096 if (!REG_P (op2))
35097 op2 = copy_to_mode_reg (SImode, op2);
35098 emit_insn (ix86_gen_monitor (op0, op1, op2));
35099 return 0;
35100
35101 case IX86_BUILTIN_MWAIT:
35102 arg0 = CALL_EXPR_ARG (exp, 0);
35103 arg1 = CALL_EXPR_ARG (exp, 1);
35104 op0 = expand_normal (arg0);
35105 op1 = expand_normal (arg1);
35106 if (!REG_P (op0))
35107 op0 = copy_to_mode_reg (SImode, op0);
35108 if (!REG_P (op1))
35109 op1 = copy_to_mode_reg (SImode, op1);
35110 emit_insn (gen_sse3_mwait (op0, op1));
35111 return 0;
35112
35113 case IX86_BUILTIN_VEC_INIT_V2SI:
35114 case IX86_BUILTIN_VEC_INIT_V4HI:
35115 case IX86_BUILTIN_VEC_INIT_V8QI:
35116 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
35117
35118 case IX86_BUILTIN_VEC_EXT_V2DF:
35119 case IX86_BUILTIN_VEC_EXT_V2DI:
35120 case IX86_BUILTIN_VEC_EXT_V4SF:
35121 case IX86_BUILTIN_VEC_EXT_V4SI:
35122 case IX86_BUILTIN_VEC_EXT_V8HI:
35123 case IX86_BUILTIN_VEC_EXT_V2SI:
35124 case IX86_BUILTIN_VEC_EXT_V4HI:
35125 case IX86_BUILTIN_VEC_EXT_V16QI:
35126 return ix86_expand_vec_ext_builtin (exp, target);
35127
35128 case IX86_BUILTIN_VEC_SET_V2DI:
35129 case IX86_BUILTIN_VEC_SET_V4SF:
35130 case IX86_BUILTIN_VEC_SET_V4SI:
35131 case IX86_BUILTIN_VEC_SET_V8HI:
35132 case IX86_BUILTIN_VEC_SET_V4HI:
35133 case IX86_BUILTIN_VEC_SET_V16QI:
35134 return ix86_expand_vec_set_builtin (exp);
35135
35136 case IX86_BUILTIN_INFQ:
35137 case IX86_BUILTIN_HUGE_VALQ:
35138 {
35139 REAL_VALUE_TYPE inf;
35140 rtx tmp;
35141
35142 real_inf (&inf);
35143 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
35144
35145 tmp = validize_mem (force_const_mem (mode, tmp));
35146
35147 if (target == 0)
35148 target = gen_reg_rtx (mode);
35149
35150 emit_move_insn (target, tmp);
35151 return target;
35152 }
35153
35154 case IX86_BUILTIN_RDPMC:
35155 case IX86_BUILTIN_RDTSC:
35156 case IX86_BUILTIN_RDTSCP:
35157
35158 op0 = gen_reg_rtx (DImode);
35159 op1 = gen_reg_rtx (DImode);
35160
35161 if (fcode == IX86_BUILTIN_RDPMC)
35162 {
35163 arg0 = CALL_EXPR_ARG (exp, 0);
35164 op2 = expand_normal (arg0);
35165 if (!register_operand (op2, SImode))
35166 op2 = copy_to_mode_reg (SImode, op2);
35167
35168 insn = (TARGET_64BIT
35169 ? gen_rdpmc_rex64 (op0, op1, op2)
35170 : gen_rdpmc (op0, op2));
35171 emit_insn (insn);
35172 }
35173 else if (fcode == IX86_BUILTIN_RDTSC)
35174 {
35175 insn = (TARGET_64BIT
35176 ? gen_rdtsc_rex64 (op0, op1)
35177 : gen_rdtsc (op0));
35178 emit_insn (insn);
35179 }
35180 else
35181 {
35182 op2 = gen_reg_rtx (SImode);
35183
35184 insn = (TARGET_64BIT
35185 ? gen_rdtscp_rex64 (op0, op1, op2)
35186 : gen_rdtscp (op0, op2));
35187 emit_insn (insn);
35188
35189 arg0 = CALL_EXPR_ARG (exp, 0);
35190 op4 = expand_normal (arg0);
35191 if (!address_operand (op4, VOIDmode))
35192 {
35193 op4 = convert_memory_address (Pmode, op4);
35194 op4 = copy_addr_to_reg (op4);
35195 }
35196 emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
35197 }
35198
35199 if (target == 0)
35200 {
35201 /* mode is VOIDmode if __builtin_rd* has been called
35202 without lhs. */
35203 if (mode == VOIDmode)
35204 return target;
35205 target = gen_reg_rtx (mode);
35206 }
35207
35208 if (TARGET_64BIT)
35209 {
35210 op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
35211 op1, 1, OPTAB_DIRECT);
35212 op0 = expand_simple_binop (DImode, IOR, op0, op1,
35213 op0, 1, OPTAB_DIRECT);
35214 }
35215
35216 emit_move_insn (target, op0);
35217 return target;
35218
35219 case IX86_BUILTIN_FXSAVE:
35220 case IX86_BUILTIN_FXRSTOR:
35221 case IX86_BUILTIN_FXSAVE64:
35222 case IX86_BUILTIN_FXRSTOR64:
35223 case IX86_BUILTIN_FNSTENV:
35224 case IX86_BUILTIN_FLDENV:
35225 case IX86_BUILTIN_FNSTSW:
35226 mode0 = BLKmode;
35227 switch (fcode)
35228 {
35229 case IX86_BUILTIN_FXSAVE:
35230 icode = CODE_FOR_fxsave;
35231 break;
35232 case IX86_BUILTIN_FXRSTOR:
35233 icode = CODE_FOR_fxrstor;
35234 break;
35235 case IX86_BUILTIN_FXSAVE64:
35236 icode = CODE_FOR_fxsave64;
35237 break;
35238 case IX86_BUILTIN_FXRSTOR64:
35239 icode = CODE_FOR_fxrstor64;
35240 break;
35241 case IX86_BUILTIN_FNSTENV:
35242 icode = CODE_FOR_fnstenv;
35243 break;
35244 case IX86_BUILTIN_FLDENV:
35245 icode = CODE_FOR_fldenv;
35246 break;
35247 case IX86_BUILTIN_FNSTSW:
35248 icode = CODE_FOR_fnstsw;
35249 mode0 = HImode;
35250 break;
35251 default:
35252 gcc_unreachable ();
35253 }
35254
35255 arg0 = CALL_EXPR_ARG (exp, 0);
35256 op0 = expand_normal (arg0);
35257
35258 if (!address_operand (op0, VOIDmode))
35259 {
35260 op0 = convert_memory_address (Pmode, op0);
35261 op0 = copy_addr_to_reg (op0);
35262 }
35263 op0 = gen_rtx_MEM (mode0, op0);
35264
35265 pat = GEN_FCN (icode) (op0);
35266 if (pat)
35267 emit_insn (pat);
35268 return 0;
35269
35270 case IX86_BUILTIN_XSAVE:
35271 case IX86_BUILTIN_XRSTOR:
35272 case IX86_BUILTIN_XSAVE64:
35273 case IX86_BUILTIN_XRSTOR64:
35274 case IX86_BUILTIN_XSAVEOPT:
35275 case IX86_BUILTIN_XSAVEOPT64:
35276 arg0 = CALL_EXPR_ARG (exp, 0);
35277 arg1 = CALL_EXPR_ARG (exp, 1);
35278 op0 = expand_normal (arg0);
35279 op1 = expand_normal (arg1);
35280
35281 if (!address_operand (op0, VOIDmode))
35282 {
35283 op0 = convert_memory_address (Pmode, op0);
35284 op0 = copy_addr_to_reg (op0);
35285 }
35286 op0 = gen_rtx_MEM (BLKmode, op0);
35287
35288 op1 = force_reg (DImode, op1);
35289
35290 if (TARGET_64BIT)
35291 {
35292 op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
35293 NULL, 1, OPTAB_DIRECT);
35294 switch (fcode)
35295 {
35296 case IX86_BUILTIN_XSAVE:
35297 icode = CODE_FOR_xsave_rex64;
35298 break;
35299 case IX86_BUILTIN_XRSTOR:
35300 icode = CODE_FOR_xrstor_rex64;
35301 break;
35302 case IX86_BUILTIN_XSAVE64:
35303 icode = CODE_FOR_xsave64;
35304 break;
35305 case IX86_BUILTIN_XRSTOR64:
35306 icode = CODE_FOR_xrstor64;
35307 break;
35308 case IX86_BUILTIN_XSAVEOPT:
35309 icode = CODE_FOR_xsaveopt_rex64;
35310 break;
35311 case IX86_BUILTIN_XSAVEOPT64:
35312 icode = CODE_FOR_xsaveopt64;
35313 break;
35314 default:
35315 gcc_unreachable ();
35316 }
35317
35318 op2 = gen_lowpart (SImode, op2);
35319 op1 = gen_lowpart (SImode, op1);
35320 pat = GEN_FCN (icode) (op0, op1, op2);
35321 }
35322 else
35323 {
35324 switch (fcode)
35325 {
35326 case IX86_BUILTIN_XSAVE:
35327 icode = CODE_FOR_xsave;
35328 break;
35329 case IX86_BUILTIN_XRSTOR:
35330 icode = CODE_FOR_xrstor;
35331 break;
35332 case IX86_BUILTIN_XSAVEOPT:
35333 icode = CODE_FOR_xsaveopt;
35334 break;
35335 default:
35336 gcc_unreachable ();
35337 }
35338 pat = GEN_FCN (icode) (op0, op1);
35339 }
35340
35341 if (pat)
35342 emit_insn (pat);
35343 return 0;
35344
35345 case IX86_BUILTIN_LLWPCB:
35346 arg0 = CALL_EXPR_ARG (exp, 0);
35347 op0 = expand_normal (arg0);
35348 icode = CODE_FOR_lwp_llwpcb;
35349 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35350 op0 = ix86_zero_extend_to_Pmode (op0);
35351 emit_insn (gen_lwp_llwpcb (op0));
35352 return 0;
35353
35354 case IX86_BUILTIN_SLWPCB:
35355 icode = CODE_FOR_lwp_slwpcb;
35356 if (!target
35357 || !insn_data[icode].operand[0].predicate (target, Pmode))
35358 target = gen_reg_rtx (Pmode);
35359 emit_insn (gen_lwp_slwpcb (target));
35360 return target;
35361
35362 case IX86_BUILTIN_BEXTRI32:
35363 case IX86_BUILTIN_BEXTRI64:
35364 arg0 = CALL_EXPR_ARG (exp, 0);
35365 arg1 = CALL_EXPR_ARG (exp, 1);
35366 op0 = expand_normal (arg0);
35367 op1 = expand_normal (arg1);
35368 icode = (fcode == IX86_BUILTIN_BEXTRI32
35369 ? CODE_FOR_tbm_bextri_si
35370 : CODE_FOR_tbm_bextri_di);
35371 if (!CONST_INT_P (op1))
35372 {
35373 error ("last argument must be an immediate");
35374 return const0_rtx;
35375 }
35376 else
35377 {
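	  /* The bextri control word packs the starting bit position in
	     bits 7:0 and the field length in bits 15:8; split it into
	     the two operands the pattern expects.  */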
35378 unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
35379 unsigned char lsb_index = INTVAL (op1) & 0xFF;
35380 op1 = GEN_INT (length);
35381 op2 = GEN_INT (lsb_index);
35382 pat = GEN_FCN (icode) (target, op0, op1, op2);
35383 if (pat)
35384 emit_insn (pat);
35385 return target;
35386 }
35387
35388 case IX86_BUILTIN_RDRAND16_STEP:
35389 icode = CODE_FOR_rdrandhi_1;
35390 mode0 = HImode;
35391 goto rdrand_step;
35392
35393 case IX86_BUILTIN_RDRAND32_STEP:
35394 icode = CODE_FOR_rdrandsi_1;
35395 mode0 = SImode;
35396 goto rdrand_step;
35397
35398 case IX86_BUILTIN_RDRAND64_STEP:
35399 icode = CODE_FOR_rdranddi_1;
35400 mode0 = DImode;
35401
35402 rdrand_step:
35403 op0 = gen_reg_rtx (mode0);
35404 emit_insn (GEN_FCN (icode) (op0));
35405
35406 arg0 = CALL_EXPR_ARG (exp, 0);
35407 op1 = expand_normal (arg0);
35408 if (!address_operand (op1, VOIDmode))
35409 {
35410 op1 = convert_memory_address (Pmode, op1);
35411 op1 = copy_addr_to_reg (op1);
35412 }
35413 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35414
35415 op1 = gen_reg_rtx (SImode);
35416 emit_move_insn (op1, CONST1_RTX (SImode));
35417
35418 /* Emit SImode conditional move. */
35419 if (mode0 == HImode)
35420 {
35421 op2 = gen_reg_rtx (SImode);
35422 emit_insn (gen_zero_extendhisi2 (op2, op0));
35423 }
35424 else if (mode0 == SImode)
35425 op2 = op0;
35426 else
35427 op2 = gen_rtx_SUBREG (SImode, op0, 0);
35428
35429 if (target == 0
35430 || !register_operand (target, SImode))
35431 target = gen_reg_rtx (SImode);
35432
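      /* RDRAND reports success in CF and is architecturally defined to
	 zero its destination on failure.  GEU on CCCmode is true when CF
	 is clear, so the conditional move below yields the (zeroed)
	 destination on failure and the constant 1 on success, i.e. the
	 0/1 status the *_step builtins return.  */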
35433 pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
35434 const0_rtx);
35435 emit_insn (gen_rtx_SET (VOIDmode, target,
35436 gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
35437 return target;
35438
35439 case IX86_BUILTIN_RDSEED16_STEP:
35440 icode = CODE_FOR_rdseedhi_1;
35441 mode0 = HImode;
35442 goto rdseed_step;
35443
35444 case IX86_BUILTIN_RDSEED32_STEP:
35445 icode = CODE_FOR_rdseedsi_1;
35446 mode0 = SImode;
35447 goto rdseed_step;
35448
35449 case IX86_BUILTIN_RDSEED64_STEP:
35450 icode = CODE_FOR_rdseeddi_1;
35451 mode0 = DImode;
35452
35453 rdseed_step:
35454 op0 = gen_reg_rtx (mode0);
35455 emit_insn (GEN_FCN (icode) (op0));
35456
35457 arg0 = CALL_EXPR_ARG (exp, 0);
35458 op1 = expand_normal (arg0);
35459 if (!address_operand (op1, VOIDmode))
35460 {
35461 op1 = convert_memory_address (Pmode, op1);
35462 op1 = copy_addr_to_reg (op1);
35463 }
35464 emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
35465
35466 op2 = gen_reg_rtx (QImode);
35467
35468 pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
35469 const0_rtx);
35470 emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
35471
35472 if (target == 0
35473 || !register_operand (target, SImode))
35474 target = gen_reg_rtx (SImode);
35475
35476 emit_insn (gen_zero_extendqisi2 (target, op2));
35477 return target;
35478
35479 case IX86_BUILTIN_ADDCARRYX32:
35480 icode = TARGET_ADX ? CODE_FOR_adcxsi3 : CODE_FOR_addsi3_carry;
35481 mode0 = SImode;
35482 goto addcarryx;
35483
35484 case IX86_BUILTIN_ADDCARRYX64:
35485 icode = TARGET_ADX ? CODE_FOR_adcxdi3 : CODE_FOR_adddi3_carry;
35486 mode0 = DImode;
35487
35488 addcarryx:
35489 arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in. */
35490 arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1. */
35491 arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2. */
35492 arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out. */
35493
35494 op0 = gen_reg_rtx (QImode);
35495
35496 /* Generate CF from input operand. */
35497 op1 = expand_normal (arg0);
35498 op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
35499 emit_insn (gen_addqi3_cc (op0, op1, constm1_rtx));
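      /* Adding constm1_rtx (0xff in QImode) to the carry-in byte
	 produces a carry out exactly when that byte is nonzero, so this
	 reloads CF with the caller's carry before the add-with-carry
	 below.  */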
35500
35501 /* Generate an ADCX (or ADC) instruction to compute X + Y + CF. */
35502 op2 = expand_normal (arg1);
35503 op3 = expand_normal (arg2);
35504
35505 if (!REG_P (op2))
35506 op2 = copy_to_mode_reg (mode0, op2);
35507 if (!REG_P (op3))
35508 op3 = copy_to_mode_reg (mode0, op3);
35509
35510 op0 = gen_reg_rtx (mode0);
35511
35512 op4 = gen_rtx_REG (CCCmode, FLAGS_REG);
35513 pat = gen_rtx_LTU (VOIDmode, op4, const0_rtx);
35514 emit_insn (GEN_FCN (icode) (op0, op2, op3, op4, pat));
35515
35516 /* Store the result. */
35517 op4 = expand_normal (arg3);
35518 if (!address_operand (op4, VOIDmode))
35519 {
35520 op4 = convert_memory_address (Pmode, op4);
35521 op4 = copy_addr_to_reg (op4);
35522 }
35523 emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
35524
35525 /* Return current CF value. */
35526 if (target == 0)
35527 target = gen_reg_rtx (QImode);
35528
35529 PUT_MODE (pat, QImode);
35530 emit_insn (gen_rtx_SET (VOIDmode, target, pat));
35531 return target;
35532
35533 case IX86_BUILTIN_READ_FLAGS:
35534 emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
35535
35536 if (optimize
35537 || target == NULL_RTX
35538 || !nonimmediate_operand (target, word_mode)
35539 || GET_MODE (target) != word_mode)
35540 target = gen_reg_rtx (word_mode);
35541
35542 emit_insn (gen_pop (target));
35543 return target;
35544
35545 case IX86_BUILTIN_WRITE_FLAGS:
35546
35547 arg0 = CALL_EXPR_ARG (exp, 0);
35548 op0 = expand_normal (arg0);
35549 if (!general_no_elim_operand (op0, word_mode))
35550 op0 = copy_to_mode_reg (word_mode, op0);
35551
35552 emit_insn (gen_push (op0));
35553 emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
35554 return 0;
35555
35556 case IX86_BUILTIN_KORTESTC16:
35557 icode = CODE_FOR_kortestchi;
35558 mode0 = HImode;
35559 mode1 = CCCmode;
35560 goto kortest;
35561
35562 case IX86_BUILTIN_KORTESTZ16:
35563 icode = CODE_FOR_kortestzhi;
35564 mode0 = HImode;
35565 mode1 = CCZmode;
35566
35567 kortest:
35568 arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1. */
35569 arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2. */
35570 op0 = expand_normal (arg0);
35571 op1 = expand_normal (arg1);
35572
35573 op0 = copy_to_reg (op0);
35574 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
35575 op1 = copy_to_reg (op1);
35576 op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
35577
35578 target = gen_reg_rtx (QImode);
35579 emit_insn (gen_rtx_SET (mode0, target, const0_rtx));
35580
35581 /* Emit kortest. */
35582 emit_insn (GEN_FCN (icode) (op0, op1));
35583 /* And use setcc to return result from flags. */
35584 ix86_expand_setcc (target, EQ,
35585 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
35586 return target;
35587
35588 case IX86_BUILTIN_GATHERSIV2DF:
35589 icode = CODE_FOR_avx2_gathersiv2df;
35590 goto gather_gen;
35591 case IX86_BUILTIN_GATHERSIV4DF:
35592 icode = CODE_FOR_avx2_gathersiv4df;
35593 goto gather_gen;
35594 case IX86_BUILTIN_GATHERDIV2DF:
35595 icode = CODE_FOR_avx2_gatherdiv2df;
35596 goto gather_gen;
35597 case IX86_BUILTIN_GATHERDIV4DF:
35598 icode = CODE_FOR_avx2_gatherdiv4df;
35599 goto gather_gen;
35600 case IX86_BUILTIN_GATHERSIV4SF:
35601 icode = CODE_FOR_avx2_gathersiv4sf;
35602 goto gather_gen;
35603 case IX86_BUILTIN_GATHERSIV8SF:
35604 icode = CODE_FOR_avx2_gathersiv8sf;
35605 goto gather_gen;
35606 case IX86_BUILTIN_GATHERDIV4SF:
35607 icode = CODE_FOR_avx2_gatherdiv4sf;
35608 goto gather_gen;
35609 case IX86_BUILTIN_GATHERDIV8SF:
35610 icode = CODE_FOR_avx2_gatherdiv8sf;
35611 goto gather_gen;
35612 case IX86_BUILTIN_GATHERSIV2DI:
35613 icode = CODE_FOR_avx2_gathersiv2di;
35614 goto gather_gen;
35615 case IX86_BUILTIN_GATHERSIV4DI:
35616 icode = CODE_FOR_avx2_gathersiv4di;
35617 goto gather_gen;
35618 case IX86_BUILTIN_GATHERDIV2DI:
35619 icode = CODE_FOR_avx2_gatherdiv2di;
35620 goto gather_gen;
35621 case IX86_BUILTIN_GATHERDIV4DI:
35622 icode = CODE_FOR_avx2_gatherdiv4di;
35623 goto gather_gen;
35624 case IX86_BUILTIN_GATHERSIV4SI:
35625 icode = CODE_FOR_avx2_gathersiv4si;
35626 goto gather_gen;
35627 case IX86_BUILTIN_GATHERSIV8SI:
35628 icode = CODE_FOR_avx2_gathersiv8si;
35629 goto gather_gen;
35630 case IX86_BUILTIN_GATHERDIV4SI:
35631 icode = CODE_FOR_avx2_gatherdiv4si;
35632 goto gather_gen;
35633 case IX86_BUILTIN_GATHERDIV8SI:
35634 icode = CODE_FOR_avx2_gatherdiv8si;
35635 goto gather_gen;
35636 case IX86_BUILTIN_GATHERALTSIV4DF:
35637 icode = CODE_FOR_avx2_gathersiv4df;
35638 goto gather_gen;
35639 case IX86_BUILTIN_GATHERALTDIV8SF:
35640 icode = CODE_FOR_avx2_gatherdiv8sf;
35641 goto gather_gen;
35642 case IX86_BUILTIN_GATHERALTSIV4DI:
35643 icode = CODE_FOR_avx2_gathersiv4di;
35644 goto gather_gen;
35645 case IX86_BUILTIN_GATHERALTDIV8SI:
35646 icode = CODE_FOR_avx2_gatherdiv8si;
35647 goto gather_gen;
35648 case IX86_BUILTIN_GATHER3SIV16SF:
35649 icode = CODE_FOR_avx512f_gathersiv16sf;
35650 goto gather_gen;
35651 case IX86_BUILTIN_GATHER3SIV8DF:
35652 icode = CODE_FOR_avx512f_gathersiv8df;
35653 goto gather_gen;
35654 case IX86_BUILTIN_GATHER3DIV16SF:
35655 icode = CODE_FOR_avx512f_gatherdiv16sf;
35656 goto gather_gen;
35657 case IX86_BUILTIN_GATHER3DIV8DF:
35658 icode = CODE_FOR_avx512f_gatherdiv8df;
35659 goto gather_gen;
35660 case IX86_BUILTIN_GATHER3SIV16SI:
35661 icode = CODE_FOR_avx512f_gathersiv16si;
35662 goto gather_gen;
35663 case IX86_BUILTIN_GATHER3SIV8DI:
35664 icode = CODE_FOR_avx512f_gathersiv8di;
35665 goto gather_gen;
35666 case IX86_BUILTIN_GATHER3DIV16SI:
35667 icode = CODE_FOR_avx512f_gatherdiv16si;
35668 goto gather_gen;
35669 case IX86_BUILTIN_GATHER3DIV8DI:
35670 icode = CODE_FOR_avx512f_gatherdiv8di;
35671 goto gather_gen;
35672 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35673 icode = CODE_FOR_avx512f_gathersiv8df;
35674 goto gather_gen;
35675 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35676 icode = CODE_FOR_avx512f_gatherdiv16sf;
35677 goto gather_gen;
35678 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35679 icode = CODE_FOR_avx512f_gathersiv8di;
35680 goto gather_gen;
35681 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35682 icode = CODE_FOR_avx512f_gatherdiv16si;
35683 goto gather_gen;
35684 case IX86_BUILTIN_SCATTERSIV16SF:
35685 icode = CODE_FOR_avx512f_scattersiv16sf;
35686 goto scatter_gen;
35687 case IX86_BUILTIN_SCATTERSIV8DF:
35688 icode = CODE_FOR_avx512f_scattersiv8df;
35689 goto scatter_gen;
35690 case IX86_BUILTIN_SCATTERDIV16SF:
35691 icode = CODE_FOR_avx512f_scatterdiv16sf;
35692 goto scatter_gen;
35693 case IX86_BUILTIN_SCATTERDIV8DF:
35694 icode = CODE_FOR_avx512f_scatterdiv8df;
35695 goto scatter_gen;
35696 case IX86_BUILTIN_SCATTERSIV16SI:
35697 icode = CODE_FOR_avx512f_scattersiv16si;
35698 goto scatter_gen;
35699 case IX86_BUILTIN_SCATTERSIV8DI:
35700 icode = CODE_FOR_avx512f_scattersiv8di;
35701 goto scatter_gen;
35702 case IX86_BUILTIN_SCATTERDIV16SI:
35703 icode = CODE_FOR_avx512f_scatterdiv16si;
35704 goto scatter_gen;
35705 case IX86_BUILTIN_SCATTERDIV8DI:
35706 icode = CODE_FOR_avx512f_scatterdiv8di;
35707 goto scatter_gen;
35708
35709 case IX86_BUILTIN_GATHERPFDPD:
35710 icode = CODE_FOR_avx512pf_gatherpfv8sidf;
35711 goto vec_prefetch_gen;
35712 case IX86_BUILTIN_GATHERPFDPS:
35713 icode = CODE_FOR_avx512pf_gatherpfv16sisf;
35714 goto vec_prefetch_gen;
35715 case IX86_BUILTIN_GATHERPFQPD:
35716 icode = CODE_FOR_avx512pf_gatherpfv8didf;
35717 goto vec_prefetch_gen;
35718 case IX86_BUILTIN_GATHERPFQPS:
35719 icode = CODE_FOR_avx512pf_gatherpfv8disf;
35720 goto vec_prefetch_gen;
35721 case IX86_BUILTIN_SCATTERPFDPD:
35722 icode = CODE_FOR_avx512pf_scatterpfv8sidf;
35723 goto vec_prefetch_gen;
35724 case IX86_BUILTIN_SCATTERPFDPS:
35725 icode = CODE_FOR_avx512pf_scatterpfv16sisf;
35726 goto vec_prefetch_gen;
35727 case IX86_BUILTIN_SCATTERPFQPD:
35728 icode = CODE_FOR_avx512pf_scatterpfv8didf;
35729 goto vec_prefetch_gen;
35730 case IX86_BUILTIN_SCATTERPFQPS:
35731 icode = CODE_FOR_avx512pf_scatterpfv8disf;
35732 goto vec_prefetch_gen;
35733
35734 gather_gen:
35735 rtx half;
35736 rtx (*gen) (rtx, rtx);
35737
35738 arg0 = CALL_EXPR_ARG (exp, 0);
35739 arg1 = CALL_EXPR_ARG (exp, 1);
35740 arg2 = CALL_EXPR_ARG (exp, 2);
35741 arg3 = CALL_EXPR_ARG (exp, 3);
35742 arg4 = CALL_EXPR_ARG (exp, 4);
35743 op0 = expand_normal (arg0);
35744 op1 = expand_normal (arg1);
35745 op2 = expand_normal (arg2);
35746 op3 = expand_normal (arg3);
35747 op4 = expand_normal (arg4);
35748 /* Note the arg order is different from the operand order. */
35749 mode0 = insn_data[icode].operand[1].mode;
35750 mode2 = insn_data[icode].operand[3].mode;
35751 mode3 = insn_data[icode].operand[4].mode;
35752 mode4 = insn_data[icode].operand[5].mode;
35753
35754 if (target == NULL_RTX
35755 || GET_MODE (target) != insn_data[icode].operand[0].mode
35756 || !insn_data[icode].operand[0].predicate (target,
35757 GET_MODE (target)))
35758 subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
35759 else
35760 subtarget = target;
35761
35762 switch (fcode)
35763 {
35764 case IX86_BUILTIN_GATHER3ALTSIV8DF:
35765 case IX86_BUILTIN_GATHER3ALTSIV8DI:
35766 half = gen_reg_rtx (V8SImode);
35767 if (!nonimmediate_operand (op2, V16SImode))
35768 op2 = copy_to_mode_reg (V16SImode, op2);
35769 emit_insn (gen_vec_extract_lo_v16si (half, op2));
35770 op2 = half;
35771 break;
35772 case IX86_BUILTIN_GATHERALTSIV4DF:
35773 case IX86_BUILTIN_GATHERALTSIV4DI:
35774 half = gen_reg_rtx (V4SImode);
35775 if (!nonimmediate_operand (op2, V8SImode))
35776 op2 = copy_to_mode_reg (V8SImode, op2);
35777 emit_insn (gen_vec_extract_lo_v8si (half, op2));
35778 op2 = half;
35779 break;
35780 case IX86_BUILTIN_GATHER3ALTDIV16SF:
35781 case IX86_BUILTIN_GATHER3ALTDIV16SI:
35782 half = gen_reg_rtx (mode0);
35783 if (mode0 == V8SFmode)
35784 gen = gen_vec_extract_lo_v16sf;
35785 else
35786 gen = gen_vec_extract_lo_v16si;
35787 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35788 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35789 emit_insn (gen (half, op0));
35790 op0 = half;
35791 if (GET_MODE (op3) != VOIDmode)
35792 {
35793 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35794 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35795 emit_insn (gen (half, op3));
35796 op3 = half;
35797 }
35798 break;
35799 case IX86_BUILTIN_GATHERALTDIV8SF:
35800 case IX86_BUILTIN_GATHERALTDIV8SI:
35801 half = gen_reg_rtx (mode0);
35802 if (mode0 == V4SFmode)
35803 gen = gen_vec_extract_lo_v8sf;
35804 else
35805 gen = gen_vec_extract_lo_v8si;
35806 if (!nonimmediate_operand (op0, GET_MODE (op0)))
35807 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
35808 emit_insn (gen (half, op0));
35809 op0 = half;
35810 if (GET_MODE (op3) != VOIDmode)
35811 {
35812 if (!nonimmediate_operand (op3, GET_MODE (op3)))
35813 op3 = copy_to_mode_reg (GET_MODE (op3), op3);
35814 emit_insn (gen (half, op3));
35815 op3 = half;
35816 }
35817 break;
35818 default:
35819 break;
35820 }
35821
35822 /* Force memory operand only with base register here. But we
35823 don't want to do it on memory operand for other builtin
35824 functions. */
35825 op1 = ix86_zero_extend_to_Pmode (op1);
35826
35827 if (!insn_data[icode].operand[1].predicate (op0, mode0))
35828 op0 = copy_to_mode_reg (mode0, op0);
35829 if (!insn_data[icode].operand[2].predicate (op1, Pmode))
35830 op1 = copy_to_mode_reg (Pmode, op1);
35831 if (!insn_data[icode].operand[3].predicate (op2, mode2))
35832 op2 = copy_to_mode_reg (mode2, op2);
35833 if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
35834 {
35835 if (!insn_data[icode].operand[4].predicate (op3, mode3))
35836 op3 = copy_to_mode_reg (mode3, op3);
35837 }
35838 else
35839 {
35840 op3 = copy_to_reg (op3);
35841 op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
35842 }
35843 if (!insn_data[icode].operand[5].predicate (op4, mode4))
35844 {
35845 error ("the last argument must be scale 1, 2, 4, 8");
35846 return const0_rtx;
35847 }
35848
35849 /* Optimize. If mask is known to have all high bits set,
35850 replace op0 with pc_rtx to signal that the instruction
35851 overwrites the whole destination and doesn't use its
35852 previous contents. */
35853 if (optimize)
35854 {
35855 if (TREE_CODE (arg3) == INTEGER_CST)
35856 {
35857 if (integer_all_onesp (arg3))
35858 op0 = pc_rtx;
35859 }
35860 else if (TREE_CODE (arg3) == VECTOR_CST)
35861 {
35862 unsigned int negative = 0;
35863 for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
35864 {
35865 tree cst = VECTOR_CST_ELT (arg3, i);
35866 if (TREE_CODE (cst) == INTEGER_CST
35867 && tree_int_cst_sign_bit (cst))
35868 negative++;
35869 else if (TREE_CODE (cst) == REAL_CST
35870 && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
35871 negative++;
35872 }
35873 if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
35874 op0 = pc_rtx;
35875 }
35876 else if (TREE_CODE (arg3) == SSA_NAME
35877 && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
35878 {
35879 /* Recognize also when mask is like:
35880 __v2df src = _mm_setzero_pd ();
35881 __v2df mask = _mm_cmpeq_pd (src, src);
35882 or
35883 __v8sf src = _mm256_setzero_ps ();
35884 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
35885 as that is a cheaper way to load all ones into
35886 a register than having to load a constant from
35887 memory. */
35888 gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
35889 if (is_gimple_call (def_stmt))
35890 {
35891 tree fndecl = gimple_call_fndecl (def_stmt);
35892 if (fndecl
35893 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35894 switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
35895 {
35896 case IX86_BUILTIN_CMPPD:
35897 case IX86_BUILTIN_CMPPS:
35898 case IX86_BUILTIN_CMPPD256:
35899 case IX86_BUILTIN_CMPPS256:
35900 if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
35901 break;
35902 /* FALLTHRU */
35903 case IX86_BUILTIN_CMPEQPD:
35904 case IX86_BUILTIN_CMPEQPS:
35905 if (initializer_zerop (gimple_call_arg (def_stmt, 0))
35906 && initializer_zerop (gimple_call_arg (def_stmt,
35907 1)))
35908 op0 = pc_rtx;
35909 break;
35910 default:
35911 break;
35912 }
35913 }
35914 }
35915 }
35916
35917 pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
35918 if (! pat)
35919 return const0_rtx;
35920 emit_insn (pat);
35921
35922 switch (fcode)
35923 {
35924 case IX86_BUILTIN_GATHER3DIV16SF:
35925 if (target == NULL_RTX)
35926 target = gen_reg_rtx (V8SFmode);
35927 emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
35928 break;
35929 case IX86_BUILTIN_GATHER3DIV16SI:
35930 if (target == NULL_RTX)
35931 target = gen_reg_rtx (V8SImode);
35932 emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
35933 break;
35934 case IX86_BUILTIN_GATHERDIV8SF:
35935 if (target == NULL_RTX)
35936 target = gen_reg_rtx (V4SFmode);
35937 emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
35938 break;
35939 case IX86_BUILTIN_GATHERDIV8SI:
35940 if (target == NULL_RTX)
35941 target = gen_reg_rtx (V4SImode);
35942 emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
35943 break;
35944 default:
35945 target = subtarget;
35946 break;
35947 }
35948 return target;
35949
35950 scatter_gen:
35951 arg0 = CALL_EXPR_ARG (exp, 0);
35952 arg1 = CALL_EXPR_ARG (exp, 1);
35953 arg2 = CALL_EXPR_ARG (exp, 2);
35954 arg3 = CALL_EXPR_ARG (exp, 3);
35955 arg4 = CALL_EXPR_ARG (exp, 4);
35956 op0 = expand_normal (arg0);
35957 op1 = expand_normal (arg1);
35958 op2 = expand_normal (arg2);
35959 op3 = expand_normal (arg3);
35960 op4 = expand_normal (arg4);
35961 mode1 = insn_data[icode].operand[1].mode;
35962 mode2 = insn_data[icode].operand[2].mode;
35963 mode3 = insn_data[icode].operand[3].mode;
35964 mode4 = insn_data[icode].operand[4].mode;
35965
35966 /* Force memory operand only with base register here. But we
35967 don't want to do it on memory operand for other builtin
35968 functions. */
35969 op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
35970
35971 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
35972 op0 = copy_to_mode_reg (Pmode, op0);
35973
35974 if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
35975 {
35976 if (!insn_data[icode].operand[1].predicate (op1, mode1))
35977 op1 = copy_to_mode_reg (mode1, op1);
35978 }
35979 else
35980 {
35981 op1 = copy_to_reg (op1);
35982 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
35983 }
35984
35985 if (!insn_data[icode].operand[2].predicate (op2, mode2))
35986 op2 = copy_to_mode_reg (mode2, op2);
35987
35988 if (!insn_data[icode].operand[3].predicate (op3, mode3))
35989 op3 = copy_to_mode_reg (mode3, op3);
35990
35991 if (!insn_data[icode].operand[4].predicate (op4, mode4))
35992 {
35993 error ("the last argument must be scale 1, 2, 4, 8");
35994 return const0_rtx;
35995 }
35996
35997 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
35998 if (! pat)
35999 return const0_rtx;
36000
36001 emit_insn (pat);
36002 return 0;
36003
36004 vec_prefetch_gen:
36005 arg0 = CALL_EXPR_ARG (exp, 0);
36006 arg1 = CALL_EXPR_ARG (exp, 1);
36007 arg2 = CALL_EXPR_ARG (exp, 2);
36008 arg3 = CALL_EXPR_ARG (exp, 3);
36009 arg4 = CALL_EXPR_ARG (exp, 4);
36010 op0 = expand_normal (arg0);
36011 op1 = expand_normal (arg1);
36012 op2 = expand_normal (arg2);
36013 op3 = expand_normal (arg3);
36014 op4 = expand_normal (arg4);
36015 mode0 = insn_data[icode].operand[0].mode;
36016 mode1 = insn_data[icode].operand[1].mode;
36017 mode3 = insn_data[icode].operand[3].mode;
36018 mode4 = insn_data[icode].operand[4].mode;
36019
36020 if (GET_MODE (op0) == mode0
36021 || (GET_MODE (op0) == VOIDmode && op0 != constm1_rtx))
36022 {
36023 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36024 op0 = copy_to_mode_reg (mode0, op0);
36025 }
36026 else if (op0 != constm1_rtx)
36027 {
36028 op0 = copy_to_reg (op0);
36029 op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
36030 }
36031
36032 if (!insn_data[icode].operand[1].predicate (op1, mode1))
36033 op1 = copy_to_mode_reg (mode1, op1);
36034
36035 /* Force the memory operand to use only a base register here. We
36036 don't want to do this for the memory operands of other builtin
36037 functions. */
36038 op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
36039
36040 if (!insn_data[icode].operand[2].predicate (op2, Pmode))
36041 op2 = copy_to_mode_reg (Pmode, op2);
36042
36043 if (!insn_data[icode].operand[3].predicate (op3, mode3))
36044 {
36045 error ("the forth argument must be scale 1, 2, 4, 8");
36046 return const0_rtx;
36047 }
36048
36049 if (!insn_data[icode].operand[4].predicate (op4, mode4))
36050 {
36051 error ("incorrect hint operand");
36052 return const0_rtx;
36053 }
36054
36055 pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
36056 if (! pat)
36057 return const0_rtx;
36058
36059 emit_insn (pat);
36060
36061 return 0;
36062
36063 case IX86_BUILTIN_XABORT:
36064 icode = CODE_FOR_xabort;
36065 arg0 = CALL_EXPR_ARG (exp, 0);
36066 op0 = expand_normal (arg0);
36067 mode0 = insn_data[icode].operand[0].mode;
36068 if (!insn_data[icode].operand[0].predicate (op0, mode0))
36069 {
36070 error ("the xabort's argument must be an 8-bit immediate");
36071 return const0_rtx;
36072 }
36073 emit_insn (gen_xabort (op0));
36074 return 0;
36075
36076 default:
36077 break;
36078 }
36079
36080 for (i = 0, d = bdesc_special_args;
36081 i < ARRAY_SIZE (bdesc_special_args);
36082 i++, d++)
36083 if (d->code == fcode)
36084 return ix86_expand_special_args_builtin (d, exp, target);
36085
36086 for (i = 0, d = bdesc_args;
36087 i < ARRAY_SIZE (bdesc_args);
36088 i++, d++)
36089 if (d->code == fcode)
36090 switch (fcode)
36091 {
36092 case IX86_BUILTIN_FABSQ:
36093 case IX86_BUILTIN_COPYSIGNQ:
36094 if (!TARGET_SSE)
36095 /* Emit a normal call if SSE isn't available. */
36096 return expand_call (exp, target, ignore);
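/* FALLTHRU */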
36097 default:
36098 return ix86_expand_args_builtin (d, exp, target);
36099 }
36100
36101 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
36102 if (d->code == fcode)
36103 return ix86_expand_sse_comi (d, exp, target);
36104
36105 for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
36106 if (d->code == fcode)
36107 return ix86_expand_round_builtin (d, exp, target);
36108
36109 for (i = 0, d = bdesc_pcmpestr;
36110 i < ARRAY_SIZE (bdesc_pcmpestr);
36111 i++, d++)
36112 if (d->code == fcode)
36113 return ix86_expand_sse_pcmpestr (d, exp, target);
36114
36115 for (i = 0, d = bdesc_pcmpistr;
36116 i < ARRAY_SIZE (bdesc_pcmpistr);
36117 i++, d++)
36118 if (d->code == fcode)
36119 return ix86_expand_sse_pcmpistr (d, exp, target);
36120
36121 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
36122 if (d->code == fcode)
36123 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
36124 (enum ix86_builtin_func_type)
36125 d->flag, d->comparison);
36126
36127 gcc_unreachable ();
36128 }
36129
36130 /* This returns the target-specific builtin with code CODE if
36131 current_function_decl has visibility on this builtin, which is checked
36132 using isa flags. Returns NULL_TREE otherwise. */
36133
36134 static tree
ix86_get_builtin (enum ix86_builtins code)
36135 {
36136 struct cl_target_option *opts;
36137 tree target_tree = NULL_TREE;
36138
36139 /* Determine the isa flags of current_function_decl. */
36140
36141 if (current_function_decl)
36142 target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
36143
36144 if (target_tree == NULL)
36145 target_tree = target_option_default_node;
36146
36147 opts = TREE_TARGET_OPTION (target_tree);
36148
36149 if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
36150 return ix86_builtin_decl (code, true);
36151 else
36152 return NULL_TREE;
36153 }
36154
36155 /* Returns a function decl for a vectorized version of the builtin function
36156 with builtin function code FN and the result vector type TYPE, or NULL_TREE
36157 if it is not available. */
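/* For instance (illustrative only): when the vectorizer asks for a
   vectorized __builtin_sqrt with V2DFmode result and argument types,
   this hook returns the decl for IX86_BUILTIN_SQRTPD (provided the
   ISA check in ix86_get_builtin succeeds), so a loop like

     for (i = 0; i < n; i++)
       b[i] = __builtin_sqrt (a[i]);

   can be expanded with the sqrtpd pattern.  */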
36158
36159 static tree
36160 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
36161 tree type_in)
36162 {
36163 enum machine_mode in_mode, out_mode;
36164 int in_n, out_n;
36165 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
36166
36167 if (TREE_CODE (type_out) != VECTOR_TYPE
36168 || TREE_CODE (type_in) != VECTOR_TYPE
36169 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
36170 return NULL_TREE;
36171
36172 out_mode = TYPE_MODE (TREE_TYPE (type_out));
36173 out_n = TYPE_VECTOR_SUBPARTS (type_out);
36174 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36175 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36176
36177 switch (fn)
36178 {
36179 case BUILT_IN_SQRT:
36180 if (out_mode == DFmode && in_mode == DFmode)
36181 {
36182 if (out_n == 2 && in_n == 2)
36183 return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
36184 else if (out_n == 4 && in_n == 4)
36185 return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
36186 else if (out_n == 8 && in_n == 8)
36187 return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
36188 }
36189 break;
36190
36191 case BUILT_IN_EXP2F:
36192 if (out_mode == SFmode && in_mode == SFmode)
36193 {
36194 if (out_n == 16 && in_n == 16)
36195 return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
36196 }
36197 break;
36198
36199 case BUILT_IN_SQRTF:
36200 if (out_mode == SFmode && in_mode == SFmode)
36201 {
36202 if (out_n == 4 && in_n == 4)
36203 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
36204 else if (out_n == 8 && in_n == 8)
36205 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
36206 else if (out_n == 16 && in_n == 16)
36207 return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
36208 }
36209 break;
36210
36211 case BUILT_IN_IFLOOR:
36212 case BUILT_IN_LFLOOR:
36213 case BUILT_IN_LLFLOOR:
36214 /* The round insn does not trap on denormals. */
36215 if (flag_trapping_math || !TARGET_ROUND)
36216 break;
36217
36218 if (out_mode == SImode && in_mode == DFmode)
36219 {
36220 if (out_n == 4 && in_n == 2)
36221 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
36222 else if (out_n == 8 && in_n == 4)
36223 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
36224 else if (out_n == 16 && in_n == 8)
36225 return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
36226 }
36227 break;
36228
36229 case BUILT_IN_IFLOORF:
36230 case BUILT_IN_LFLOORF:
36231 case BUILT_IN_LLFLOORF:
36232 /* The round insn does not trap on denormals. */
36233 if (flag_trapping_math || !TARGET_ROUND)
36234 break;
36235
36236 if (out_mode == SImode && in_mode == SFmode)
36237 {
36238 if (out_n == 4 && in_n == 4)
36239 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
36240 else if (out_n == 8 && in_n == 8)
36241 return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
36242 }
36243 break;
36244
36245 case BUILT_IN_ICEIL:
36246 case BUILT_IN_LCEIL:
36247 case BUILT_IN_LLCEIL:
36248 /* The round insn does not trap on denormals. */
36249 if (flag_trapping_math || !TARGET_ROUND)
36250 break;
36251
36252 if (out_mode == SImode && in_mode == DFmode)
36253 {
36254 if (out_n == 4 && in_n == 2)
36255 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
36256 else if (out_n == 8 && in_n == 4)
36257 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
36258 else if (out_n == 16 && in_n == 8)
36259 return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
36260 }
36261 break;
36262
36263 case BUILT_IN_ICEILF:
36264 case BUILT_IN_LCEILF:
36265 case BUILT_IN_LLCEILF:
36266 /* The round insn does not trap on denormals. */
36267 if (flag_trapping_math || !TARGET_ROUND)
36268 break;
36269
36270 if (out_mode == SImode && in_mode == SFmode)
36271 {
36272 if (out_n == 4 && in_n == 4)
36273 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
36274 else if (out_n == 8 && in_n == 8)
36275 return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
36276 }
36277 break;
36278
36279 case BUILT_IN_IRINT:
36280 case BUILT_IN_LRINT:
36281 case BUILT_IN_LLRINT:
36282 if (out_mode == SImode && in_mode == DFmode)
36283 {
36284 if (out_n == 4 && in_n == 2)
36285 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
36286 else if (out_n == 8 && in_n == 4)
36287 return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
36288 }
36289 break;
36290
36291 case BUILT_IN_IRINTF:
36292 case BUILT_IN_LRINTF:
36293 case BUILT_IN_LLRINTF:
36294 if (out_mode == SImode && in_mode == SFmode)
36295 {
36296 if (out_n == 4 && in_n == 4)
36297 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
36298 else if (out_n == 8 && in_n == 8)
36299 return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
36300 }
36301 break;
36302
36303 case BUILT_IN_IROUND:
36304 case BUILT_IN_LROUND:
36305 case BUILT_IN_LLROUND:
36306 /* The round insn does not trap on denormals. */
36307 if (flag_trapping_math || !TARGET_ROUND)
36308 break;
36309
36310 if (out_mode == SImode && in_mode == DFmode)
36311 {
36312 if (out_n == 4 && in_n == 2)
36313 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
36314 else if (out_n == 8 && in_n == 4)
36315 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
36316 else if (out_n == 16 && in_n == 8)
36317 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
36318 }
36319 break;
36320
36321 case BUILT_IN_IROUNDF:
36322 case BUILT_IN_LROUNDF:
36323 case BUILT_IN_LLROUNDF:
36324 /* The round insn does not trap on denormals. */
36325 if (flag_trapping_math || !TARGET_ROUND)
36326 break;
36327
36328 if (out_mode == SImode && in_mode == SFmode)
36329 {
36330 if (out_n == 4 && in_n == 4)
36331 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
36332 else if (out_n == 8 && in_n == 8)
36333 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
36334 }
36335 break;
36336
36337 case BUILT_IN_COPYSIGN:
36338 if (out_mode == DFmode && in_mode == DFmode)
36339 {
36340 if (out_n == 2 && in_n == 2)
36341 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
36342 else if (out_n == 4 && in_n == 4)
36343 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
36344 else if (out_n == 8 && in_n == 8)
36345 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
36346 }
36347 break;
36348
36349 case BUILT_IN_COPYSIGNF:
36350 if (out_mode == SFmode && in_mode == SFmode)
36351 {
36352 if (out_n == 4 && in_n == 4)
36353 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
36354 else if (out_n == 8 && in_n == 8)
36355 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
36356 else if (out_n == 16 && in_n == 16)
36357 return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
36358 }
36359 break;
36360
36361 case BUILT_IN_FLOOR:
36362 /* The round insn does not trap on denormals. */
36363 if (flag_trapping_math || !TARGET_ROUND)
36364 break;
36365
36366 if (out_mode == DFmode && in_mode == DFmode)
36367 {
36368 if (out_n == 2 && in_n == 2)
36369 return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
36370 else if (out_n == 4 && in_n == 4)
36371 return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
36372 }
36373 break;
36374
36375 case BUILT_IN_FLOORF:
36376 /* The round insn does not trap on denormals. */
36377 if (flag_trapping_math || !TARGET_ROUND)
36378 break;
36379
36380 if (out_mode == SFmode && in_mode == SFmode)
36381 {
36382 if (out_n == 4 && in_n == 4)
36383 return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
36384 else if (out_n == 8 && in_n == 8)
36385 return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
36386 }
36387 break;
36388
36389 case BUILT_IN_CEIL:
36390 /* The round insn does not trap on denormals. */
36391 if (flag_trapping_math || !TARGET_ROUND)
36392 break;
36393
36394 if (out_mode == DFmode && in_mode == DFmode)
36395 {
36396 if (out_n == 2 && in_n == 2)
36397 return ix86_get_builtin (IX86_BUILTIN_CEILPD);
36398 else if (out_n == 4 && in_n == 4)
36399 return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
36400 }
36401 break;
36402
36403 case BUILT_IN_CEILF:
36404 /* The round insn does not trap on denormals. */
36405 if (flag_trapping_math || !TARGET_ROUND)
36406 break;
36407
36408 if (out_mode == SFmode && in_mode == SFmode)
36409 {
36410 if (out_n == 4 && in_n == 4)
36411 return ix86_get_builtin (IX86_BUILTIN_CEILPS);
36412 else if (out_n == 8 && in_n == 8)
36413 return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
36414 }
36415 break;
36416
36417 case BUILT_IN_TRUNC:
36418 /* The round insn does not trap on denormals. */
36419 if (flag_trapping_math || !TARGET_ROUND)
36420 break;
36421
36422 if (out_mode == DFmode && in_mode == DFmode)
36423 {
36424 if (out_n == 2 && in_n == 2)
36425 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
36426 else if (out_n == 4 && in_n == 4)
36427 return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
36428 }
36429 break;
36430
36431 case BUILT_IN_TRUNCF:
36432 /* The round insn does not trap on denormals. */
36433 if (flag_trapping_math || !TARGET_ROUND)
36434 break;
36435
36436 if (out_mode == SFmode && in_mode == SFmode)
36437 {
36438 if (out_n == 4 && in_n == 4)
36439 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
36440 else if (out_n == 8 && in_n == 8)
36441 return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
36442 }
36443 break;
36444
36445 case BUILT_IN_RINT:
36446 /* The round insn does not trap on denormals. */
36447 if (flag_trapping_math || !TARGET_ROUND)
36448 break;
36449
36450 if (out_mode == DFmode && in_mode == DFmode)
36451 {
36452 if (out_n == 2 && in_n == 2)
36453 return ix86_get_builtin (IX86_BUILTIN_RINTPD);
36454 else if (out_n == 4 && in_n == 4)
36455 return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
36456 }
36457 break;
36458
36459 case BUILT_IN_RINTF:
36460 /* The round insn does not trap on denormals. */
36461 if (flag_trapping_math || !TARGET_ROUND)
36462 break;
36463
36464 if (out_mode == SFmode && in_mode == SFmode)
36465 {
36466 if (out_n == 4 && in_n == 4)
36467 return ix86_get_builtin (IX86_BUILTIN_RINTPS);
36468 else if (out_n == 8 && in_n == 8)
36469 return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
36470 }
36471 break;
36472
36473 case BUILT_IN_ROUND:
36474 /* The round insn does not trap on denormals. */
36475 if (flag_trapping_math || !TARGET_ROUND)
36476 break;
36477
36478 if (out_mode == DFmode && in_mode == DFmode)
36479 {
36480 if (out_n == 2 && in_n == 2)
36481 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
36482 else if (out_n == 4 && in_n == 4)
36483 return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
36484 }
36485 break;
36486
36487 case BUILT_IN_ROUNDF:
36488 /* The round insn does not trap on denormals. */
36489 if (flag_trapping_math || !TARGET_ROUND)
36490 break;
36491
36492 if (out_mode == SFmode && in_mode == SFmode)
36493 {
36494 if (out_n == 4 && in_n == 4)
36495 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
36496 else if (out_n == 8 && in_n == 8)
36497 return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
36498 }
36499 break;
36500
36501 case BUILT_IN_FMA:
36502 if (out_mode == DFmode && in_mode == DFmode)
36503 {
36504 if (out_n == 2 && in_n == 2)
36505 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
36506 if (out_n == 4 && in_n == 4)
36507 return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
36508 }
36509 break;
36510
36511 case BUILT_IN_FMAF:
36512 if (out_mode == SFmode && in_mode == SFmode)
36513 {
36514 if (out_n == 4 && in_n == 4)
36515 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
36516 if (out_n == 8 && in_n == 8)
36517 return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
36518 }
36519 break;
36520
36521 default:
36522 break;
36523 }
36524
36525 /* Dispatch to a handler for a vectorization library. */
36526 if (ix86_veclib_handler)
36527 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
36528 type_in);
36529
36530 return NULL_TREE;
36531 }
36532
36533 /* Handler for an SVML-style interface to
36534 a library with vectorized intrinsics. */
36535
36536 static tree
36537 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
36538 {
36539 char name[20];
36540 tree fntype, new_fndecl, args;
36541 unsigned arity;
36542 const char *bname;
36543 enum machine_mode el_mode, in_mode;
36544 int n, in_n;
36545
36546 /* The SVML is suitable for unsafe math only. */
36547 if (!flag_unsafe_math_optimizations)
36548 return NULL_TREE;
36549
36550 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36551 n = TYPE_VECTOR_SUBPARTS (type_out);
36552 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36553 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36554 if (el_mode != in_mode
36555 || n != in_n)
36556 return NULL_TREE;
36557
36558 switch (fn)
36559 {
36560 case BUILT_IN_EXP:
36561 case BUILT_IN_LOG:
36562 case BUILT_IN_LOG10:
36563 case BUILT_IN_POW:
36564 case BUILT_IN_TANH:
36565 case BUILT_IN_TAN:
36566 case BUILT_IN_ATAN:
36567 case BUILT_IN_ATAN2:
36568 case BUILT_IN_ATANH:
36569 case BUILT_IN_CBRT:
36570 case BUILT_IN_SINH:
36571 case BUILT_IN_SIN:
36572 case BUILT_IN_ASINH:
36573 case BUILT_IN_ASIN:
36574 case BUILT_IN_COSH:
36575 case BUILT_IN_COS:
36576 case BUILT_IN_ACOSH:
36577 case BUILT_IN_ACOS:
36578 if (el_mode != DFmode || n != 2)
36579 return NULL_TREE;
36580 break;
36581
36582 case BUILT_IN_EXPF:
36583 case BUILT_IN_LOGF:
36584 case BUILT_IN_LOG10F:
36585 case BUILT_IN_POWF:
36586 case BUILT_IN_TANHF:
36587 case BUILT_IN_TANF:
36588 case BUILT_IN_ATANF:
36589 case BUILT_IN_ATAN2F:
36590 case BUILT_IN_ATANHF:
36591 case BUILT_IN_CBRTF:
36592 case BUILT_IN_SINHF:
36593 case BUILT_IN_SINF:
36594 case BUILT_IN_ASINHF:
36595 case BUILT_IN_ASINF:
36596 case BUILT_IN_COSHF:
36597 case BUILT_IN_COSF:
36598 case BUILT_IN_ACOSHF:
36599 case BUILT_IN_ACOSF:
36600 if (el_mode != SFmode || n != 4)
36601 return NULL_TREE;
36602 break;
36603
36604 default:
36605 return NULL_TREE;
36606 }
36607
36608 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36609
36610 if (fn == BUILT_IN_LOGF)
36611 strcpy (name, "vmlsLn4");
36612 else if (fn == BUILT_IN_LOG)
36613 strcpy (name, "vmldLn2");
36614 else if (n == 4)
36615 {
36616 sprintf (name, "vmls%s", bname+10);
36617 name[strlen (name)-1] = '4';
36618 }
36619 else
36620 sprintf (name, "vmld%s2", bname+10);
36621
36622 /* Convert to uppercase. */
36623 name[4] &= ~0x20;
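  /* For instance (illustrative only): BUILT_IN_SINF with 4-wide
     SFmode vectors yields "vmlsSin4", while BUILT_IN_SIN with
     2-wide DFmode vectors yields "vmldSin2".  */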
36624
36625 arity = 0;
36626 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36627 args;
36628 args = TREE_CHAIN (args))
36629 arity++;
36630
36631 if (arity == 1)
36632 fntype = build_function_type_list (type_out, type_in, NULL);
36633 else
36634 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36635
36636 /* Build a function declaration for the vectorized function. */
36637 new_fndecl = build_decl (BUILTINS_LOCATION,
36638 FUNCTION_DECL, get_identifier (name), fntype);
36639 TREE_PUBLIC (new_fndecl) = 1;
36640 DECL_EXTERNAL (new_fndecl) = 1;
36641 DECL_IS_NOVOPS (new_fndecl) = 1;
36642 TREE_READONLY (new_fndecl) = 1;
36643
36644 return new_fndecl;
36645 }
36646
36647 /* Handler for an ACML-style interface to
36648 a library with vectorized intrinsics. */
36649
36650 static tree
36651 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
36652 {
36653 char name[20] = "__vr.._";
36654 tree fntype, new_fndecl, args;
36655 unsigned arity;
36656 const char *bname;
36657 enum machine_mode el_mode, in_mode;
36658 int n, in_n;
36659
36660 /* The ACML is 64-bit only and suitable for unsafe math only, as
36661 it does not correctly support parts of IEEE with the required
36662 precision such as denormals. */
36663 if (!TARGET_64BIT
36664 || !flag_unsafe_math_optimizations)
36665 return NULL_TREE;
36666
36667 el_mode = TYPE_MODE (TREE_TYPE (type_out));
36668 n = TYPE_VECTOR_SUBPARTS (type_out);
36669 in_mode = TYPE_MODE (TREE_TYPE (type_in));
36670 in_n = TYPE_VECTOR_SUBPARTS (type_in);
36671 if (el_mode != in_mode
36672 || n != in_n)
36673 return NULL_TREE;
36674
36675 switch (fn)
36676 {
36677 case BUILT_IN_SIN:
36678 case BUILT_IN_COS:
36679 case BUILT_IN_EXP:
36680 case BUILT_IN_LOG:
36681 case BUILT_IN_LOG2:
36682 case BUILT_IN_LOG10:
36683 name[4] = 'd';
36684 name[5] = '2';
36685 if (el_mode != DFmode
36686 || n != 2)
36687 return NULL_TREE;
36688 break;
36689
36690 case BUILT_IN_SINF:
36691 case BUILT_IN_COSF:
36692 case BUILT_IN_EXPF:
36693 case BUILT_IN_POWF:
36694 case BUILT_IN_LOGF:
36695 case BUILT_IN_LOG2F:
36696 case BUILT_IN_LOG10F:
36697 name[4] = 's';
36698 name[5] = '4';
36699 if (el_mode != SFmode
36700 || n != 4)
36701 return NULL_TREE;
36702 break;
36703
36704 default:
36705 return NULL_TREE;
36706 }
36707
36708 bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
36709 sprintf (name + 7, "%s", bname+10);
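  /* For instance (illustrative only): BUILT_IN_SIN yields
     "__vrd2_sin" and BUILT_IN_SINF yields "__vrs4_sinf".  */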
36710
36711 arity = 0;
36712 for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
36713 args;
36714 args = TREE_CHAIN (args))
36715 arity++;
36716
36717 if (arity == 1)
36718 fntype = build_function_type_list (type_out, type_in, NULL);
36719 else
36720 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
36721
36722 /* Build a function declaration for the vectorized function. */
36723 new_fndecl = build_decl (BUILTINS_LOCATION,
36724 FUNCTION_DECL, get_identifier (name), fntype);
36725 TREE_PUBLIC (new_fndecl) = 1;
36726 DECL_EXTERNAL (new_fndecl) = 1;
36727 DECL_IS_NOVOPS (new_fndecl) = 1;
36728 TREE_READONLY (new_fndecl) = 1;
36729
36730 return new_fndecl;
36731 }
36732
36733 /* Returns a decl of a function that implements gather load with
36734 memory type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
36735 Return NULL_TREE if it is not available. */
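/* For instance (illustrative only): with TARGET_AVX2, a V2DFmode
   gather with a SImode index vector and scale 8 maps to
   IX86_BUILTIN_GATHERSIV2DF below, subject to the ISA check in
   ix86_get_builtin.  */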
36736
36737 static tree
36738 ix86_vectorize_builtin_gather (const_tree mem_vectype,
36739 const_tree index_type, int scale)
36740 {
36741 bool si;
36742 enum ix86_builtins code;
36743
36744 if (! TARGET_AVX2)
36745 return NULL_TREE;
36746
36747 if ((TREE_CODE (index_type) != INTEGER_TYPE
36748 && !POINTER_TYPE_P (index_type))
36749 || (TYPE_MODE (index_type) != SImode
36750 && TYPE_MODE (index_type) != DImode))
36751 return NULL_TREE;
36752
36753 if (TYPE_PRECISION (index_type) > POINTER_SIZE)
36754 return NULL_TREE;
36755
36756 /* v*gather* insn sign extends index to pointer mode. */
36757 if (TYPE_PRECISION (index_type) < POINTER_SIZE
36758 && TYPE_UNSIGNED (index_type))
36759 return NULL_TREE;
36760
36761 if (scale <= 0
36762 || scale > 8
36763 || (scale & (scale - 1)) != 0)
36764 return NULL_TREE;
36765
36766 si = TYPE_MODE (index_type) == SImode;
36767 switch (TYPE_MODE (mem_vectype))
36768 {
36769 case V2DFmode:
36770 code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
36771 break;
36772 case V4DFmode:
36773 code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
36774 break;
36775 case V2DImode:
36776 code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
36777 break;
36778 case V4DImode:
36779 code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
36780 break;
36781 case V4SFmode:
36782 code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
36783 break;
36784 case V8SFmode:
36785 code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
36786 break;
36787 case V4SImode:
36788 code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
36789 break;
36790 case V8SImode:
36791 code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
36792 break;
36793 case V8DFmode:
36794 if (TARGET_AVX512F)
36795 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
36796 else
36797 return NULL_TREE;
36798 break;
36799 case V8DImode:
36800 if (TARGET_AVX512F)
36801 code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
36802 else
36803 return NULL_TREE;
36804 break;
36805 case V16SFmode:
36806 if (TARGET_AVX512F)
36807 code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
36808 else
36809 return NULL_TREE;
36810 break;
36811 case V16SImode:
36812 if (TARGET_AVX512F)
36813 code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
36814 else
36815 return NULL_TREE;
36816 break;
36817 default:
36818 return NULL_TREE;
36819 }
36820
36821 return ix86_get_builtin (code);
36822 }
36823
36824 /* Returns a decl of a target-specific builtin that implements the
36825 reciprocal of the function, or NULL_TREE if not available. */
36826
36827 static tree
36828 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
36829 bool sqrt ATTRIBUTE_UNUSED)
36830 {
36831 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
36832 && flag_finite_math_only && !flag_trapping_math
36833 && flag_unsafe_math_optimizations))
36834 return NULL_TREE;
36835
36836 if (md_fn)
36837 /* Machine dependent builtins. */
36838 switch (fn)
36839 {
36840 /* Vectorized version of sqrt to rsqrt conversion. */
36841 case IX86_BUILTIN_SQRTPS_NR:
36842 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
36843
36844 case IX86_BUILTIN_SQRTPS_NR256:
36845 return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
36846
36847 default:
36848 return NULL_TREE;
36849 }
36850 else
36851 /* Normal builtins. */
36852 switch (fn)
36853 {
36854 /* Sqrt to rsqrt conversion. */
36855 case BUILT_IN_SQRTF:
36856 return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
36857
36858 default:
36859 return NULL_TREE;
36860 }
36861 }
36862 \f
36863 /* Helper for avx_vpermilps256_operand et al. This is also used by
36864 the expansion functions to turn the parallel back into a mask.
36865 The return value is 0 for no match and the imm8+1 for a match. */
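/* For instance (illustrative only): for V4SFmode the parallel
   [1 0 3 2] encodes two bits per element, giving
   imm8 = 1 | (0 << 2) | (3 << 4) | (2 << 6) = 0xb1, so the
   function returns 0xb2.  */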
36866
36867 int
36868 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
36869 {
36870 unsigned i, nelt = GET_MODE_NUNITS (mode);
36871 unsigned mask = 0;
36872 unsigned char ipar[16] = {}; /* Silence -Wuninitialized warning. */
36873
36874 if (XVECLEN (par, 0) != (int) nelt)
36875 return 0;
36876
36877 /* Validate that all of the elements are constants, and not totally
36878 out of range. Copy the data into an integral array to make the
36879 subsequent checks easier. */
36880 for (i = 0; i < nelt; ++i)
36881 {
36882 rtx er = XVECEXP (par, 0, i);
36883 unsigned HOST_WIDE_INT ei;
36884
36885 if (!CONST_INT_P (er))
36886 return 0;
36887 ei = INTVAL (er);
36888 if (ei >= nelt)
36889 return 0;
36890 ipar[i] = ei;
36891 }
36892
36893 switch (mode)
36894 {
36895 case V8DFmode:
36896 /* In the 512-bit DFmode case, we can only move elements within
36897 a 128-bit lane. First fill the second part of the mask,
36898 then fallthru. */
36899 for (i = 4; i < 6; ++i)
36900 {
36901 if (ipar[i] < 4 || ipar[i] >= 6)
36902 return 0;
36903 mask |= (ipar[i] - 4) << i;
36904 }
36905 for (i = 6; i < 8; ++i)
36906 {
36907 if (ipar[i] < 6)
36908 return 0;
36909 mask |= (ipar[i] - 6) << i;
36910 }
36911 /* FALLTHRU */
36912
36913 case V4DFmode:
36914 /* In the 256-bit DFmode case, we can only move elements within
36915 a 128-bit lane. */
36916 for (i = 0; i < 2; ++i)
36917 {
36918 if (ipar[i] >= 2)
36919 return 0;
36920 mask |= ipar[i] << i;
36921 }
36922 for (i = 2; i < 4; ++i)
36923 {
36924 if (ipar[i] < 2)
36925 return 0;
36926 mask |= (ipar[i] - 2) << i;
36927 }
36928 break;
36929
36930 case V16SFmode:
36931 /* In the 512-bit SFmode case, the permutation in the upper 256
36932 bits must mirror the permutation in the lower 256 bits. */
36933 for (i = 0; i < 8; ++i)
36934 if (ipar[i] + 8 != ipar[i + 8])
36935 return 0;
36936 /* FALLTHRU */
36937
36938 case V8SFmode:
36939 /* In the 256-bit SFmode case, we have full freedom of
36940 movement within the low 128-bit lane, but the high 128-bit
36941 lane must mirror the exact same pattern. */
36942 for (i = 0; i < 4; ++i)
36943 if (ipar[i] + 4 != ipar[i + 4])
36944 return 0;
36945 nelt = 4;
36946 /* FALLTHRU */
36947
36948 case V2DFmode:
36949 case V4SFmode:
36950 /* In the 128-bit case, we have full freedom in the placement of
36951 the elements from the source operand. */
36952 for (i = 0; i < nelt; ++i)
36953 mask |= ipar[i] << (i * (nelt / 2));
36954 break;
36955
36956 default:
36957 gcc_unreachable ();
36958 }
36959
36960 /* Make sure success has a non-zero value by adding one. */
36961 return mask + 1;
36962 }
36963
36964 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
36965 the expansion functions to turn the parallel back into a mask.
36966 The return value is 0 for no match and the imm8+1 for a match. */
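/* For instance (illustrative only): for V8SFmode the parallel
   [4 5 6 7 8 9 10 11] selects the high lane of the first operand
   followed by the low lane of the second, giving
   imm8 = 1 | (2 << 4) = 0x21, so the function returns 0x22.  */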
36967
36968 int
36969 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
36970 {
36971 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
36972 unsigned mask = 0;
36973 unsigned char ipar[8] = {}; /* Silence -Wuninitialized warning. */
36974
36975 if (XVECLEN (par, 0) != (int) nelt)
36976 return 0;
36977
36978 /* Validate that all of the elements are constants, and not totally
36979 out of range. Copy the data into an integral array to make the
36980 subsequent checks easier. */
36981 for (i = 0; i < nelt; ++i)
36982 {
36983 rtx er = XVECEXP (par, 0, i);
36984 unsigned HOST_WIDE_INT ei;
36985
36986 if (!CONST_INT_P (er))
36987 return 0;
36988 ei = INTVAL (er);
36989 if (ei >= 2 * nelt)
36990 return 0;
36991 ipar[i] = ei;
36992 }
36993
36994 /* Validate that the halves of the permute are halves. */
36995 for (i = 0; i < nelt2 - 1; ++i)
36996 if (ipar[i] + 1 != ipar[i + 1])
36997 return 0;
36998 for (i = nelt2; i < nelt - 1; ++i)
36999 if (ipar[i] + 1 != ipar[i + 1])
37000 return 0;
37001
37002 /* Reconstruct the mask. */
37003 for (i = 0; i < 2; ++i)
37004 {
37005 unsigned e = ipar[i * nelt2];
37006 if (e % nelt2)
37007 return 0;
37008 e /= nelt2;
37009 mask |= e << (i * 4);
37010 }
37011
37012 /* Make sure success has a non-zero value by adding one. */
37013 return mask + 1;
37014 }
37015 \f
37016 /* Return a register priority for hard reg REGNO. */
37017 static int
37018 ix86_register_priority (int hard_regno)
37019 {
37020 /* ebp and r13 as the base always want a displacement, r12 as the
37021 base always wants an index. So discourage their use in an
37022 address. */
37023 if (hard_regno == R12_REG || hard_regno == R13_REG)
37024 return 0;
37025 if (hard_regno == BP_REG)
37026 return 1;
37027 /* New x86-64 int registers result in bigger code size. Discourage
37028 them. */
37029 if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
37030 return 2;
37031 /* New x86-64 SSE registers result in bigger code size. Discourage
37032 them. */
37033 if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
37034 return 2;
37035 /* Usage of AX register results in smaller code. Prefer it. */
37036 if (hard_regno == 0)
37037 return 4;
37038 return 3;
37039 }
37040
37041 /* Implement TARGET_PREFERRED_RELOAD_CLASS.
37042
37043 Put float CONST_DOUBLE in the constant pool instead of fp regs.
37044 QImode must go into class Q_REGS.
37045 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
37046 movdf to do mem-to-mem moves through integer regs. */
37047
37048 static reg_class_t
37049 ix86_preferred_reload_class (rtx x, reg_class_t regclass)
37050 {
37051 enum machine_mode mode = GET_MODE (x);
37052
37053 /* We're only allowed to return a subclass of CLASS. Many of the
37054 following checks fail for NO_REGS, so eliminate that early. */
37055 if (regclass == NO_REGS)
37056 return NO_REGS;
37057
37058 /* All classes can load zeros. */
37059 if (x == CONST0_RTX (mode))
37060 return regclass;
37061
37062 /* Force constants into memory if we are loading a (nonzero) constant into
37063 an MMX, SSE or MASK register. This is because there are no MMX/SSE/MASK
37064 instructions to load from a constant. */
37065 if (CONSTANT_P (x)
37066 && (MAYBE_MMX_CLASS_P (regclass)
37067 || MAYBE_SSE_CLASS_P (regclass)
37068 || MAYBE_MASK_CLASS_P (regclass)))
37069 return NO_REGS;
37070
37071 /* Prefer SSE regs only, if we can use them for math. */
37072 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
37073 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
37074
37075 /* Floating-point constants need more complex checks. */
37076 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
37077 {
37078 /* General regs can load everything. */
37079 if (reg_class_subset_p (regclass, GENERAL_REGS))
37080 return regclass;
37081
37082 /* Floats can load 0 and 1 plus some others. Note that we eliminated
37083 zero above. We only want to wind up preferring 80387 registers if
37084 we plan on doing computation with them. */
37085 if (TARGET_80387
37086 && standard_80387_constant_p (x) > 0)
37087 {
37088 /* Limit class to non-sse. */
37089 if (regclass == FLOAT_SSE_REGS)
37090 return FLOAT_REGS;
37091 if (regclass == FP_TOP_SSE_REGS)
37092 return FP_TOP_REG;
37093 if (regclass == FP_SECOND_SSE_REGS)
37094 return FP_SECOND_REG;
37095 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
37096 return regclass;
37097 }
37098
37099 return NO_REGS;
37100 }
37101
37102 /* Generally when we see PLUS here, it's the function invariant
37103 (plus soft-fp const_int), which can only be computed into general
37104 regs. */
37105 if (GET_CODE (x) == PLUS)
37106 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
37107
37108 /* QImode constants are easy to load, but non-constant QImode data
37109 must go into Q_REGS. */
37110 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
37111 {
37112 if (reg_class_subset_p (regclass, Q_REGS))
37113 return regclass;
37114 if (reg_class_subset_p (Q_REGS, regclass))
37115 return Q_REGS;
37116 return NO_REGS;
37117 }
37118
37119 return regclass;
37120 }
37121
37122 /* Discourage putting floating-point values in SSE registers unless
37123 SSE math is being used, and likewise for the 387 registers. */
37124 static reg_class_t
37125 ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
37126 {
37127 enum machine_mode mode = GET_MODE (x);
37128
37129 /* Restrict the output reload class to the register bank that we are doing
37130 math on. If we would like not to return a subset of CLASS, reject this
37131 alternative: if reload cannot do this, it will still use its choice. */
37132 mode = GET_MODE (x);
37133 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
37134 return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
37135
37136 if (X87_FLOAT_MODE_P (mode))
37137 {
37138 if (regclass == FP_TOP_SSE_REGS)
37139 return FP_TOP_REG;
37140 else if (regclass == FP_SECOND_SSE_REGS)
37141 return FP_SECOND_REG;
37142 else
37143 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
37144 }
37145
37146 return regclass;
37147 }
37148
37149 static reg_class_t
37150 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
37151 enum machine_mode mode, secondary_reload_info *sri)
37152 {
37153 /* Double-word spills from general registers to non-offsettable memory
37154 references (zero-extended addresses) require special handling. */
37155 if (TARGET_64BIT
37156 && MEM_P (x)
37157 && GET_MODE_SIZE (mode) > UNITS_PER_WORD
37158 && INTEGER_CLASS_P (rclass)
37159 && !offsettable_memref_p (x))
37160 {
37161 sri->icode = (in_p
37162 ? CODE_FOR_reload_noff_load
37163 : CODE_FOR_reload_noff_store);
37164 /* Add the cost of moving address to a temporary. */
37165 sri->extra_cost = 1;
37166
37167 return NO_REGS;
37168 }
37169
37170 /* QImode spills from non-QI registers require
37171 an intermediate register on 32-bit targets. */
37172 if (mode == QImode
37173 && (MAYBE_MASK_CLASS_P (rclass)
37174 || (!TARGET_64BIT && !in_p
37175 && INTEGER_CLASS_P (rclass)
37176 && MAYBE_NON_Q_CLASS_P (rclass))))
37177 {
37178 int regno;
37179
37180 if (REG_P (x))
37181 regno = REGNO (x);
37182 else
37183 regno = -1;
37184
37185 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
37186 regno = true_regnum (x);
37187
37188 /* Return Q_REGS if the operand is in memory. */
37189 if (regno == -1)
37190 return Q_REGS;
37191 }
37192
37193 /* This condition handles corner case where an expression involving
37194 pointers gets vectorized. We're trying to use the address of a
37195 stack slot as a vector initializer.
37196
37197 (set (reg:V2DI 74 [ vect_cst_.2 ])
37198 (vec_duplicate:V2DI (reg/f:DI 20 frame)))
37199
37200 Eventually frame gets turned into sp+offset like this:
37201
37202 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37203 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37204 (const_int 392 [0x188]))))
37205
37206 That later gets turned into:
37207
37208 (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37209 (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
37210 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
37211
37212 We'll have the following reload recorded:
37213
37214 Reload 0: reload_in (DI) =
37215 (plus:DI (reg/f:DI 7 sp)
37216 (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
37217 reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37218 SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
37219 reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
37220 reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
37221 reload_reg_rtx: (reg:V2DI 22 xmm1)
37222
37223 Which isn't going to work since SSE instructions can't handle scalar
37224 additions. Returning GENERAL_REGS forces the addition into integer
37225 register and reload can handle subsequent reloads without problems. */
37226
37227 if (in_p && GET_CODE (x) == PLUS
37228 && SSE_CLASS_P (rclass)
37229 && SCALAR_INT_MODE_P (mode))
37230 return GENERAL_REGS;
37231
37232 return NO_REGS;
37233 }
37234
37235 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
37236
37237 static bool
37238 ix86_class_likely_spilled_p (reg_class_t rclass)
37239 {
37240 switch (rclass)
37241 {
37242 case AREG:
37243 case DREG:
37244 case CREG:
37245 case BREG:
37246 case AD_REGS:
37247 case SIREG:
37248 case DIREG:
37249 case SSE_FIRST_REG:
37250 case FP_TOP_REG:
37251 case FP_SECOND_REG:
37252 return true;
37253
37254 default:
37255 break;
37256 }
37257
37258 return false;
37259 }
37260
37261 /* If we are copying between general and FP registers, we need a memory
37262 location. The same is true for SSE and MMX registers.
37263
37264 To optimize register_move_cost performance, allow inline variant.
37265
37266 The macro can't work reliably when one of the CLASSES is a class containing
37267 registers from multiple units (SSE, MMX, integer). We avoid this by never
37268 combining those units in single alternative in the machine description.
37269 Ensure that this constraint holds to avoid unexpected surprises.
37270
37271 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
37272 enforce these sanity checks. */
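/* For instance (illustrative only): assuming SSE2 and inter-unit
   moves are enabled, copying a DImode value between SSE_REGS and
   GENERAL_REGS on a 32-bit target still needs secondary memory,
   because GET_MODE_SIZE (DImode) exceeds UNITS_PER_WORD.  */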
37273
37274 static inline bool
37275 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37276 enum machine_mode mode, int strict)
37277 {
37278 if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
37279 return false;
37280 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
37281 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
37282 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
37283 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
37284 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
37285 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
37286 {
37287 gcc_assert (!strict || lra_in_progress);
37288 return true;
37289 }
37290
37291 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
37292 return true;
37293
37294 /* ??? This is a lie. We do have moves between mmx/general, and for
37295 mmx/sse2. But by saying we need secondary memory we discourage the
37296 register allocator from using the mmx registers unless needed. */
37297 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
37298 return true;
37299
37300 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37301 {
37302 /* SSE1 doesn't have any direct moves from other classes. */
37303 if (!TARGET_SSE2)
37304 return true;
37305
37306 /* If the target says that inter-unit moves are more expensive
37307 than moving through memory, then don't generate them. */
37308 if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
37309 || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
37310 return true;
37311
37312 /* Between SSE and general, we have moves no larger than word size. */
37313 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37314 return true;
37315 }
37316
37317 return false;
37318 }
37319
37320 bool
37321 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
37322 enum machine_mode mode, int strict)
37323 {
37324 return inline_secondary_memory_needed (class1, class2, mode, strict);
37325 }
37326
37327 /* Implement the TARGET_CLASS_MAX_NREGS hook.
37328
37329 On the 80386, this is the size of MODE in words,
37330 except in the FP regs, where a single reg is always enough. */
37331
37332 static unsigned char
37333 ix86_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
37334 {
37335 if (MAYBE_INTEGER_CLASS_P (rclass))
37336 {
37337 if (mode == XFmode)
37338 return (TARGET_64BIT ? 2 : 3);
37339 else if (mode == XCmode)
37340 return (TARGET_64BIT ? 4 : 6);
37341 else
37342 return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
37343 }
37344 else
37345 {
37346 if (COMPLEX_MODE_P (mode))
37347 return 2;
37348 else
37349 return 1;
37350 }
37351 }
37352
37353 /* Return true if the registers in CLASS cannot represent the change from
37354 modes FROM to TO. */
37355
37356 bool
37357 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
37358 enum reg_class regclass)
37359 {
37360 if (from == to)
37361 return false;
37362
37363 /* x87 registers can't do subreg at all, as all values are reformatted
37364 to extended precision. */
37365 if (MAYBE_FLOAT_CLASS_P (regclass))
37366 return true;
37367
37368 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
37369 {
37370 /* Vector registers do not support QI or HImode loads. If we don't
37371 disallow a change to these modes, reload will assume it's ok to
37372 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
37373 the vec_dupv4hi pattern. */
37374 if (GET_MODE_SIZE (from) < 4)
37375 return true;
37376
37377 /* Vector registers do not support subreg with nonzero offsets, which
37378 are otherwise valid for integer registers. Since we can't see
37379 whether we have a nonzero offset from here, prohibit all
37380 nonparadoxical subregs changing size. */
37381 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
37382 return true;
37383 }
37384
37385 return false;
37386 }
37387
37388 /* Return the cost of moving data of mode M between a
37389 register and memory. A value of 2 is the default; this cost is
37390 relative to those in `REGISTER_MOVE_COST'.
37391
37392 This function is used extensively by register_move_cost that is used to
37393 build tables at startup. Make it inline in this case.
37394 When IN is 2, return maximum of in and out move cost.
37395
37396 If moving between registers and memory is more expensive than
37397 between two registers, you should define this macro to express the
37398 relative cost.
37399
37400 Also model the increased cost of moving QImode registers in non
37401 Q_REGS classes.
37402 */
37403 static inline int
37404 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
37405 int in)
37406 {
37407 int cost;
37408 if (FLOAT_CLASS_P (regclass))
37409 {
37410 int index;
37411 switch (mode)
37412 {
37413 case SFmode:
37414 index = 0;
37415 break;
37416 case DFmode:
37417 index = 1;
37418 break;
37419 case XFmode:
37420 index = 2;
37421 break;
37422 default:
37423 return 100;
37424 }
37425 if (in == 2)
37426 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
37427 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
37428 }
37429 if (SSE_CLASS_P (regclass))
37430 {
37431 int index;
37432 switch (GET_MODE_SIZE (mode))
37433 {
37434 case 4:
37435 index = 0;
37436 break;
37437 case 8:
37438 index = 1;
37439 break;
37440 case 16:
37441 index = 2;
37442 break;
37443 default:
37444 return 100;
37445 }
37446 if (in == 2)
37447 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
37448 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
37449 }
37450 if (MMX_CLASS_P (regclass))
37451 {
37452 int index;
37453 switch (GET_MODE_SIZE (mode))
37454 {
37455 case 4:
37456 index = 0;
37457 break;
37458 case 8:
37459 index = 1;
37460 break;
37461 default:
37462 return 100;
37463 }
37464 if (in == 2)
37465 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
37466 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
37467 }
37468 switch (GET_MODE_SIZE (mode))
37469 {
37470 case 1:
37471 if (Q_CLASS_P (regclass) || TARGET_64BIT)
37472 {
37473 if (!in)
37474 return ix86_cost->int_store[0];
37475 if (TARGET_PARTIAL_REG_DEPENDENCY
37476 && optimize_function_for_speed_p (cfun))
37477 cost = ix86_cost->movzbl_load;
37478 else
37479 cost = ix86_cost->int_load[0];
37480 if (in == 2)
37481 return MAX (cost, ix86_cost->int_store[0]);
37482 return cost;
37483 }
37484 else
37485 {
37486 if (in == 2)
37487 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
37488 if (in)
37489 return ix86_cost->movzbl_load;
37490 else
37491 return ix86_cost->int_store[0] + 4;
37492 }
37493 break;
37494 case 2:
37495 if (in == 2)
37496 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
37497 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
37498 default:
37499 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
37500 if (mode == TFmode)
37501 mode = XFmode;
37502 if (in == 2)
37503 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
37504 else if (in)
37505 cost = ix86_cost->int_load[2];
37506 else
37507 cost = ix86_cost->int_store[2];
37508 return (cost * (((int) GET_MODE_SIZE (mode)
37509 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
37510 }
37511 }
37512
37513 static int
37514 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
37515 bool in)
37516 {
37517 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
37518 }
37519
37520
37521 /* Return the cost of moving data from a register in class CLASS1 to
37522 one in class CLASS2.
37523
37524 It is not required that the cost always equal 2 when FROM is the same as TO;
37525 on some machines it is expensive to move between registers if they are not
37526 general registers. */
37527
37528 static int
37529 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
37530 reg_class_t class2_i)
37531 {
37532 enum reg_class class1 = (enum reg_class) class1_i;
37533 enum reg_class class2 = (enum reg_class) class2_i;
37534
37535 /* In case we require secondary memory, compute cost of the store followed
37536 by a load. In order to avoid bad register allocation choices, we need
37537 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
37538
37539 if (inline_secondary_memory_needed (class1, class2, mode, 0))
37540 {
37541 int cost = 1;
37542
37543 cost += inline_memory_move_cost (mode, class1, 2);
37544 cost += inline_memory_move_cost (mode, class2, 2);
37545
37546 /* In case of copying from a general purpose register we may emit multiple
37547 stores followed by a single load, causing a memory size mismatch stall.
37548 Count this as an arbitrarily high cost of 20. */
37549 if (targetm.class_max_nregs (class1, mode)
37550 > targetm.class_max_nregs (class2, mode))
37551 cost += 20;
37552
37553 /* In the case of FP/MMX moves, the registers actually overlap, and we
37554 have to switch modes in order to treat them differently. */
37555 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
37556 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
37557 cost += 20;
37558
37559 return cost;
37560 }
37561
37562 /* Moves between SSE/MMX and integer unit are expensive. */
37563 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
37564 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
37565
37566 /* ??? By keeping returned value relatively high, we limit the number
37567 of moves between integer and MMX/SSE registers for all targets.
37568 Additionally, high value prevents problem with x86_modes_tieable_p(),
37569 where integer modes in MMX/SSE registers are not tieable
37570 because of missing QImode and HImode moves to, from or between
37571 MMX/SSE registers. */
37572 return MAX (8, ix86_cost->mmxsse_to_integer);
37573
37574 if (MAYBE_FLOAT_CLASS_P (class1))
37575 return ix86_cost->fp_move;
37576 if (MAYBE_SSE_CLASS_P (class1))
37577 return ix86_cost->sse_move;
37578 if (MAYBE_MMX_CLASS_P (class1))
37579 return ix86_cost->mmx_move;
37580 return 2;
37581 }
37582
37583 /* Return TRUE if hard register REGNO can hold a value of machine-mode
37584 MODE. */
37585
37586 bool
37587 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
37588 {
37589 /* Flags, and only flags, can hold CCmode values. */
37590 if (CC_REGNO_P (regno))
37591 return GET_MODE_CLASS (mode) == MODE_CC;
37592 if (GET_MODE_CLASS (mode) == MODE_CC
37593 || GET_MODE_CLASS (mode) == MODE_RANDOM
37594 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
37595 return false;
37596 if (STACK_REGNO_P (regno))
37597 return VALID_FP_MODE_P (mode);
37598 if (MASK_REGNO_P (regno))
37599 return VALID_MASK_REG_MODE (mode);
37600 if (SSE_REGNO_P (regno))
37601 {
37602 /* We implement the move patterns for all vector modes into and
37603 out of SSE registers, even when no operation instructions
37604 are available. */
37605
37606 /* For AVX-512 we allow, regardless of regno:
37607 - XI mode
37608 - any of 512-bit wide vector mode
37609 - any scalar mode. */
37610 if (TARGET_AVX512F
37611 && (mode == XImode
37612 || VALID_AVX512F_REG_MODE (mode)
37613 || VALID_AVX512F_SCALAR_MODE (mode)))
37614 return true;
37615
37616 /* xmm16-xmm31 are only available for AVX-512. */
37617 if (EXT_REX_SSE_REGNO_P (regno))
37618 return false;
37619
37620 /* OImode and AVX modes are available only when AVX is enabled. */
37621 return ((TARGET_AVX
37622 && VALID_AVX256_REG_OR_OI_MODE (mode))
37623 || VALID_SSE_REG_MODE (mode)
37624 || VALID_SSE2_REG_MODE (mode)
37625 || VALID_MMX_REG_MODE (mode)
37626 || VALID_MMX_REG_MODE_3DNOW (mode));
37627 }
37628 if (MMX_REGNO_P (regno))
37629 {
37630 /* We implement the move patterns for 3DNOW modes even in MMX mode,
37631 so if the register is available at all, then we can move data of
37632 the given mode into or out of it. */
37633 return (VALID_MMX_REG_MODE (mode)
37634 || VALID_MMX_REG_MODE_3DNOW (mode));
37635 }
37636
37637 if (mode == QImode)
37638 {
37639 /* Take care with QImode values - they can be in non-QI regs,
37640 but then they do cause partial register stalls. */
37641 if (ANY_QI_REGNO_P (regno))
37642 return true;
37643 if (!TARGET_PARTIAL_REG_STALL)
37644 return true;
37645 /* LRA checks if the hard register is OK for the given mode.
37646 QImode values can live in non-QI regs, so we allow all
37647 registers here. */
37648 if (lra_in_progress)
37649 return true;
37650 return !can_create_pseudo_p ();
37651 }
37652 /* We handle both integer and floats in the general purpose registers. */
37653 else if (VALID_INT_MODE_P (mode))
37654 return true;
37655 else if (VALID_FP_MODE_P (mode))
37656 return true;
37657 else if (VALID_DFP_MODE_P (mode))
37658 return true;
37659 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
37660 on to use that value in smaller contexts, this can easily force a
37661 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
37662 supporting DImode, allow it. */
37663 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
37664 return true;
37665
37666 return false;
37667 }
37668
37669 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
37670 tieable integer mode. */
37671
37672 static bool
37673 ix86_tieable_integer_mode_p (enum machine_mode mode)
37674 {
37675 switch (mode)
37676 {
37677 case HImode:
37678 case SImode:
37679 return true;
37680
37681 case QImode:
37682 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
37683
37684 case DImode:
37685 return TARGET_64BIT;
37686
37687 default:
37688 return false;
37689 }
37690 }
37691
37692 /* Return true if MODE1 is accessible in a register that can hold MODE2
37693 without copying. That is, all register classes that can hold MODE2
37694 can also hold MODE1. */
37695
37696 bool
37697 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
37698 {
37699 if (mode1 == mode2)
37700 return true;
37701
37702 if (ix86_tieable_integer_mode_p (mode1)
37703 && ix86_tieable_integer_mode_p (mode2))
37704 return true;
37705
37706 /* MODE2 being XFmode implies fp stack or general regs, which means we
37707 can tie any smaller floating point modes to it. Note that we do not
37708 tie this with TFmode. */
37709 if (mode2 == XFmode)
37710 return mode1 == SFmode || mode1 == DFmode;
37711
37712 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
37713 that we can tie it with SFmode. */
37714 if (mode2 == DFmode)
37715 return mode1 == SFmode;
37716
37717 /* If MODE2 is only appropriate for an SSE register, then tie with
37718 any other mode acceptable to SSE registers. */
37719 if (GET_MODE_SIZE (mode2) == 32
37720 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37721 return (GET_MODE_SIZE (mode1) == 32
37722 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37723 if (GET_MODE_SIZE (mode2) == 16
37724 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
37725 return (GET_MODE_SIZE (mode1) == 16
37726 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
37727
37728 /* If MODE2 is appropriate for an MMX register, then tie
37729 with any other mode acceptable to MMX registers. */
37730 if (GET_MODE_SIZE (mode2) == 8
37731 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
37732 return (GET_MODE_SIZE (mode1) == 8
37733 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
37734
37735 return false;
37736 }
37737
37738 /* Return the cost of moving between two registers of mode MODE. */
37739
37740 static int
37741 ix86_set_reg_reg_cost (enum machine_mode mode)
37742 {
37743 unsigned int units = UNITS_PER_WORD;
37744
37745 switch (GET_MODE_CLASS (mode))
37746 {
37747 default:
37748 break;
37749
37750 case MODE_CC:
37751 units = GET_MODE_SIZE (CCmode);
37752 break;
37753
37754 case MODE_FLOAT:
37755 if ((TARGET_SSE && mode == TFmode)
37756 || (TARGET_80387 && mode == XFmode)
37757 || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
37758 || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
37759 units = GET_MODE_SIZE (mode);
37760 break;
37761
37762 case MODE_COMPLEX_FLOAT:
37763 if ((TARGET_SSE && mode == TCmode)
37764 || (TARGET_80387 && mode == XCmode)
37765 || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
37766 || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
37767 units = GET_MODE_SIZE (mode);
37768 break;
37769
37770 case MODE_VECTOR_INT:
37771 case MODE_VECTOR_FLOAT:
37772 if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
37773 || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
37774 || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
37775 || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
37776 || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
37777 units = GET_MODE_SIZE (mode);
37778 }
37779
37780 /* Return the cost of moving between two registers of mode MODE,
37781 assuming that the move will be in pieces of at most UNITS bytes. */
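  /* For instance (illustrative only): DImode on a 32-bit target moves
     in two word-sized pieces, so the result is COSTS_N_INSNS (2).  */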
37782 return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
37783 }
37784
37785 /* Compute a (partial) cost for rtx X. Return true if the complete
37786 cost has been computed, and false if subexpressions should be
37787 scanned. In either case, *TOTAL contains the cost result. */
37788
37789 static bool
37790 ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
37791 bool speed)
37792 {
37793 rtx mask;
37794 enum rtx_code code = (enum rtx_code) code_i;
37795 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
37796 enum machine_mode mode = GET_MODE (x);
37797 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
37798
37799 switch (code)
37800 {
37801 case SET:
37802 if (register_operand (SET_DEST (x), VOIDmode)
37803 && reg_or_0_operand (SET_SRC (x), VOIDmode))
37804 {
37805 *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
37806 return true;
37807 }
37808 return false;
37809
37810 case CONST_INT:
37811 case CONST:
37812 case LABEL_REF:
37813 case SYMBOL_REF:
37814 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
37815 *total = 3;
37816 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
37817 *total = 2;
37818 else if (flag_pic && SYMBOLIC_CONST (x)
37819 && (!TARGET_64BIT
37820 || (GET_CODE (x) != LABEL_REF
37821 && (GET_CODE (x) != SYMBOL_REF
37822 || !SYMBOL_REF_LOCAL_P (x)))))
37823 *total = 1;
37824 else
37825 *total = 0;
37826 return true;
37827
37828 case CONST_DOUBLE:
37829 if (mode == VOIDmode)
37830 {
37831 *total = 0;
37832 return true;
37833 }
37834 switch (standard_80387_constant_p (x))
37835 {
37836 case 1: /* 0.0 */
37837 *total = 1;
37838 return true;
37839 default: /* Other constants */
37840 *total = 2;
37841 return true;
37842 case 0:
37843 case -1:
37844 break;
37845 }
37846 if (SSE_FLOAT_MODE_P (mode))
37847 {
37848 case CONST_VECTOR:
37849 switch (standard_sse_constant_p (x))
37850 {
37851 case 0:
37852 break;
37853 case 1: /* 0: xor eliminates false dependency */
37854 *total = 0;
37855 return true;
37856 default: /* -1: cmp contains false dependency */
37857 *total = 1;
37858 return true;
37859 }
37860 }
37861 /* Fall back to (MEM (SYMBOL_REF)), since that's where
37862 it'll probably end up. Add a penalty for size. */
37863 *total = (COSTS_N_INSNS (1)
37864 + (flag_pic != 0 && !TARGET_64BIT)
37865 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
37866 return true;
37867
37868 case ZERO_EXTEND:
37869 /* The zero extension is often completely free on x86_64, so make
37870 it as cheap as possible. */
37871 if (TARGET_64BIT && mode == DImode
37872 && GET_MODE (XEXP (x, 0)) == SImode)
37873 *total = 1;
37874 else if (TARGET_ZERO_EXTEND_WITH_AND)
37875 *total = cost->add;
37876 else
37877 *total = cost->movzx;
37878 return false;
37879
37880 case SIGN_EXTEND:
37881 *total = cost->movsx;
37882 return false;
37883
37884 case ASHIFT:
37885 if (SCALAR_INT_MODE_P (mode)
37886 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
37887 && CONST_INT_P (XEXP (x, 1)))
37888 {
37889 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
37890 if (value == 1)
37891 {
37892 *total = cost->add;
37893 return false;
37894 }
37895 if ((value == 2 || value == 3)
37896 && cost->lea <= cost->shift_const)
37897 {
37898 *total = cost->lea;
37899 return false;
37900 }
37901 }
37902 /* FALLTHRU */
37903
37904 case ROTATE:
37905 case ASHIFTRT:
37906 case LSHIFTRT:
37907 case ROTATERT:
37908 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
37909 {
37910 /* ??? Should be SSE vector operation cost. */
37911 /* At least for published AMD latencies, this really is the same
37912 as the latency for a simple fpu operation like fabs. */
37913 /* V*QImode is emulated with 1-11 insns. */
37914 if (mode == V16QImode || mode == V32QImode)
37915 {
37916 int count = 11;
37917 if (TARGET_XOP && mode == V16QImode)
37918 {
37919 /* For XOP we use vpshab, which requires a broadcast of the
37920 value to the variable shift insn. For constants this
37921 means a V16QImode constant in memory; even when we can perform the
37922 shift with one insn, set the cost to prefer paddb. */
37923 if (CONSTANT_P (XEXP (x, 1)))
37924 {
37925 *total = (cost->fabs
37926 + rtx_cost (XEXP (x, 0), code, 0, speed)
37927 + (speed ? 2 : COSTS_N_BYTES (16)));
37928 return true;
37929 }
37930 count = 3;
37931 }
37932 else if (TARGET_SSSE3)
37933 count = 7;
37934 *total = cost->fabs * count;
37935 }
37936 else
37937 *total = cost->fabs;
37938 }
37939 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
37940 {
37941 if (CONST_INT_P (XEXP (x, 1)))
37942 {
37943 if (INTVAL (XEXP (x, 1)) > 32)
37944 *total = cost->shift_const + COSTS_N_INSNS (2);
37945 else
37946 *total = cost->shift_const * 2;
37947 }
37948 else
37949 {
37950 if (GET_CODE (XEXP (x, 1)) == AND)
37951 *total = cost->shift_var * 2;
37952 else
37953 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
37954 }
37955 }
37956 else
37957 {
37958 if (CONST_INT_P (XEXP (x, 1)))
37959 *total = cost->shift_const;
37960 else if (GET_CODE (XEXP (x, 1)) == SUBREG
37961 && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
37962 {
37963 /* Return the cost after shift-and truncation. */
37964 *total = cost->shift_var;
37965 return true;
37966 }
37967 else
37968 *total = cost->shift_var;
37969 }
37970 return false;
37971
37972 case FMA:
37973 {
37974 rtx sub;
37975
37976 gcc_assert (FLOAT_MODE_P (mode));
37977 gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
37978
37979 /* ??? SSE scalar/vector cost should be used here. */
37980 /* ??? Bald assumption that fma has the same cost as fmul. */
37981 *total = cost->fmul;
37982 *total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
37983
37984 /* Negate in op0 or op2 is free: FMS, FNMA, FNMS. */
37985 sub = XEXP (x, 0);
37986 if (GET_CODE (sub) == NEG)
37987 sub = XEXP (sub, 0);
37988 *total += rtx_cost (sub, FMA, 0, speed);
37989
37990 sub = XEXP (x, 2);
37991 if (GET_CODE (sub) == NEG)
37992 sub = XEXP (sub, 0);
37993 *total += rtx_cost (sub, FMA, 2, speed);
37994 return true;
37995 }
37996
37997 case MULT:
37998 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
37999 {
38000 /* ??? SSE scalar cost should be used here. */
38001 *total = cost->fmul;
38002 return false;
38003 }
38004 else if (X87_FLOAT_MODE_P (mode))
38005 {
38006 *total = cost->fmul;
38007 return false;
38008 }
38009 else if (FLOAT_MODE_P (mode))
38010 {
38011 /* ??? SSE vector cost should be used here. */
38012 *total = cost->fmul;
38013 return false;
38014 }
38015 else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38016 {
38017 /* V*QImode is emulated with 7-13 insns. */
38018 if (mode == V16QImode || mode == V32QImode)
38019 {
38020 int extra = 11;
38021 if (TARGET_XOP && mode == V16QImode)
38022 extra = 5;
38023 else if (TARGET_SSSE3)
38024 extra = 6;
38025 *total = cost->fmul * 2 + cost->fabs * extra;
38026 }
38027 /* V*DImode is emulated with 5-8 insns. */
38028 else if (mode == V2DImode || mode == V4DImode)
38029 {
38030 if (TARGET_XOP && mode == V2DImode)
38031 *total = cost->fmul * 2 + cost->fabs * 3;
38032 else
38033 *total = cost->fmul * 3 + cost->fabs * 5;
38034 }
38035 /* Without sse4.1, we don't have PMULLD; it's emulated with 7
38036 insns, including two PMULUDQ. */
38037 else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
38038 *total = cost->fmul * 2 + cost->fabs * 5;
38039 else
38040 *total = cost->fmul;
38041 return false;
38042 }
38043 else
38044 {
38045 rtx op0 = XEXP (x, 0);
38046 rtx op1 = XEXP (x, 1);
38047 int nbits;
38048 if (CONST_INT_P (XEXP (x, 1)))
38049 {
38050 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
38051 for (nbits = 0; value != 0; value &= value - 1)
38052 nbits++;
38053 }
38054 else
38055 /* This is arbitrary. */
38056 nbits = 7;
38057
38058 /* Compute costs correctly for widening multiplication. */
38059 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
38060 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
38061 == GET_MODE_SIZE (mode))
38062 {
38063 int is_mulwiden = 0;
38064 enum machine_mode inner_mode = GET_MODE (op0);
38065
38066 if (GET_CODE (op0) == GET_CODE (op1))
38067 is_mulwiden = 1, op1 = XEXP (op1, 0);
38068 else if (CONST_INT_P (op1))
38069 {
38070 if (GET_CODE (op0) == SIGN_EXTEND)
38071 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
38072 == INTVAL (op1);
38073 else
38074 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
38075 }
38076
38077 if (is_mulwiden)
38078 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
38079 }
38080
38081 *total = (cost->mult_init[MODE_INDEX (mode)]
38082 + nbits * cost->mult_bit
38083 + rtx_cost (op0, outer_code, opno, speed)
38084 + rtx_cost (op1, outer_code, opno, speed));
38085
38086 return true;
38087 }
38088
38089 case DIV:
38090 case UDIV:
38091 case MOD:
38092 case UMOD:
38093 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38094 /* ??? SSE cost should be used here. */
38095 *total = cost->fdiv;
38096 else if (X87_FLOAT_MODE_P (mode))
38097 *total = cost->fdiv;
38098 else if (FLOAT_MODE_P (mode))
38099 /* ??? SSE vector cost should be used here. */
38100 *total = cost->fdiv;
38101 else
38102 *total = cost->divide[MODE_INDEX (mode)];
38103 return false;
38104
38105 case PLUS:
38106 if (GET_MODE_CLASS (mode) == MODE_INT
38107 && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
38108 {
38109 if (GET_CODE (XEXP (x, 0)) == PLUS
38110 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
38111 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
38112 && CONSTANT_P (XEXP (x, 1)))
38113 {
38114 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
38115 if (val == 2 || val == 4 || val == 8)
38116 {
38117 *total = cost->lea;
38118 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38119 outer_code, opno, speed);
38120 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
38121 outer_code, opno, speed);
38122 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38123 return true;
38124 }
38125 }
38126 else if (GET_CODE (XEXP (x, 0)) == MULT
38127 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
38128 {
38129 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
38130 if (val == 2 || val == 4 || val == 8)
38131 {
38132 *total = cost->lea;
38133 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38134 outer_code, opno, speed);
38135 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38136 return true;
38137 }
38138 }
38139 else if (GET_CODE (XEXP (x, 0)) == PLUS)
38140 {
38141 *total = cost->lea;
38142 *total += rtx_cost (XEXP (XEXP (x, 0), 0),
38143 outer_code, opno, speed);
38144 *total += rtx_cost (XEXP (XEXP (x, 0), 1),
38145 outer_code, opno, speed);
38146 *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
38147 return true;
38148 }
38149 }
38150 /* FALLTHRU */
38151
38152 case MINUS:
38153 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38154 {
38155 /* ??? SSE cost should be used here. */
38156 *total = cost->fadd;
38157 return false;
38158 }
38159 else if (X87_FLOAT_MODE_P (mode))
38160 {
38161 *total = cost->fadd;
38162 return false;
38163 }
38164 else if (FLOAT_MODE_P (mode))
38165 {
38166 /* ??? SSE vector cost should be used here. */
38167 *total = cost->fadd;
38168 return false;
38169 }
38170 /* FALLTHRU */
38171
38172 case AND:
38173 case IOR:
38174 case XOR:
38175 if (GET_MODE_CLASS (mode) == MODE_INT
38176 && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38177 {
38178 *total = (cost->add * 2
38179 + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
38180 << (GET_MODE (XEXP (x, 0)) != DImode))
38181 + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
38182 << (GET_MODE (XEXP (x, 1)) != DImode)));
38183 return true;
38184 }
38185 /* FALLTHRU */
38186
38187 case NEG:
38188 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38189 {
38190 /* ??? SSE cost should be used here. */
38191 *total = cost->fchs;
38192 return false;
38193 }
38194 else if (X87_FLOAT_MODE_P (mode))
38195 {
38196 *total = cost->fchs;
38197 return false;
38198 }
38199 else if (FLOAT_MODE_P (mode))
38200 {
38201 /* ??? SSE vector cost should be used here. */
38202 *total = cost->fchs;
38203 return false;
38204 }
38205 /* FALLTHRU */
38206
38207 case NOT:
38208 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
38209 {
38210 /* ??? Should be SSE vector operation cost. */
38211 /* At least for published AMD latencies, this really is the same
38212 as the latency for a simple fpu operation like fabs. */
38213 *total = cost->fabs;
38214 }
38215 else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
38216 *total = cost->add * 2;
38217 else
38218 *total = cost->add;
38219 return false;
38220
38221 case COMPARE:
38222 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
38223 && XEXP (XEXP (x, 0), 1) == const1_rtx
38224 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
38225 && XEXP (x, 1) == const0_rtx)
38226 {
38227 /* This kind of construct is implemented using test[bwl].
38228 Treat it as if we had an AND. */
38229 *total = (cost->add
38230 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
38231 + rtx_cost (const1_rtx, outer_code, opno, speed));
38232 return true;
38233 }
38234 return false;
38235
38236 case FLOAT_EXTEND:
38237 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
38238 *total = 0;
38239 return false;
38240
38241 case ABS:
38242 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38243 /* ??? SSE cost should be used here. */
38244 *total = cost->fabs;
38245 else if (X87_FLOAT_MODE_P (mode))
38246 *total = cost->fabs;
38247 else if (FLOAT_MODE_P (mode))
38248 /* ??? SSE vector cost should be used here. */
38249 *total = cost->fabs;
38250 return false;
38251
38252 case SQRT:
38253 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
38254 /* ??? SSE cost should be used here. */
38255 *total = cost->fsqrt;
38256 else if (X87_FLOAT_MODE_P (mode))
38257 *total = cost->fsqrt;
38258 else if (FLOAT_MODE_P (mode))
38259 /* ??? SSE vector cost should be used here. */
38260 *total = cost->fsqrt;
38261 return false;
38262
38263 case UNSPEC:
38264 if (XINT (x, 1) == UNSPEC_TP)
38265 *total = 0;
38266 return false;
38267
38268 case VEC_SELECT:
38269 case VEC_CONCAT:
38270 case VEC_DUPLICATE:
38271 /* ??? Assume all of these vector manipulation patterns are
38272 recognizable, in which case they all pretty much have the
38273 same cost. */
38274 *total = cost->fabs;
38275 return true;
38276 case VEC_MERGE:
38277 mask = XEXP (x, 2);
38278 /* This is a masked instruction; assume the same cost
38279 as the non-masked variant. */
38280 if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
38281 *total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
38282 else
38283 *total = cost->fabs;
38284 return true;
38285
38286 default:
38287 return false;
38288 }
38289 }
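
/* Example of the PLUS handling above: an address-like expression
   (plus (mult (reg) (const_int 4)) (const_int 8)) is recognized as a
   single lea, so it is costed as cost->lea plus the cost of the inner
   register operand, rather than as a multiply followed by an add.  */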
38290
38291 #if TARGET_MACHO
38292
38293 static int current_machopic_label_num;
38294
38295 /* Given a symbol name and its associated stub, write out the
38296 definition of the stub. */
38297
38298 void
38299 machopic_output_stub (FILE *file, const char *symb, const char *stub)
38300 {
38301 unsigned int length;
38302 char *binder_name, *symbol_name, lazy_ptr_name[32];
38303 int label = ++current_machopic_label_num;
38304
38305 /* For 64-bit we shouldn't get here. */
38306 gcc_assert (!TARGET_64BIT);
38307
38308 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
38309 symb = targetm.strip_name_encoding (symb);
38310
38311 length = strlen (stub);
38312 binder_name = XALLOCAVEC (char, length + 32);
38313 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
38314
38315 length = strlen (symb);
38316 symbol_name = XALLOCAVEC (char, length + 32);
38317 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
38318
38319 sprintf (lazy_ptr_name, "L%d$lz", label);
38320
38321 if (MACHOPIC_ATT_STUB)
38322 switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
38323 else if (MACHOPIC_PURE)
38324 switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
38325 else
38326 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
38327
38328 fprintf (file, "%s:\n", stub);
38329 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38330
38331 if (MACHOPIC_ATT_STUB)
38332 {
38333 fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
38334 }
38335 else if (MACHOPIC_PURE)
38336 {
38337 /* PIC stub. */
38338 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38339 rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
38340 output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */
38341 fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
38342 label, lazy_ptr_name, label);
38343 fprintf (file, "\tjmp\t*%%ecx\n");
38344 }
38345 else
38346 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
38347
38348 /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
38349 it needs no stub-binding-helper. */
38350 if (MACHOPIC_ATT_STUB)
38351 return;
38352
38353 fprintf (file, "%s:\n", binder_name);
38354
38355 if (MACHOPIC_PURE)
38356 {
38357 fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
38358 fprintf (file, "\tpushl\t%%ecx\n");
38359 }
38360 else
38361 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
38362
38363 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
38364
38365 /* N.B. Keep the correspondence of these
38366 'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
38367 old-pic/new-pic/non-pic stubs; altering this will break
38368 compatibility with existing dylibs. */
38369 if (MACHOPIC_PURE)
38370 {
38371 /* 25-byte PIC stub using "CALL get_pc_thunk". */
38372 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
38373 }
38374 else
38375 /* 16-byte -mdynamic-no-pic stub. */
38376 switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
38377
38378 fprintf (file, "%s:\n", lazy_ptr_name);
38379 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
38380 fprintf (file, ASM_LONG "%s\n", binder_name);
38381 }
38382 #endif /* TARGET_MACHO */
38383
38384 /* Order the registers for register allocator. */
38385
38386 void
38387 x86_order_regs_for_local_alloc (void)
38388 {
38389 int pos = 0;
38390 int i;
38391
38392 /* First allocate the local general purpose registers. */
38393 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38394 if (GENERAL_REGNO_P (i) && call_used_regs[i])
38395 reg_alloc_order [pos++] = i;
38396
38397 /* Global general purpose registers. */
38398 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
38399 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
38400 reg_alloc_order [pos++] = i;
38401
38402 /* x87 registers come first in case we are doing FP math
38403 using them. */
38404 if (!TARGET_SSE_MATH)
38405 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38406 reg_alloc_order [pos++] = i;
38407
38408 /* SSE registers. */
38409 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
38410 reg_alloc_order [pos++] = i;
38411 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
38412 reg_alloc_order [pos++] = i;
38413
38414 /* Extended REX SSE registers. */
38415 for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
38416 reg_alloc_order [pos++] = i;
38417
38418 /* Mask registers. */
38419 for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
38420 reg_alloc_order [pos++] = i;
38421
38422 /* x87 registers. */
38423 if (TARGET_SSE_MATH)
38424 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
38425 reg_alloc_order [pos++] = i;
38426
38427 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
38428 reg_alloc_order [pos++] = i;
38429
38430 /* Initialize the rest of the array, as we do not allocate some registers
38431 at all. */
38432 while (pos < FIRST_PSEUDO_REGISTER)
38433 reg_alloc_order [pos++] = 0;
38434 }
38435
38436 /* Handle a "callee_pop_aggregate_return" attribute; arguments as
38437 in struct attribute_spec handler. */
38438 static tree
38439 ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
38440 tree args,
38441 int flags ATTRIBUTE_UNUSED,
38442 bool *no_add_attrs)
38443 {
38444 if (TREE_CODE (*node) != FUNCTION_TYPE
38445 && TREE_CODE (*node) != METHOD_TYPE
38446 && TREE_CODE (*node) != FIELD_DECL
38447 && TREE_CODE (*node) != TYPE_DECL)
38448 {
38449 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38450 name);
38451 *no_add_attrs = true;
38452 return NULL_TREE;
38453 }
38454 if (TARGET_64BIT)
38455 {
38456 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
38457 name);
38458 *no_add_attrs = true;
38459 return NULL_TREE;
38460 }
38461 if (is_attribute_p ("callee_pop_aggregate_return", name))
38462 {
38463 tree cst;
38464
38465 cst = TREE_VALUE (args);
38466 if (TREE_CODE (cst) != INTEGER_CST)
38467 {
38468 warning (OPT_Wattributes,
38469 "%qE attribute requires an integer constant argument",
38470 name);
38471 *no_add_attrs = true;
38472 }
38473 else if (compare_tree_int (cst, 0) != 0
38474 && compare_tree_int (cst, 1) != 0)
38475 {
38476 warning (OPT_Wattributes,
38477 "argument to %qE attribute is neither zero, nor one",
38478 name);
38479 *no_add_attrs = true;
38480 }
38481
38482 return NULL_TREE;
38483 }
38484
38485 return NULL_TREE;
38486 }
38487
38488 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
38489 struct attribute_spec.handler. */
38490 static tree
38491 ix86_handle_abi_attribute (tree *node, tree name,
38492 tree args ATTRIBUTE_UNUSED,
38493 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38494 {
38495 if (TREE_CODE (*node) != FUNCTION_TYPE
38496 && TREE_CODE (*node) != METHOD_TYPE
38497 && TREE_CODE (*node) != FIELD_DECL
38498 && TREE_CODE (*node) != TYPE_DECL)
38499 {
38500 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38501 name);
38502 *no_add_attrs = true;
38503 return NULL_TREE;
38504 }
38505
38506 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
38507 if (is_attribute_p ("ms_abi", name))
38508 {
38509 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
38510 {
38511 error ("ms_abi and sysv_abi attributes are not compatible");
38512 }
38513
38514 return NULL_TREE;
38515 }
38516 else if (is_attribute_p ("sysv_abi", name))
38517 {
38518 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
38519 {
38520 error ("ms_abi and sysv_abi attributes are not compatible");
38521 }
38522
38523 return NULL_TREE;
38524 }
38525
38526 return NULL_TREE;
38527 }
38528
38529 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
38530 struct attribute_spec.handler. */
38531 static tree
38532 ix86_handle_struct_attribute (tree *node, tree name,
38533 tree args ATTRIBUTE_UNUSED,
38534 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38535 {
38536 tree *type = NULL;
38537 if (DECL_P (*node))
38538 {
38539 if (TREE_CODE (*node) == TYPE_DECL)
38540 type = &TREE_TYPE (*node);
38541 }
38542 else
38543 type = node;
38544
38545 if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
38546 {
38547 warning (OPT_Wattributes, "%qE attribute ignored",
38548 name);
38549 *no_add_attrs = true;
38550 }
38551
38552 else if ((is_attribute_p ("ms_struct", name)
38553 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
38554 || ((is_attribute_p ("gcc_struct", name)
38555 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
38556 {
38557 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
38558 name);
38559 *no_add_attrs = true;
38560 }
38561
38562 return NULL_TREE;
38563 }
38564
38565 static tree
38566 ix86_handle_fndecl_attribute (tree *node, tree name,
38567 tree args ATTRIBUTE_UNUSED,
38568 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
38569 {
38570 if (TREE_CODE (*node) != FUNCTION_DECL)
38571 {
38572 warning (OPT_Wattributes, "%qE attribute only applies to functions",
38573 name);
38574 *no_add_attrs = true;
38575 }
38576 return NULL_TREE;
38577 }
38578
38579 static bool
38580 ix86_ms_bitfield_layout_p (const_tree record_type)
38581 {
38582 return ((TARGET_MS_BITFIELD_LAYOUT
38583 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
38584 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
38585 }
38586
38587 /* Returns an expression indicating where the this parameter is
38588 located on entry to the FUNCTION. */
38589
38590 static rtx
38591 x86_this_parameter (tree function)
38592 {
38593 tree type = TREE_TYPE (function);
38594 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
38595 int nregs;
38596
38597 if (TARGET_64BIT)
38598 {
38599 const int *parm_regs;
38600
38601 if (ix86_function_type_abi (type) == MS_ABI)
38602 parm_regs = x86_64_ms_abi_int_parameter_registers;
38603 else
38604 parm_regs = x86_64_int_parameter_registers;
38605 return gen_rtx_REG (Pmode, parm_regs[aggr]);
38606 }
38607
38608 nregs = ix86_function_regparm (type, function);
38609
38610 if (nregs > 0 && !stdarg_p (type))
38611 {
38612 int regno;
38613 unsigned int ccvt = ix86_get_callcvt (type);
38614
38615 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38616 regno = aggr ? DX_REG : CX_REG;
38617 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38618 {
38619 regno = CX_REG;
38620 if (aggr)
38621 return gen_rtx_MEM (SImode,
38622 plus_constant (Pmode, stack_pointer_rtx, 4));
38623 }
38624 else
38625 {
38626 regno = AX_REG;
38627 if (aggr)
38628 {
38629 regno = DX_REG;
38630 if (nregs == 1)
38631 return gen_rtx_MEM (SImode,
38632 plus_constant (Pmode,
38633 stack_pointer_rtx, 4));
38634 }
38635 }
38636 return gen_rtx_REG (SImode, regno);
38637 }
38638
38639 return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
38640 aggr ? 8 : 4));
38641 }
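
/* For example: under the 32-bit fastcall convention `this' arrives in
   %ecx (or %edx when the return value is an aggregate returned via a
   hidden pointer), while in the plain stack-based case it is found at
   4(%esp), or at 8(%esp) when the hidden aggregate-return pointer
   occupies the first argument slot.  */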
38642
38643 /* Determine whether x86_output_mi_thunk can succeed. */
38644
38645 static bool
38646 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
38647 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
38648 HOST_WIDE_INT vcall_offset, const_tree function)
38649 {
38650 /* 64-bit can handle anything. */
38651 if (TARGET_64BIT)
38652 return true;
38653
38654 /* For 32-bit, everything's fine if we have one free register. */
38655 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
38656 return true;
38657
38658 /* Need a free register for vcall_offset. */
38659 if (vcall_offset)
38660 return false;
38661
38662 /* Need a free register for GOT references. */
38663 if (flag_pic && !targetm.binds_local_p (function))
38664 return false;
38665
38666 /* Otherwise ok. */
38667 return true;
38668 }
38669
38670 /* Output the assembler code for a thunk function. THUNK_DECL is the
38671 declaration for the thunk function itself, FUNCTION is the decl for
38672 the target function. DELTA is an immediate constant offset to be
38673 added to THIS. If VCALL_OFFSET is nonzero, the word at
38674 *(*this + vcall_offset) should be added to THIS. */
38675
38676 static void
38677 x86_output_mi_thunk (FILE *file,
38678 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
38679 HOST_WIDE_INT vcall_offset, tree function)
38680 {
38681 rtx this_param = x86_this_parameter (function);
38682 rtx this_reg, tmp, fnaddr;
38683 unsigned int tmp_regno;
38684
38685 if (TARGET_64BIT)
38686 tmp_regno = R10_REG;
38687 else
38688 {
38689 unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
38690 if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
38691 tmp_regno = AX_REG;
38692 else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
38693 tmp_regno = DX_REG;
38694 else
38695 tmp_regno = CX_REG;
38696 }
38697
38698 emit_note (NOTE_INSN_PROLOGUE_END);
38699
38700 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
38701 pull it in now and let DELTA benefit. */
38702 if (REG_P (this_param))
38703 this_reg = this_param;
38704 else if (vcall_offset)
38705 {
38706 /* Put the this parameter into %eax. */
38707 this_reg = gen_rtx_REG (Pmode, AX_REG);
38708 emit_move_insn (this_reg, this_param);
38709 }
38710 else
38711 this_reg = NULL_RTX;
38712
38713 /* Adjust the this parameter by a fixed constant. */
38714 if (delta)
38715 {
38716 rtx delta_rtx = GEN_INT (delta);
38717 rtx delta_dst = this_reg ? this_reg : this_param;
38718
38719 if (TARGET_64BIT)
38720 {
38721 if (!x86_64_general_operand (delta_rtx, Pmode))
38722 {
38723 tmp = gen_rtx_REG (Pmode, tmp_regno);
38724 emit_move_insn (tmp, delta_rtx);
38725 delta_rtx = tmp;
38726 }
38727 }
38728
38729 ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
38730 }
38731
38732 /* Adjust the this parameter by a value stored in the vtable. */
38733 if (vcall_offset)
38734 {
38735 rtx vcall_addr, vcall_mem, this_mem;
38736
38737 tmp = gen_rtx_REG (Pmode, tmp_regno);
38738
38739 this_mem = gen_rtx_MEM (ptr_mode, this_reg);
38740 if (Pmode != ptr_mode)
38741 this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
38742 emit_move_insn (tmp, this_mem);
38743
38744 /* Adjust the this parameter. */
38745 vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
38746 if (TARGET_64BIT
38747 && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
38748 {
38749 rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
38750 emit_move_insn (tmp2, GEN_INT (vcall_offset));
38751 vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
38752 }
38753
38754 vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
38755 if (Pmode != ptr_mode)
38756 emit_insn (gen_addsi_1_zext (this_reg,
38757 gen_rtx_REG (ptr_mode,
38758 REGNO (this_reg)),
38759 vcall_mem));
38760 else
38761 ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
38762 }
38763
38764 /* If necessary, drop THIS back to its stack slot. */
38765 if (this_reg && this_reg != this_param)
38766 emit_move_insn (this_param, this_reg);
38767
38768 fnaddr = XEXP (DECL_RTL (function), 0);
38769 if (TARGET_64BIT)
38770 {
38771 if (!flag_pic || targetm.binds_local_p (function)
38772 || TARGET_PECOFF)
38773 ;
38774 else
38775 {
38776 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
38777 tmp = gen_rtx_CONST (Pmode, tmp);
38778 fnaddr = gen_const_mem (Pmode, tmp);
38779 }
38780 }
38781 else
38782 {
38783 if (!flag_pic || targetm.binds_local_p (function))
38784 ;
38785 #if TARGET_MACHO
38786 else if (TARGET_MACHO)
38787 {
38788 fnaddr = machopic_indirect_call_target (DECL_RTL (function));
38789 fnaddr = XEXP (fnaddr, 0);
38790 }
38791 #endif /* TARGET_MACHO */
38792 else
38793 {
38794 tmp = gen_rtx_REG (Pmode, CX_REG);
38795 output_set_got (tmp, NULL_RTX);
38796
38797 fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
38798 fnaddr = gen_rtx_CONST (Pmode, fnaddr);
38799 fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
38800 fnaddr = gen_const_mem (Pmode, fnaddr);
38801 }
38802 }
38803
38804 /* Our sibling call patterns do not allow memories, because we have no
38805 predicate that can distinguish between frame and non-frame memory.
38806 For our purposes here, we can get away with (ab)using a jump pattern,
38807 because we're going to do no optimization. */
38808 if (MEM_P (fnaddr))
38809 emit_jump_insn (gen_indirect_jump (fnaddr));
38810 else
38811 {
38812 if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
38813 fnaddr = legitimize_pic_address (fnaddr,
38814 gen_rtx_REG (Pmode, tmp_regno));
38815
38816 if (!sibcall_insn_operand (fnaddr, word_mode))
38817 {
38818 tmp = gen_rtx_REG (word_mode, tmp_regno);
38819 if (GET_MODE (fnaddr) != word_mode)
38820 fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
38821 emit_move_insn (tmp, fnaddr);
38822 fnaddr = tmp;
38823 }
38824
38825 tmp = gen_rtx_MEM (QImode, fnaddr);
38826 tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
38827 tmp = emit_call_insn (tmp);
38828 SIBLING_CALL_P (tmp) = 1;
38829 }
38830 emit_barrier ();
38831
38832 /* Emit just enough of rest_of_compilation to get the insns emitted.
38833 Note that use_thunk calls assemble_start_function et al. */
38834 tmp = get_insns ();
38835 shorten_branches (tmp);
38836 final_start_function (tmp, file, 1);
38837 final (tmp, file, 1);
38838 final_end_function ();
38839 }
38840
38841 static void
38842 x86_file_start (void)
38843 {
38844 default_file_start ();
38845 if (TARGET_16BIT)
38846 fputs ("\t.code16gcc\n", asm_out_file);
38847 #if TARGET_MACHO
38848 darwin_file_start ();
38849 #endif
38850 if (X86_FILE_START_VERSION_DIRECTIVE)
38851 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
38852 if (X86_FILE_START_FLTUSED)
38853 fputs ("\t.global\t__fltused\n", asm_out_file);
38854 if (ix86_asm_dialect == ASM_INTEL)
38855 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
38856 }
38857
38858 int
38859 x86_field_alignment (tree field, int computed)
38860 {
38861 enum machine_mode mode;
38862 tree type = TREE_TYPE (field);
38863
38864 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
38865 return computed;
38866 mode = TYPE_MODE (strip_array_types (type));
38867 if (mode == DFmode || mode == DCmode
38868 || GET_MODE_CLASS (mode) == MODE_INT
38869 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
38870 return MIN (32, computed);
38871 return computed;
38872 }
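
/* Illustration of the rule above: on 32-bit x86 without -malign-double,
   a `double' (DFmode) struct field is limited to MIN (32, computed) bits,
   i.e. 4-byte alignment, matching the traditional i386 struct layout.  */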
38873
38874 /* Output assembler code to FILE to increment profiler label # LABELNO
38875 for profiling a function entry. */
38876 void
38877 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
38878 {
38879 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
38880 : MCOUNT_NAME);
38881
38882 if (TARGET_64BIT)
38883 {
38884 #ifndef NO_PROFILE_COUNTERS
38885 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
38886 #endif
38887
38888 if (!TARGET_PECOFF && flag_pic)
38889 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
38890 else
38891 fprintf (file, "\tcall\t%s\n", mcount_name);
38892 }
38893 else if (flag_pic)
38894 {
38895 #ifndef NO_PROFILE_COUNTERS
38896 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
38897 LPREFIX, labelno);
38898 #endif
38899 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
38900 }
38901 else
38902 {
38903 #ifndef NO_PROFILE_COUNTERS
38904 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
38905 LPREFIX, labelno);
38906 #endif
38907 fprintf (file, "\tcall\t%s\n", mcount_name);
38908 }
38909 }
38910
38911 /* We don't have exact information about the insn sizes, but we may assume
38912 quite safely that we are informed about all 1 byte insns and memory
38913 address sizes. This is enough to eliminate unnecessary padding in
38914 99% of cases. */
38915
38916 static int
38917 min_insn_size (rtx insn)
38918 {
38919 int l = 0, len;
38920
38921 if (!INSN_P (insn) || !active_insn_p (insn))
38922 return 0;
38923
38924 /* Discard alignments we've emitted and jump instructions. */
38925 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
38926 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
38927 return 0;
38928
38929 /* Important case - calls are always 5 bytes.
38930 It is common to have many calls in a row. */
38931 if (CALL_P (insn)
38932 && symbolic_reference_mentioned_p (PATTERN (insn))
38933 && !SIBLING_CALL_P (insn))
38934 return 5;
38935 len = get_attr_length (insn);
38936 if (len <= 1)
38937 return 1;
38938
38939 /* For normal instructions we rely on get_attr_length being exact,
38940 with a few exceptions. */
38941 if (!JUMP_P (insn))
38942 {
38943 enum attr_type type = get_attr_type (insn);
38944
38945 switch (type)
38946 {
38947 case TYPE_MULTI:
38948 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
38949 || asm_noperands (PATTERN (insn)) >= 0)
38950 return 0;
38951 break;
38952 case TYPE_OTHER:
38953 case TYPE_FCMP:
38954 break;
38955 default:
38956 /* Otherwise trust get_attr_length. */
38957 return len;
38958 }
38959
38960 l = get_attr_length_address (insn);
38961 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
38962 l = 4;
38963 }
38964 if (l)
38965 return 1+l;
38966 else
38967 return 2;
38968 }
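
/* As a concrete case of the above: a direct call to a known symbol is
   counted as exactly 5 bytes (one opcode byte plus a 4-byte displacement),
   while instructions whose length attribute cannot be fully trusted are
   estimated from the length of their memory address instead.  */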
38969
38970 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
38971
38972 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
38973 window. */
38974
38975 static void
38976 ix86_avoid_jump_mispredicts (void)
38977 {
38978 rtx insn, start = get_insns ();
38979 int nbytes = 0, njumps = 0;
38980 int isjump = 0;
38981
38982 /* Look for all minimal intervals of instructions containing 4 jumps.
38983 The intervals are bounded by START and INSN. NBYTES is the total
38984 size of instructions in the interval including INSN and not including
38985 START.  When NBYTES is smaller than 16 bytes, it is possible
38986 that the ends of START and INSN fall in the same 16-byte window.
38987
38988 The smallest offset in the window at which INSN can start is the case
38989 where START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
38990 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
38991
38992 Don't consider an asm goto as a jump; while it can contain a jump, it
38993 doesn't have to, as control transfer to its label(s) can be performed by
38994 other means, and we also estimate the minimum length of all asm stmts as 0.  */
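  /* Worked example: if, after trimming the window back to three jumps,
     nbytes is 13 and the incoming fourth jump is 2 bytes long, then
     padsize = 15 - 13 + 2 = 4, and the 4-byte pad emitted before that
     jump keeps all four jumps from fitting in one 16-byte window.  */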
38995 for (insn = start; insn; insn = NEXT_INSN (insn))
38996 {
38997 int min_size;
38998
38999 if (LABEL_P (insn))
39000 {
39001 int align = label_to_alignment (insn);
39002 int max_skip = label_to_max_skip (insn);
39003
39004 if (max_skip > 15)
39005 max_skip = 15;
39006 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
39007 already in the current 16 byte page, because otherwise
39008 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
39009 bytes to reach 16 byte boundary. */
39010 if (align <= 0
39011 || (align <= 3 && max_skip != (1 << align) - 1))
39012 max_skip = 0;
39013 if (dump_file)
39014 fprintf (dump_file, "Label %i with max_skip %i\n",
39015 INSN_UID (insn), max_skip);
39016 if (max_skip)
39017 {
39018 while (nbytes + max_skip >= 16)
39019 {
39020 start = NEXT_INSN (start);
39021 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39022 || CALL_P (start))
39023 njumps--, isjump = 1;
39024 else
39025 isjump = 0;
39026 nbytes -= min_insn_size (start);
39027 }
39028 }
39029 continue;
39030 }
39031
39032 min_size = min_insn_size (insn);
39033 nbytes += min_size;
39034 if (dump_file)
39035 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
39036 INSN_UID (insn), min_size);
39037 if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
39038 || CALL_P (insn))
39039 njumps++;
39040 else
39041 continue;
39042
39043 while (njumps > 3)
39044 {
39045 start = NEXT_INSN (start);
39046 if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
39047 || CALL_P (start))
39048 njumps--, isjump = 1;
39049 else
39050 isjump = 0;
39051 nbytes -= min_insn_size (start);
39052 }
39053 gcc_assert (njumps >= 0);
39054 if (dump_file)
39055 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
39056 INSN_UID (start), INSN_UID (insn), nbytes);
39057
39058 if (njumps == 3 && isjump && nbytes < 16)
39059 {
39060 int padsize = 15 - nbytes + min_insn_size (insn);
39061
39062 if (dump_file)
39063 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
39064 INSN_UID (insn), padsize);
39065 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
39066 }
39067 }
39068 }
39069 #endif
39070
39071 /* AMD Athlon works faster
39072 when RET is not the destination of a conditional jump or directly preceded
39073 by another jump instruction.  We avoid the penalty by replacing such a RET
39074 with the longer form of the return instruction.  */
39075 static void
39076 ix86_pad_returns (void)
39077 {
39078 edge e;
39079 edge_iterator ei;
39080
39081 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39082 {
39083 basic_block bb = e->src;
39084 rtx ret = BB_END (bb);
39085 rtx prev;
39086 bool replace = false;
39087
39088 if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
39089 || optimize_bb_for_size_p (bb))
39090 continue;
39091 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
39092 if (active_insn_p (prev) || LABEL_P (prev))
39093 break;
39094 if (prev && LABEL_P (prev))
39095 {
39096 edge e;
39097 edge_iterator ei;
39098
39099 FOR_EACH_EDGE (e, ei, bb->preds)
39100 if (EDGE_FREQUENCY (e) && e->src->index >= 0
39101 && !(e->flags & EDGE_FALLTHRU))
39102 {
39103 replace = true;
39104 break;
39105 }
39106 }
39107 if (!replace)
39108 {
39109 prev = prev_active_insn (ret);
39110 if (prev
39111 && ((JUMP_P (prev) && any_condjump_p (prev))
39112 || CALL_P (prev)))
39113 replace = true;
39114 /* Empty functions get a branch mispredict even when
39115 the jump destination is not visible to us. */
39116 if (!prev && !optimize_function_for_size_p (cfun))
39117 replace = true;
39118 }
39119 if (replace)
39120 {
39121 emit_jump_insn_before (gen_simple_return_internal_long (), ret);
39122 delete_insn (ret);
39123 }
39124 }
39125 }
39126
39127 /* Count the minimum number of instructions in BB. Return 4 if the
39128 number of instructions >= 4. */
39129
39130 static int
39131 ix86_count_insn_bb (basic_block bb)
39132 {
39133 rtx insn;
39134 int insn_count = 0;
39135
39136 /* Count number of instructions in this block. Return 4 if the number
39137 of instructions >= 4. */
39138 FOR_BB_INSNS (bb, insn)
39139 {
39140 /* This only happens in exit blocks. */
39141 if (JUMP_P (insn)
39142 && ANY_RETURN_P (PATTERN (insn)))
39143 break;
39144
39145 if (NONDEBUG_INSN_P (insn)
39146 && GET_CODE (PATTERN (insn)) != USE
39147 && GET_CODE (PATTERN (insn)) != CLOBBER)
39148 {
39149 insn_count++;
39150 if (insn_count >= 4)
39151 return insn_count;
39152 }
39153 }
39154
39155 return insn_count;
39156 }
39157
39158
39159 /* Count the minimum number of instructions in code path in BB.
39160 Return 4 if the number of instructions >= 4. */
39161
39162 static int
39163 ix86_count_insn (basic_block bb)
39164 {
39165 edge e;
39166 edge_iterator ei;
39167 int min_prev_count;
39168
39169 /* Only bother counting instructions along paths with no
39170 more than 2 basic blocks between entry and exit. Given
39171 that BB has an edge to exit, determine if a predecessor
39172 of BB has an edge from entry. If so, compute the number
39173 of instructions in the predecessor block. If there
39174 happen to be multiple such blocks, compute the minimum. */
39175 min_prev_count = 4;
39176 FOR_EACH_EDGE (e, ei, bb->preds)
39177 {
39178 edge prev_e;
39179 edge_iterator prev_ei;
39180
39181 if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39182 {
39183 min_prev_count = 0;
39184 break;
39185 }
39186 FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
39187 {
39188 if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
39189 {
39190 int count = ix86_count_insn_bb (e->src);
39191 if (count < min_prev_count)
39192 min_prev_count = count;
39193 break;
39194 }
39195 }
39196 }
39197
39198 if (min_prev_count < 4)
39199 min_prev_count += ix86_count_insn_bb (bb);
39200
39201 return min_prev_count;
39202 }
39203
39204 /* Pad short function to 4 instructions. */
39205
39206 static void
39207 ix86_pad_short_function (void)
39208 {
39209 edge e;
39210 edge_iterator ei;
39211
39212 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39213 {
39214 rtx ret = BB_END (e->src);
39215 if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
39216 {
39217 int insn_count = ix86_count_insn (e->src);
39218
39219 /* Pad short function. */
39220 if (insn_count < 4)
39221 {
39222 rtx insn = ret;
39223
39224 /* Find epilogue. */
39225 while (insn
39226 && (!NOTE_P (insn)
39227 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
39228 insn = PREV_INSN (insn);
39229
39230 if (!insn)
39231 insn = ret;
39232
39233 /* Two NOPs count as one instruction. */
39234 insn_count = 2 * (4 - insn_count);
39235 emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
39236 }
39237 }
39238 }
39239 }
39240
39241 /* Fix up a Windows system unwinder issue. If an EH region falls through into
39242 the epilogue, the Windows system unwinder will apply epilogue logic and
39243 produce incorrect offsets. This can be avoided by adding a nop between
39244 the last insn that can throw and the first insn of the epilogue. */
39245
39246 static void
39247 ix86_seh_fixup_eh_fallthru (void)
39248 {
39249 edge e;
39250 edge_iterator ei;
39251
39252 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
39253 {
39254 rtx insn, next;
39255
39256 /* Find the beginning of the epilogue. */
39257 for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
39258 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
39259 break;
39260 if (insn == NULL)
39261 continue;
39262
39263 /* We only care about preceding insns that can throw. */
39264 insn = prev_active_insn (insn);
39265 if (insn == NULL || !can_throw_internal (insn))
39266 continue;
39267
39268 /* Do not separate calls from their debug information. */
39269 for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
39270 if (NOTE_P (next)
39271 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
39272 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
39273 insn = next;
39274 else
39275 break;
39276
39277 emit_insn_after (gen_nops (const1_rtx), insn);
39278 }
39279 }
39280
39281 /* Implement machine-specific optimizations.  We implement padding of returns
39282 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
39283 static void
39284 ix86_reorg (void)
39285 {
39286 /* We are freeing block_for_insn in the toplev to keep compatibility
39287 with old MDEP_REORGS that are not CFG based. Recompute it now. */
39288 compute_bb_for_insn ();
39289
39290 if (TARGET_SEH && current_function_has_exception_handlers ())
39291 ix86_seh_fixup_eh_fallthru ();
39292
39293 if (optimize && optimize_function_for_speed_p (cfun))
39294 {
39295 if (TARGET_PAD_SHORT_FUNCTION)
39296 ix86_pad_short_function ();
39297 else if (TARGET_PAD_RETURNS)
39298 ix86_pad_returns ();
39299 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
39300 if (TARGET_FOUR_JUMP_LIMIT)
39301 ix86_avoid_jump_mispredicts ();
39302 #endif
39303 }
39304 }
39305
39306 /* Return nonzero when a QImode register that must be represented via a REX prefix
39307 is used. */
39308 bool
39309 x86_extended_QIreg_mentioned_p (rtx insn)
39310 {
39311 int i;
39312 extract_insn_cached (insn);
39313 for (i = 0; i < recog_data.n_operands; i++)
39314 if (GENERAL_REG_P (recog_data.operand[i])
39315 && !QI_REGNO_P (REGNO (recog_data.operand[i])))
39316 return true;
39317 return false;
39318 }
39319
39320 /* Return nonzero when P points to a register encoded via a REX prefix.
39321 Called via for_each_rtx. */
39322 static int
39323 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
39324 {
39325 unsigned int regno;
39326 if (!REG_P (*p))
39327 return 0;
39328 regno = REGNO (*p);
39329 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
39330 }
39331
39332 /* Return true when INSN mentions register that must be encoded using REX
39333 prefix. */
39334 bool
39335 x86_extended_reg_mentioned_p (rtx insn)
39336 {
39337 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
39338 extended_reg_mentioned_1, NULL);
39339 }
39340
39341 /* If profitable, negate (without causing overflow) integer constant
39342 of mode MODE at location LOC. Return true in this case. */
39343 bool
39344 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
39345 {
39346 HOST_WIDE_INT val;
39347
39348 if (!CONST_INT_P (*loc))
39349 return false;
39350
39351 switch (mode)
39352 {
39353 case DImode:
39354 /* DImode x86_64 constants must fit in 32 bits. */
39355 gcc_assert (x86_64_immediate_operand (*loc, mode));
39356
39357 mode = SImode;
39358 break;
39359
39360 case SImode:
39361 case HImode:
39362 case QImode:
39363 break;
39364
39365 default:
39366 gcc_unreachable ();
39367 }
39368
39369 /* Avoid overflows. */
39370 if (mode_signbit_p (mode, *loc))
39371 return false;
39372
39373 val = INTVAL (*loc);
39374
39375 /* Make things pretty: use `subl $4,%eax' rather than `addl $-4,%eax'.
39376 Exception: -128 encodes in fewer bytes than 128, so keep -128 and instead negate 128. */
39377 if ((val < 0 && val != -128)
39378 || val == 128)
39379 {
39380 *loc = GEN_INT (-val);
39381 return true;
39382 }
39383
39384 return false;
39385 }
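
/* For example, (plus (reg) (const_int -4)) has its constant rewritten to 4
   so the insn can be emitted as `subl $4,%eax'; 128 is rewritten to -128
   because -128 fits in a sign-extended 8-bit immediate while 128 needs a
   full 32-bit one, and -128 itself is left untouched for the same reason.  */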
39386
39387 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
39388 optabs would emit if we didn't have TFmode patterns. */
39389
39390 void
39391 x86_emit_floatuns (rtx operands[2])
39392 {
39393 rtx neglab, donelab, i0, i1, f0, in, out;
39394 enum machine_mode mode, inmode;
39395
39396 inmode = GET_MODE (operands[1]);
39397 gcc_assert (inmode == SImode || inmode == DImode);
39398
39399 out = operands[0];
39400 in = force_reg (inmode, operands[1]);
39401 mode = GET_MODE (out);
39402 neglab = gen_label_rtx ();
39403 donelab = gen_label_rtx ();
39404 f0 = gen_reg_rtx (mode);
39405
39406 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
39407
39408 expand_float (out, in, 0);
39409
39410 emit_jump_insn (gen_jump (donelab));
39411 emit_barrier ();
39412
39413 emit_label (neglab);
39414
39415 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
39416 1, OPTAB_DIRECT);
39417 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
39418 1, OPTAB_DIRECT);
39419 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
39420
39421 expand_float (f0, i0, 0);
39422
39423 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
39424
39425 emit_label (donelab);
39426 }
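
/* Sketch of the sequence emitted above for a value IN with the sign bit
   set: i0 = (IN >> 1) | (IN & 1) halves the value while keeping the
   discarded low bit sticky so the result still rounds correctly,
   f0 = (float) i0 converts the now-nonnegative value, and OUT = f0 + f0
   doubles it back to the original magnitude.  */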
39427 \f
39428 /* AVX512F does support 64-byte integer vector operations,
39429 thus the longest vector we are faced with is V64QImode. */
39430 #define MAX_VECT_LEN 64
39431
39432 struct expand_vec_perm_d
39433 {
39434 rtx target, op0, op1;
39435 unsigned char perm[MAX_VECT_LEN];
39436 enum machine_mode vmode;
39437 unsigned char nelt;
39438 bool one_operand_p;
39439 bool testing_p;
39440 };
39441
39442 static bool canonicalize_perm (struct expand_vec_perm_d *d);
39443 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
39444 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
39445
39446 /* Get a vector mode of the same size as the original but with elements
39447 twice as wide. This is only guaranteed to apply to integral vectors. */
39448
39449 static inline enum machine_mode
39450 get_mode_wider_vector (enum machine_mode o)
39451 {
39452 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
39453 enum machine_mode n = GET_MODE_WIDER_MODE (o);
39454 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
39455 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
39456 return n;
39457 }
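
/* Example: for V16QImode this yields V8HImode, and for V8HImode it
   yields V4SImode; the byte size stays the same while each element
   doubles in width.  */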
39458
39459 /* A subroutine of ix86_expand_vector_init_duplicate. Tries to
39460 fill target with val via vec_duplicate. */
39461
39462 static bool
39463 ix86_vector_duplicate_value (enum machine_mode mode, rtx target, rtx val)
39464 {
39465 bool ok;
39466 rtx insn, dup;
39467
39468 /* First attempt to recognize VAL as-is. */
39469 dup = gen_rtx_VEC_DUPLICATE (mode, val);
39470 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
39471 if (recog_memoized (insn) < 0)
39472 {
39473 rtx seq;
39474 /* If that fails, force VAL into a register. */
39475
39476 start_sequence ();
39477 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
39478 seq = get_insns ();
39479 end_sequence ();
39480 if (seq)
39481 emit_insn_before (seq, insn);
39482
39483 ok = recog_memoized (insn) >= 0;
39484 gcc_assert (ok);
39485 }
39486 return true;
39487 }
39488
39489 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39490 with all elements equal to VAR. Return true if successful. */
39491
39492 static bool
39493 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
39494 rtx target, rtx val)
39495 {
39496 bool ok;
39497
39498 switch (mode)
39499 {
39500 case V2SImode:
39501 case V2SFmode:
39502 if (!mmx_ok)
39503 return false;
39504 /* FALLTHRU */
39505
39506 case V4DFmode:
39507 case V4DImode:
39508 case V8SFmode:
39509 case V8SImode:
39510 case V2DFmode:
39511 case V2DImode:
39512 case V4SFmode:
39513 case V4SImode:
39514 case V16SImode:
39515 case V8DImode:
39516 case V16SFmode:
39517 case V8DFmode:
39518 return ix86_vector_duplicate_value (mode, target, val);
39519
39520 case V4HImode:
39521 if (!mmx_ok)
39522 return false;
39523 if (TARGET_SSE || TARGET_3DNOW_A)
39524 {
39525 rtx x;
39526
39527 val = gen_lowpart (SImode, val);
39528 x = gen_rtx_TRUNCATE (HImode, val);
39529 x = gen_rtx_VEC_DUPLICATE (mode, x);
39530 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39531 return true;
39532 }
39533 goto widen;
39534
39535 case V8QImode:
39536 if (!mmx_ok)
39537 return false;
39538 goto widen;
39539
39540 case V8HImode:
39541 if (TARGET_SSE2)
39542 {
39543 struct expand_vec_perm_d dperm;
39544 rtx tmp1, tmp2;
39545
39546 permute:
39547 memset (&dperm, 0, sizeof (dperm));
39548 dperm.target = target;
39549 dperm.vmode = mode;
39550 dperm.nelt = GET_MODE_NUNITS (mode);
39551 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
39552 dperm.one_operand_p = true;
39553
39554 /* Extend to SImode using a paradoxical SUBREG. */
39555 tmp1 = gen_reg_rtx (SImode);
39556 emit_move_insn (tmp1, gen_lowpart (SImode, val));
39557
39558 /* Insert the SImode value as low element of a V4SImode vector. */
39559 tmp2 = gen_reg_rtx (V4SImode);
39560 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
39561 emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
39562
39563 ok = (expand_vec_perm_1 (&dperm)
39564 || expand_vec_perm_broadcast_1 (&dperm));
39565 gcc_assert (ok);
39566 return ok;
39567 }
39568 goto widen;
39569
39570 case V16QImode:
39571 if (TARGET_SSE2)
39572 goto permute;
39573 goto widen;
39574
39575 widen:
39576 /* Replicate the value once into the next wider mode and recurse. */
39577 {
39578 enum machine_mode smode, wsmode, wvmode;
39579 rtx x;
39580
39581 smode = GET_MODE_INNER (mode);
39582 wvmode = get_mode_wider_vector (mode);
39583 wsmode = GET_MODE_INNER (wvmode);
39584
39585 val = convert_modes (wsmode, smode, val, true);
39586 x = expand_simple_binop (wsmode, ASHIFT, val,
39587 GEN_INT (GET_MODE_BITSIZE (smode)),
39588 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39589 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
39590
39591 x = gen_reg_rtx (wvmode);
39592 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
39593 gcc_assert (ok);
39594 emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
39595 return ok;
39596 }
39597
39598 case V16HImode:
39599 case V32QImode:
39600 {
39601 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
39602 rtx x = gen_reg_rtx (hvmode);
39603
39604 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
39605 gcc_assert (ok);
39606
39607 x = gen_rtx_VEC_CONCAT (mode, x, x);
39608 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39609 }
39610 return true;
39611
39612 default:
39613 return false;
39614 }
39615 }
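
/* Illustration of the `widen' strategy above: to broadcast a QImode
   value without SSE2 we first form an HImode value equal to
   (val << 8) | val and then recursively broadcast that in the vector
   mode with half as many, twice as wide elements.  */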
39616
39617 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39618 whose ONE_VAR element is VAR, and other elements are zero. Return true
39619 if successful. */
39620
39621 static bool
39622 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
39623 rtx target, rtx var, int one_var)
39624 {
39625 enum machine_mode vsimode;
39626 rtx new_target;
39627 rtx x, tmp;
39628 bool use_vector_set = false;
39629
39630 switch (mode)
39631 {
39632 case V2DImode:
39633 /* For SSE4.1, we normally use vector set. But if the second
39634 element is zero and inter-unit moves are OK, we use movq
39635 instead. */
39636 use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
39637 && !(TARGET_INTER_UNIT_MOVES_TO_VEC
39638 && one_var == 0));
39639 break;
39640 case V16QImode:
39641 case V4SImode:
39642 case V4SFmode:
39643 use_vector_set = TARGET_SSE4_1;
39644 break;
39645 case V8HImode:
39646 use_vector_set = TARGET_SSE2;
39647 break;
39648 case V4HImode:
39649 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
39650 break;
39651 case V32QImode:
39652 case V16HImode:
39653 case V8SImode:
39654 case V8SFmode:
39655 case V4DFmode:
39656 use_vector_set = TARGET_AVX;
39657 break;
39658 case V4DImode:
39659 /* Use ix86_expand_vector_set in 64bit mode only. */
39660 use_vector_set = TARGET_AVX && TARGET_64BIT;
39661 break;
39662 default:
39663 break;
39664 }
39665
39666 if (use_vector_set)
39667 {
39668 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
39669 var = force_reg (GET_MODE_INNER (mode), var);
39670 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39671 return true;
39672 }
39673
39674 switch (mode)
39675 {
39676 case V2SFmode:
39677 case V2SImode:
39678 if (!mmx_ok)
39679 return false;
39680 /* FALLTHRU */
39681
39682 case V2DFmode:
39683 case V2DImode:
39684 if (one_var != 0)
39685 return false;
39686 var = force_reg (GET_MODE_INNER (mode), var);
39687 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
39688 emit_insn (gen_rtx_SET (VOIDmode, target, x));
39689 return true;
39690
39691 case V4SFmode:
39692 case V4SImode:
39693 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
39694 new_target = gen_reg_rtx (mode);
39695 else
39696 new_target = target;
39697 var = force_reg (GET_MODE_INNER (mode), var);
39698 x = gen_rtx_VEC_DUPLICATE (mode, var);
39699 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
39700 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
39701 if (one_var != 0)
39702 {
39703 /* We need to shuffle the value to the correct position, so
39704 create a new pseudo to store the intermediate result. */
39705
39706 /* With SSE2, we can use the integer shuffle insns. */
39707 if (mode != V4SFmode && TARGET_SSE2)
39708 {
39709 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
39710 const1_rtx,
39711 GEN_INT (one_var == 1 ? 0 : 1),
39712 GEN_INT (one_var == 2 ? 0 : 1),
39713 GEN_INT (one_var == 3 ? 0 : 1)));
39714 if (target != new_target)
39715 emit_move_insn (target, new_target);
39716 return true;
39717 }
39718
39719 /* Otherwise convert the intermediate result to V4SFmode and
39720 use the SSE1 shuffle instructions. */
39721 if (mode != V4SFmode)
39722 {
39723 tmp = gen_reg_rtx (V4SFmode);
39724 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
39725 }
39726 else
39727 tmp = new_target;
39728
39729 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
39730 const1_rtx,
39731 GEN_INT (one_var == 1 ? 0 : 1),
39732 GEN_INT (one_var == 2 ? 0+4 : 1+4),
39733 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
39734
39735 if (mode != V4SFmode)
39736 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
39737 else if (tmp != target)
39738 emit_move_insn (target, tmp);
39739 }
39740 else if (target != new_target)
39741 emit_move_insn (target, new_target);
39742 return true;
39743
39744 case V8HImode:
39745 case V16QImode:
39746 vsimode = V4SImode;
39747 goto widen;
39748 case V4HImode:
39749 case V8QImode:
39750 if (!mmx_ok)
39751 return false;
39752 vsimode = V2SImode;
39753 goto widen;
39754 widen:
39755 if (one_var != 0)
39756 return false;
39757
39758 /* Zero extend the variable element to SImode and recurse. */
39759 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
39760
39761 x = gen_reg_rtx (vsimode);
39762 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
39763 var, one_var))
39764 gcc_unreachable ();
39765
39766 emit_move_insn (target, gen_lowpart (mode, x));
39767 return true;
39768
39769 default:
39770 return false;
39771 }
39772 }
39773
39774 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
39775 consisting of the values in VALS. It is known that all elements
39776 except ONE_VAR are constants. Return true if successful. */
39777
39778 static bool
39779 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
39780 rtx target, rtx vals, int one_var)
39781 {
39782 rtx var = XVECEXP (vals, 0, one_var);
39783 enum machine_mode wmode;
39784 rtx const_vec, x;
39785
39786 const_vec = copy_rtx (vals);
39787 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
39788 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
39789
39790 switch (mode)
39791 {
39792 case V2DFmode:
39793 case V2DImode:
39794 case V2SFmode:
39795 case V2SImode:
39796 /* For the two element vectors, it's just as easy to use
39797 the general case. */
39798 return false;
39799
39800 case V4DImode:
39801 /* Use ix86_expand_vector_set in 64-bit mode only. */
39802 if (!TARGET_64BIT)
39803 return false;
39804 case V4DFmode:
39805 case V8SFmode:
39806 case V8SImode:
39807 case V16HImode:
39808 case V32QImode:
39809 case V4SFmode:
39810 case V4SImode:
39811 case V8HImode:
39812 case V4HImode:
39813 break;
39814
39815 case V16QImode:
39816 if (TARGET_SSE4_1)
39817 break;
39818 wmode = V8HImode;
39819 goto widen;
39820 case V8QImode:
39821 wmode = V4HImode;
39822 goto widen;
39823 widen:
39824 /* There's no way to set one QImode entry easily. Combine
39825 the variable value with its adjacent constant value, and
39826 promote to an HImode set. */
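/* For example, in V16QImode with one_var == 5, the variable byte is
   paired with the constant byte at index 4 (one_var ^ 1): the variable
   value is zero-extended to HImode, shifted left by 8, IORed with the
   low byte of that constant, and the combined HImode value is stored
   at element one_var >> 1 == 2 of the widened vector.  */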
39827 x = XVECEXP (vals, 0, one_var ^ 1);
39828 if (one_var & 1)
39829 {
39830 var = convert_modes (HImode, QImode, var, true);
39831 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
39832 NULL_RTX, 1, OPTAB_LIB_WIDEN);
39833 x = GEN_INT (INTVAL (x) & 0xff);
39834 }
39835 else
39836 {
39837 var = convert_modes (HImode, QImode, var, true);
39838 x = gen_int_mode (INTVAL (x) << 8, HImode);
39839 }
39840 if (x != const0_rtx)
39841 var = expand_simple_binop (HImode, IOR, var, x, var,
39842 1, OPTAB_LIB_WIDEN);
39843
39844 x = gen_reg_rtx (wmode);
39845 emit_move_insn (x, gen_lowpart (wmode, const_vec));
39846 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
39847
39848 emit_move_insn (target, gen_lowpart (mode, x));
39849 return true;
39850
39851 default:
39852 return false;
39853 }
39854
39855 emit_move_insn (target, const_vec);
39856 ix86_expand_vector_set (mmx_ok, target, var, one_var);
39857 return true;
39858 }
39859
39860 /* A subroutine of ix86_expand_vector_init_general. Use vector
39861 concatenate to handle the most general case: all values variable,
39862 and none identical. */
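/* For example, building a V8SFmode vector from eight scalar operands
   first concatenates adjacent pairs into four V2SFmode registers, then
   pairs of those into two V4SFmode registers, and finally concatenates
   the two halves into the V8SFmode target.  */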
39863
39864 static void
39865 ix86_expand_vector_init_concat (enum machine_mode mode,
39866 rtx target, rtx *ops, int n)
39867 {
39868 enum machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
39869 rtx first[16], second[8], third[4];
39870 rtvec v;
39871 int i, j;
39872
39873 switch (n)
39874 {
39875 case 2:
39876 switch (mode)
39877 {
39878 case V16SImode:
39879 cmode = V8SImode;
39880 break;
39881 case V16SFmode:
39882 cmode = V8SFmode;
39883 break;
39884 case V8DImode:
39885 cmode = V4DImode;
39886 break;
39887 case V8DFmode:
39888 cmode = V4DFmode;
39889 break;
39890 case V8SImode:
39891 cmode = V4SImode;
39892 break;
39893 case V8SFmode:
39894 cmode = V4SFmode;
39895 break;
39896 case V4DImode:
39897 cmode = V2DImode;
39898 break;
39899 case V4DFmode:
39900 cmode = V2DFmode;
39901 break;
39902 case V4SImode:
39903 cmode = V2SImode;
39904 break;
39905 case V4SFmode:
39906 cmode = V2SFmode;
39907 break;
39908 case V2DImode:
39909 cmode = DImode;
39910 break;
39911 case V2SImode:
39912 cmode = SImode;
39913 break;
39914 case V2DFmode:
39915 cmode = DFmode;
39916 break;
39917 case V2SFmode:
39918 cmode = SFmode;
39919 break;
39920 default:
39921 gcc_unreachable ();
39922 }
39923
39924 if (!register_operand (ops[1], cmode))
39925 ops[1] = force_reg (cmode, ops[1]);
39926 if (!register_operand (ops[0], cmode))
39927 ops[0] = force_reg (cmode, ops[0]);
39928 emit_insn (gen_rtx_SET (VOIDmode, target,
39929 gen_rtx_VEC_CONCAT (mode, ops[0],
39930 ops[1])));
39931 break;
39932
39933 case 4:
39934 switch (mode)
39935 {
39936 case V4DImode:
39937 cmode = V2DImode;
39938 break;
39939 case V4DFmode:
39940 cmode = V2DFmode;
39941 break;
39942 case V4SImode:
39943 cmode = V2SImode;
39944 break;
39945 case V4SFmode:
39946 cmode = V2SFmode;
39947 break;
39948 default:
39949 gcc_unreachable ();
39950 }
39951 goto half;
39952
39953 case 8:
39954 switch (mode)
39955 {
39956 case V8DImode:
39957 cmode = V2DImode;
39958 hmode = V4DImode;
39959 break;
39960 case V8DFmode:
39961 cmode = V2DFmode;
39962 hmode = V4DFmode;
39963 break;
39964 case V8SImode:
39965 cmode = V2SImode;
39966 hmode = V4SImode;
39967 break;
39968 case V8SFmode:
39969 cmode = V2SFmode;
39970 hmode = V4SFmode;
39971 break;
39972 default:
39973 gcc_unreachable ();
39974 }
39975 goto half;
39976
39977 case 16:
39978 switch (mode)
39979 {
39980 case V16SImode:
39981 cmode = V2SImode;
39982 hmode = V4SImode;
39983 gmode = V8SImode;
39984 break;
39985 case V16SFmode:
39986 cmode = V2SFmode;
39987 hmode = V4SFmode;
39988 gmode = V8SFmode;
39989 break;
39990 default:
39991 gcc_unreachable ();
39992 }
39993 goto half;
39994
39995 half:
39996 /* FIXME: We process inputs backward to help RA. PR 36222. */
39997 i = n - 1;
39998 j = (n >> 1) - 1;
39999 for (; i > 0; i -= 2, j--)
40000 {
40001 first[j] = gen_reg_rtx (cmode);
40002 v = gen_rtvec (2, ops[i - 1], ops[i]);
40003 ix86_expand_vector_init (false, first[j],
40004 gen_rtx_PARALLEL (cmode, v));
40005 }
40006
40007 n >>= 1;
40008 if (n > 4)
40009 {
40010 gcc_assert (hmode != VOIDmode);
40011 gcc_assert (gmode != VOIDmode);
40012 for (i = j = 0; i < n; i += 2, j++)
40013 {
40014 second[j] = gen_reg_rtx (hmode);
40015 ix86_expand_vector_init_concat (hmode, second [j],
40016 &first [i], 2);
40017 }
40018 n >>= 1;
40019 for (i = j = 0; i < n; i += 2, j++)
40020 {
40021 third[j] = gen_reg_rtx (gmode);
40022 ix86_expand_vector_init_concat (gmode, third[j],
40023 &second[i], 2);
40024 }
40025 n >>= 1;
40026 ix86_expand_vector_init_concat (mode, target, third, n);
40027 }
40028 else if (n > 2)
40029 {
40030 gcc_assert (hmode != VOIDmode);
40031 for (i = j = 0; i < n; i += 2, j++)
40032 {
40033 second[j] = gen_reg_rtx (hmode);
40034 ix86_expand_vector_init_concat (hmode, second [j],
40035 &first [i], 2);
40036 }
40037 n >>= 1;
40038 ix86_expand_vector_init_concat (mode, target, second, n);
40039 }
40040 else
40041 ix86_expand_vector_init_concat (mode, target, first, n);
40042 break;
40043
40044 default:
40045 gcc_unreachable ();
40046 }
40047 }
40048
40049 /* A subroutine of ix86_expand_vector_init_general. Use vector
40050 interleave to handle the most general case: all values variable,
40051 and none identical. */
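/* Roughly, for V16QImode: each loop iteration below packs the byte
   pair ops[2*i] and ops[2*i + 1] into the low HImode element of a
   fresh vector; interleave-low operations then merge those vectors
   pairwise at word, doubleword and finally quadword granularity until
   the full vector is assembled in TARGET.  */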
40052
40053 static void
40054 ix86_expand_vector_init_interleave (enum machine_mode mode,
40055 rtx target, rtx *ops, int n)
40056 {
40057 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
40058 int i, j;
40059 rtx op0, op1;
40060 rtx (*gen_load_even) (rtx, rtx, rtx);
40061 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
40062 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
40063
40064 switch (mode)
40065 {
40066 case V8HImode:
40067 gen_load_even = gen_vec_setv8hi;
40068 gen_interleave_first_low = gen_vec_interleave_lowv4si;
40069 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40070 inner_mode = HImode;
40071 first_imode = V4SImode;
40072 second_imode = V2DImode;
40073 third_imode = VOIDmode;
40074 break;
40075 case V16QImode:
40076 gen_load_even = gen_vec_setv16qi;
40077 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
40078 gen_interleave_second_low = gen_vec_interleave_lowv4si;
40079 inner_mode = QImode;
40080 first_imode = V8HImode;
40081 second_imode = V4SImode;
40082 third_imode = V2DImode;
40083 break;
40084 default:
40085 gcc_unreachable ();
40086 }
40087
40088 for (i = 0; i < n; i++)
40089 {
40090 /* Extend the odd element to SImode using a paradoxical SUBREG. */
40091 op0 = gen_reg_rtx (SImode);
40092 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
40093
40094 /* Insert the SImode value as low element of V4SImode vector. */
40095 op1 = gen_reg_rtx (V4SImode);
40096 op0 = gen_rtx_VEC_MERGE (V4SImode,
40097 gen_rtx_VEC_DUPLICATE (V4SImode,
40098 op0),
40099 CONST0_RTX (V4SImode),
40100 const1_rtx);
40101 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
40102
40103 /* Cast the V4SImode vector back to a vector in the original mode. */
40104 op0 = gen_reg_rtx (mode);
40105 emit_move_insn (op0, gen_lowpart (mode, op1));
40106
40107 /* Load even elements into the second position. */
40108 emit_insn (gen_load_even (op0,
40109 force_reg (inner_mode,
40110 ops [i + i + 1]),
40111 const1_rtx));
40112
40113 /* Cast vector to FIRST_IMODE vector. */
40114 ops[i] = gen_reg_rtx (first_imode);
40115 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
40116 }
40117
40118 /* Interleave low FIRST_IMODE vectors. */
40119 for (i = j = 0; i < n; i += 2, j++)
40120 {
40121 op0 = gen_reg_rtx (first_imode);
40122 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
40123
40124 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
40125 ops[j] = gen_reg_rtx (second_imode);
40126 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
40127 }
40128
40129 /* Interleave low SECOND_IMODE vectors. */
40130 switch (second_imode)
40131 {
40132 case V4SImode:
40133 for (i = j = 0; i < n / 2; i += 2, j++)
40134 {
40135 op0 = gen_reg_rtx (second_imode);
40136 emit_insn (gen_interleave_second_low (op0, ops[i],
40137 ops[i + 1]));
40138
40139 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
40140 vector. */
40141 ops[j] = gen_reg_rtx (third_imode);
40142 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
40143 }
40144 second_imode = V2DImode;
40145 gen_interleave_second_low = gen_vec_interleave_lowv2di;
40146 /* FALLTHRU */
40147
40148 case V2DImode:
40149 op0 = gen_reg_rtx (second_imode);
40150 emit_insn (gen_interleave_second_low (op0, ops[0],
40151 ops[1]));
40152
40153 /* Cast the SECOND_IMODE vector back to a vector in the original
40154 mode. */
40155 emit_insn (gen_rtx_SET (VOIDmode, target,
40156 gen_lowpart (mode, op0)));
40157 break;
40158
40159 default:
40160 gcc_unreachable ();
40161 }
40162 }
40163
40164 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
40165 all values variable, and none identical. */
40166
40167 static void
40168 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
40169 rtx target, rtx vals)
40170 {
40171 rtx ops[64], op0, op1;
40172 enum machine_mode half_mode = VOIDmode;
40173 int n, i;
40174
40175 switch (mode)
40176 {
40177 case V2SFmode:
40178 case V2SImode:
40179 if (!mmx_ok && !TARGET_SSE)
40180 break;
40181 /* FALLTHRU */
40182
40183 case V16SImode:
40184 case V16SFmode:
40185 case V8DFmode:
40186 case V8DImode:
40187 case V8SFmode:
40188 case V8SImode:
40189 case V4DFmode:
40190 case V4DImode:
40191 case V4SFmode:
40192 case V4SImode:
40193 case V2DFmode:
40194 case V2DImode:
40195 n = GET_MODE_NUNITS (mode);
40196 for (i = 0; i < n; i++)
40197 ops[i] = XVECEXP (vals, 0, i);
40198 ix86_expand_vector_init_concat (mode, target, ops, n);
40199 return;
40200
40201 case V32QImode:
40202 half_mode = V16QImode;
40203 goto half;
40204
40205 case V16HImode:
40206 half_mode = V8HImode;
40207 goto half;
40208
40209 half:
40210 n = GET_MODE_NUNITS (mode);
40211 for (i = 0; i < n; i++)
40212 ops[i] = XVECEXP (vals, 0, i);
40213 op0 = gen_reg_rtx (half_mode);
40214 op1 = gen_reg_rtx (half_mode);
40215 ix86_expand_vector_init_interleave (half_mode, op0, ops,
40216 n >> 2);
40217 ix86_expand_vector_init_interleave (half_mode, op1,
40218 &ops [n >> 1], n >> 2);
40219 emit_insn (gen_rtx_SET (VOIDmode, target,
40220 gen_rtx_VEC_CONCAT (mode, op0, op1)));
40221 return;
40222
40223 case V16QImode:
40224 if (!TARGET_SSE4_1)
40225 break;
40226 /* FALLTHRU */
40227
40228 case V8HImode:
40229 if (!TARGET_SSE2)
40230 break;
40231
40232 /* Don't use ix86_expand_vector_init_interleave if we can't
40233 move from GPR to SSE register directly. */
40234 if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
40235 break;
40236
40237 n = GET_MODE_NUNITS (mode);
40238 for (i = 0; i < n; i++)
40239 ops[i] = XVECEXP (vals, 0, i);
40240 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
40241 return;
40242
40243 case V4HImode:
40244 case V8QImode:
40245 break;
40246
40247 default:
40248 gcc_unreachable ();
40249 }
40250
40251 {
40252 int i, j, n_elts, n_words, n_elt_per_word;
40253 enum machine_mode inner_mode;
40254 rtx words[4], shift;
40255
40256 inner_mode = GET_MODE_INNER (mode);
40257 n_elts = GET_MODE_NUNITS (mode);
40258 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
40259 n_elt_per_word = n_elts / n_words;
40260 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
40261
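/* Build each word from its elements, highest index first, so that the
   shift/IOR chain below leaves the lowest-indexed element in the least
   significant bits of the word.  */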
40262 for (i = 0; i < n_words; ++i)
40263 {
40264 rtx word = NULL_RTX;
40265
40266 for (j = 0; j < n_elt_per_word; ++j)
40267 {
40268 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
40269 elt = convert_modes (word_mode, inner_mode, elt, true);
40270
40271 if (j == 0)
40272 word = elt;
40273 else
40274 {
40275 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
40276 word, 1, OPTAB_LIB_WIDEN);
40277 word = expand_simple_binop (word_mode, IOR, word, elt,
40278 word, 1, OPTAB_LIB_WIDEN);
40279 }
40280 }
40281
40282 words[i] = word;
40283 }
40284
40285 if (n_words == 1)
40286 emit_move_insn (target, gen_lowpart (mode, words[0]));
40287 else if (n_words == 2)
40288 {
40289 rtx tmp = gen_reg_rtx (mode);
40290 emit_clobber (tmp);
40291 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
40292 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
40293 emit_move_insn (target, tmp);
40294 }
40295 else if (n_words == 4)
40296 {
40297 rtx tmp = gen_reg_rtx (V4SImode);
40298 gcc_assert (word_mode == SImode);
40299 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
40300 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
40301 emit_move_insn (target, gen_lowpart (mode, tmp));
40302 }
40303 else
40304 gcc_unreachable ();
40305 }
40306 }
40307
40308 /* Initialize vector TARGET via VALS. Suppress the use of MMX
40309 instructions unless MMX_OK is true. */
40310
40311 void
40312 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
40313 {
40314 enum machine_mode mode = GET_MODE (target);
40315 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40316 int n_elts = GET_MODE_NUNITS (mode);
40317 int n_var = 0, one_var = -1;
40318 bool all_same = true, all_const_zero = true;
40319 int i;
40320 rtx x;
40321
40322 for (i = 0; i < n_elts; ++i)
40323 {
40324 x = XVECEXP (vals, 0, i);
40325 if (!(CONST_INT_P (x)
40326 || GET_CODE (x) == CONST_DOUBLE
40327 || GET_CODE (x) == CONST_FIXED))
40328 n_var++, one_var = i;
40329 else if (x != CONST0_RTX (inner_mode))
40330 all_const_zero = false;
40331 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
40332 all_same = false;
40333 }
40334
40335 /* Constants are best loaded from the constant pool. */
40336 if (n_var == 0)
40337 {
40338 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
40339 return;
40340 }
40341
40342 /* If all values are identical, broadcast the value. */
40343 if (all_same
40344 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
40345 XVECEXP (vals, 0, 0)))
40346 return;
40347
40348 /* Values where only one field is non-constant are best loaded from
40349 the pool and overwritten via move later. */
40350 if (n_var == 1)
40351 {
40352 if (all_const_zero
40353 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
40354 XVECEXP (vals, 0, one_var),
40355 one_var))
40356 return;
40357
40358 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
40359 return;
40360 }
40361
40362 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
40363 }
40364
40365 void
40366 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
40367 {
40368 enum machine_mode mode = GET_MODE (target);
40369 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40370 enum machine_mode half_mode;
40371 bool use_vec_merge = false;
40372 rtx tmp;
40373 static rtx (*gen_extract[6][2]) (rtx, rtx)
40374 = {
40375 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
40376 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
40377 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
40378 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
40379 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
40380 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
40381 };
40382 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
40383 = {
40384 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
40385 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
40386 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
40387 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
40388 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
40389 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
40390 };
40391 int i, j, n;
40392
40393 switch (mode)
40394 {
40395 case V2SFmode:
40396 case V2SImode:
40397 if (mmx_ok)
40398 {
40399 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40400 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
40401 if (elt == 0)
40402 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40403 else
40404 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40405 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40406 return;
40407 }
40408 break;
40409
40410 case V2DImode:
40411 use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
40412 if (use_vec_merge)
40413 break;
40414
40415 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
40416 ix86_expand_vector_extract (false, tmp, target, 1 - elt);
40417 if (elt == 0)
40418 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
40419 else
40420 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
40421 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40422 return;
40423
40424 case V2DFmode:
40425 {
40426 rtx op0, op1;
40427
40428 /* For the two element vectors, we implement a VEC_CONCAT with
40429 the extraction of the other element. */
40430
40431 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
40432 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
40433
40434 if (elt == 0)
40435 op0 = val, op1 = tmp;
40436 else
40437 op0 = tmp, op1 = val;
40438
40439 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
40440 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40441 }
40442 return;
40443
40444 case V4SFmode:
40445 use_vec_merge = TARGET_SSE4_1;
40446 if (use_vec_merge)
40447 break;
40448
40449 switch (elt)
40450 {
40451 case 0:
40452 use_vec_merge = true;
40453 break;
40454
40455 case 1:
40456 /* tmp = target = A B C D */
40457 tmp = copy_to_reg (target);
40458 /* target = A A B B */
40459 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
40460 /* target = X A B B */
40461 ix86_expand_vector_set (false, target, val, 0);
40462 /* target = A X C D */
40463 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40464 const1_rtx, const0_rtx,
40465 GEN_INT (2+4), GEN_INT (3+4)));
40466 return;
40467
40468 case 2:
40469 /* tmp = target = A B C D */
40470 tmp = copy_to_reg (target);
40471 /* tmp = X B C D */
40472 ix86_expand_vector_set (false, tmp, val, 0);
40473 /* target = A B X D */
40474 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40475 const0_rtx, const1_rtx,
40476 GEN_INT (0+4), GEN_INT (3+4)));
40477 return;
40478
40479 case 3:
40480 /* tmp = target = A B C D */
40481 tmp = copy_to_reg (target);
40482 /* tmp = X B C D */
40483 ix86_expand_vector_set (false, tmp, val, 0);
40484 /* target = A B C X */
40485 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
40486 const0_rtx, const1_rtx,
40487 GEN_INT (2+4), GEN_INT (0+4)));
40488 return;
40489
40490 default:
40491 gcc_unreachable ();
40492 }
40493 break;
40494
40495 case V4SImode:
40496 use_vec_merge = TARGET_SSE4_1;
40497 if (use_vec_merge)
40498 break;
40499
40500 /* Element 0 handled by vec_merge below. */
40501 if (elt == 0)
40502 {
40503 use_vec_merge = true;
40504 break;
40505 }
40506
40507 if (TARGET_SSE2)
40508 {
40509 /* With SSE2, use integer shuffles to swap element 0 and ELT,
40510 store into element 0, then shuffle them back. */
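/* For example, with elt == 2 the order below is { 2, 1, 0, 3 }: the
   first pshufd swaps elements 0 and 2, the new value is stored into
   element 0, and the same (self-inverse) shuffle swaps them back,
   leaving VAL in element 2.  */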
40511
40512 rtx order[4];
40513
40514 order[0] = GEN_INT (elt);
40515 order[1] = const1_rtx;
40516 order[2] = const2_rtx;
40517 order[3] = GEN_INT (3);
40518 order[elt] = const0_rtx;
40519
40520 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40521 order[1], order[2], order[3]));
40522
40523 ix86_expand_vector_set (false, target, val, 0);
40524
40525 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
40526 order[1], order[2], order[3]));
40527 }
40528 else
40529 {
40530 /* For SSE1, we have to reuse the V4SF code. */
40531 rtx t = gen_reg_rtx (V4SFmode);
40532 ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
40533 emit_move_insn (target, gen_lowpart (mode, t));
40534 }
40535 return;
40536
40537 case V8HImode:
40538 use_vec_merge = TARGET_SSE2;
40539 break;
40540 case V4HImode:
40541 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40542 break;
40543
40544 case V16QImode:
40545 use_vec_merge = TARGET_SSE4_1;
40546 break;
40547
40548 case V8QImode:
40549 break;
40550
40551 case V32QImode:
40552 half_mode = V16QImode;
40553 j = 0;
40554 n = 16;
40555 goto half;
40556
40557 case V16HImode:
40558 half_mode = V8HImode;
40559 j = 1;
40560 n = 8;
40561 goto half;
40562
40563 case V8SImode:
40564 half_mode = V4SImode;
40565 j = 2;
40566 n = 4;
40567 goto half;
40568
40569 case V4DImode:
40570 half_mode = V2DImode;
40571 j = 3;
40572 n = 2;
40573 goto half;
40574
40575 case V8SFmode:
40576 half_mode = V4SFmode;
40577 j = 4;
40578 n = 4;
40579 goto half;
40580
40581 case V4DFmode:
40582 half_mode = V2DFmode;
40583 j = 5;
40584 n = 2;
40585 goto half;
40586
40587 half:
40588 /* Compute offset. */
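/* For example, for V8SImode and elt == 6: n == 4, so i == 1 selects
   the high 128-bit half and elt becomes 2 within that half.  */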
40589 i = elt / n;
40590 elt %= n;
40591
40592 gcc_assert (i <= 1);
40593
40594 /* Extract the half. */
40595 tmp = gen_reg_rtx (half_mode);
40596 emit_insn (gen_extract[j][i] (tmp, target));
40597
40598 /* Put val in tmp at elt. */
40599 ix86_expand_vector_set (false, tmp, val, elt);
40600
40601 /* Put it back. */
40602 emit_insn (gen_insert[j][i] (target, target, tmp));
40603 return;
40604
40605 default:
40606 break;
40607 }
40608
40609 if (use_vec_merge)
40610 {
40611 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
40612 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
40613 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40614 }
40615 else
40616 {
40617 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40618
40619 emit_move_insn (mem, target);
40620
40621 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40622 emit_move_insn (tmp, val);
40623
40624 emit_move_insn (target, mem);
40625 }
40626 }
40627
40628 void
40629 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
40630 {
40631 enum machine_mode mode = GET_MODE (vec);
40632 enum machine_mode inner_mode = GET_MODE_INNER (mode);
40633 bool use_vec_extr = false;
40634 rtx tmp;
40635
40636 switch (mode)
40637 {
40638 case V2SImode:
40639 case V2SFmode:
40640 if (!mmx_ok)
40641 break;
40642 /* FALLTHRU */
40643
40644 case V2DFmode:
40645 case V2DImode:
40646 use_vec_extr = true;
40647 break;
40648
40649 case V4SFmode:
40650 use_vec_extr = TARGET_SSE4_1;
40651 if (use_vec_extr)
40652 break;
40653
40654 switch (elt)
40655 {
40656 case 0:
40657 tmp = vec;
40658 break;
40659
40660 case 1:
40661 case 3:
40662 tmp = gen_reg_rtx (mode);
40663 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
40664 GEN_INT (elt), GEN_INT (elt),
40665 GEN_INT (elt+4), GEN_INT (elt+4)));
40666 break;
40667
40668 case 2:
40669 tmp = gen_reg_rtx (mode);
40670 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
40671 break;
40672
40673 default:
40674 gcc_unreachable ();
40675 }
40676 vec = tmp;
40677 use_vec_extr = true;
40678 elt = 0;
40679 break;
40680
40681 case V4SImode:
40682 use_vec_extr = TARGET_SSE4_1;
40683 if (use_vec_extr)
40684 break;
40685
40686 if (TARGET_SSE2)
40687 {
40688 switch (elt)
40689 {
40690 case 0:
40691 tmp = vec;
40692 break;
40693
40694 case 1:
40695 case 3:
40696 tmp = gen_reg_rtx (mode);
40697 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
40698 GEN_INT (elt), GEN_INT (elt),
40699 GEN_INT (elt), GEN_INT (elt)));
40700 break;
40701
40702 case 2:
40703 tmp = gen_reg_rtx (mode);
40704 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
40705 break;
40706
40707 default:
40708 gcc_unreachable ();
40709 }
40710 vec = tmp;
40711 use_vec_extr = true;
40712 elt = 0;
40713 }
40714 else
40715 {
40716 /* For SSE1, we have to reuse the V4SF code. */
40717 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
40718 gen_lowpart (V4SFmode, vec), elt);
40719 return;
40720 }
40721 break;
40722
40723 case V8HImode:
40724 use_vec_extr = TARGET_SSE2;
40725 break;
40726 case V4HImode:
40727 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
40728 break;
40729
40730 case V16QImode:
40731 use_vec_extr = TARGET_SSE4_1;
40732 break;
40733
40734 case V8SFmode:
40735 if (TARGET_AVX)
40736 {
40737 tmp = gen_reg_rtx (V4SFmode);
40738 if (elt < 4)
40739 emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
40740 else
40741 emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
40742 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40743 return;
40744 }
40745 break;
40746
40747 case V4DFmode:
40748 if (TARGET_AVX)
40749 {
40750 tmp = gen_reg_rtx (V2DFmode);
40751 if (elt < 2)
40752 emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
40753 else
40754 emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
40755 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40756 return;
40757 }
40758 break;
40759
40760 case V32QImode:
40761 if (TARGET_AVX)
40762 {
40763 tmp = gen_reg_rtx (V16QImode);
40764 if (elt < 16)
40765 emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
40766 else
40767 emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
40768 ix86_expand_vector_extract (false, target, tmp, elt & 15);
40769 return;
40770 }
40771 break;
40772
40773 case V16HImode:
40774 if (TARGET_AVX)
40775 {
40776 tmp = gen_reg_rtx (V8HImode);
40777 if (elt < 8)
40778 emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
40779 else
40780 emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
40781 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40782 return;
40783 }
40784 break;
40785
40786 case V8SImode:
40787 if (TARGET_AVX)
40788 {
40789 tmp = gen_reg_rtx (V4SImode);
40790 if (elt < 4)
40791 emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
40792 else
40793 emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
40794 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40795 return;
40796 }
40797 break;
40798
40799 case V4DImode:
40800 if (TARGET_AVX)
40801 {
40802 tmp = gen_reg_rtx (V2DImode);
40803 if (elt < 2)
40804 emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
40805 else
40806 emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
40807 ix86_expand_vector_extract (false, target, tmp, elt & 1);
40808 return;
40809 }
40810 break;
40811
40812 case V16SFmode:
40813 tmp = gen_reg_rtx (V8SFmode);
40814 if (elt < 8)
40815 emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
40816 else
40817 emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
40818 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40819 return;
40820
40821 case V8DFmode:
40822 tmp = gen_reg_rtx (V4DFmode);
40823 if (elt < 4)
40824 emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
40825 else
40826 emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
40827 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40828 return;
40829
40830 case V16SImode:
40831 tmp = gen_reg_rtx (V8SImode);
40832 if (elt < 8)
40833 emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
40834 else
40835 emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
40836 ix86_expand_vector_extract (false, target, tmp, elt & 7);
40837 return;
40838
40839 case V8DImode:
40840 tmp = gen_reg_rtx (V4DImode);
40841 if (elt < 4)
40842 emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
40843 else
40844 emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
40845 ix86_expand_vector_extract (false, target, tmp, elt & 3);
40846 return;
40847
40848 case V8QImode:
40849 /* ??? Could extract the appropriate HImode element and shift. */
40850 default:
40851 break;
40852 }
40853
40854 if (use_vec_extr)
40855 {
40856 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
40857 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
40858
40859 /* Let the rtl optimizers know about the zero extension performed. */
40860 if (inner_mode == QImode || inner_mode == HImode)
40861 {
40862 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
40863 target = gen_lowpart (SImode, target);
40864 }
40865
40866 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
40867 }
40868 else
40869 {
40870 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
40871
40872 emit_move_insn (mem, vec);
40873
40874 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
40875 emit_move_insn (target, tmp);
40876 }
40877 }
40878
40879 /* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
40880 to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
40881 The upper bits of DEST are undefined, though they shouldn't cause
40882 exceptions (some bits from src or all zeros are ok). */
40883
40884 static void
40885 emit_reduc_half (rtx dest, rtx src, int i)
40886 {
40887 rtx tem, d = dest;
40888 switch (GET_MODE (src))
40889 {
40890 case V4SFmode:
40891 if (i == 128)
40892 tem = gen_sse_movhlps (dest, src, src);
40893 else
40894 tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
40895 GEN_INT (1 + 4), GEN_INT (1 + 4));
40896 break;
40897 case V2DFmode:
40898 tem = gen_vec_interleave_highv2df (dest, src, src);
40899 break;
40900 case V16QImode:
40901 case V8HImode:
40902 case V4SImode:
40903 case V2DImode:
40904 d = gen_reg_rtx (V1TImode);
40905 tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
40906 GEN_INT (i / 2));
40907 break;
40908 case V8SFmode:
40909 if (i == 256)
40910 tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
40911 else
40912 tem = gen_avx_shufps256 (dest, src, src,
40913 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
40914 break;
40915 case V4DFmode:
40916 if (i == 256)
40917 tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
40918 else
40919 tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
40920 break;
40921 case V32QImode:
40922 case V16HImode:
40923 case V8SImode:
40924 case V4DImode:
40925 if (i == 256)
40926 {
40927 if (GET_MODE (dest) != V4DImode)
40928 d = gen_reg_rtx (V4DImode);
40929 tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
40930 gen_lowpart (V4DImode, src),
40931 const1_rtx);
40932 }
40933 else
40934 {
40935 d = gen_reg_rtx (V2TImode);
40936 tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
40937 GEN_INT (i / 2));
40938 }
40939 break;
40940 case V16SImode:
40941 case V16SFmode:
40942 case V8DImode:
40943 case V8DFmode:
40944 if (i > 128)
40945 tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
40946 gen_lowpart (V16SImode, src),
40947 gen_lowpart (V16SImode, src),
40948 GEN_INT (0x4 + (i == 512 ? 4 : 0)),
40949 GEN_INT (0x5 + (i == 512 ? 4 : 0)),
40950 GEN_INT (0x6 + (i == 512 ? 4 : 0)),
40951 GEN_INT (0x7 + (i == 512 ? 4 : 0)),
40952 GEN_INT (0xC), GEN_INT (0xD),
40953 GEN_INT (0xE), GEN_INT (0xF),
40954 GEN_INT (0x10), GEN_INT (0x11),
40955 GEN_INT (0x12), GEN_INT (0x13),
40956 GEN_INT (0x14), GEN_INT (0x15),
40957 GEN_INT (0x16), GEN_INT (0x17));
40958 else
40959 tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
40960 gen_lowpart (V16SImode, src),
40961 GEN_INT (i == 128 ? 0x2 : 0x1),
40962 GEN_INT (0x3),
40963 GEN_INT (0x3),
40964 GEN_INT (0x3),
40965 GEN_INT (i == 128 ? 0x6 : 0x5),
40966 GEN_INT (0x7),
40967 GEN_INT (0x7),
40968 GEN_INT (0x7),
40969 GEN_INT (i == 128 ? 0xA : 0x9),
40970 GEN_INT (0xB),
40971 GEN_INT (0xB),
40972 GEN_INT (0xB),
40973 GEN_INT (i == 128 ? 0xE : 0xD),
40974 GEN_INT (0xF),
40975 GEN_INT (0xF),
40976 GEN_INT (0xF));
40977 break;
40978 default:
40979 gcc_unreachable ();
40980 }
40981 emit_insn (tem);
40982 if (d != dest)
40983 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
40984 }
40985
40986 /* Expand a vector reduction. FN is the binary pattern to reduce;
40987 DEST is the destination; IN is the input vector. */
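/* For example, reducing a V4SFmode vector { a, b, c, d } with an
   addition pattern takes two steps: the first combines the vector
   with its upper half to give { a+c, b+d, ... }, the second adds
   element 1 of that partial result into element 0, so the full sum
   ends up in element 0 of DEST.  */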
40988
40989 void
40990 ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
40991 {
40992 rtx half, dst, vec = in;
40993 enum machine_mode mode = GET_MODE (in);
40994 int i;
40995
40996 /* SSE4 has a special instruction for V8HImode UMIN reduction. */
40997 if (TARGET_SSE4_1
40998 && mode == V8HImode
40999 && fn == gen_uminv8hi3)
41000 {
41001 emit_insn (gen_sse4_1_phminposuw (dest, in));
41002 return;
41003 }
41004
41005 for (i = GET_MODE_BITSIZE (mode);
41006 i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
41007 i >>= 1)
41008 {
41009 half = gen_reg_rtx (mode);
41010 emit_reduc_half (half, vec, i);
41011 if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
41012 dst = dest;
41013 else
41014 dst = gen_reg_rtx (mode);
41015 emit_insn (fn (dst, half, vec));
41016 vec = dst;
41017 }
41018 }
41019 \f
41020 /* Target hook for scalar_mode_supported_p. */
41021 static bool
41022 ix86_scalar_mode_supported_p (enum machine_mode mode)
41023 {
41024 if (DECIMAL_FLOAT_MODE_P (mode))
41025 return default_decimal_float_supported_p ();
41026 else if (mode == TFmode)
41027 return true;
41028 else
41029 return default_scalar_mode_supported_p (mode);
41030 }
41031
41032 /* Implements target hook vector_mode_supported_p. */
41033 static bool
41034 ix86_vector_mode_supported_p (enum machine_mode mode)
41035 {
41036 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
41037 return true;
41038 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
41039 return true;
41040 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
41041 return true;
41042 if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
41043 return true;
41044 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
41045 return true;
41046 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
41047 return true;
41048 return false;
41049 }
41050
41051 /* Target hook for c_mode_for_suffix. */
41052 static enum machine_mode
41053 ix86_c_mode_for_suffix (char suffix)
41054 {
41055 if (suffix == 'q')
41056 return TFmode;
41057 if (suffix == 'w')
41058 return XFmode;
41059
41060 return VOIDmode;
41061 }
41062
41063 /* Worker function for TARGET_MD_ASM_CLOBBERS.
41064
41065 We do this in the new i386 backend to maintain source compatibility
41066 with the old cc0-based compiler. */
41067
41068 static tree
41069 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
41070 tree inputs ATTRIBUTE_UNUSED,
41071 tree clobbers)
41072 {
41073 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
41074 clobbers);
41075 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
41076 clobbers);
41077 return clobbers;
41078 }
41079
41080 /* Implements target vector targetm.asm.encode_section_info. */
41081
41082 static void ATTRIBUTE_UNUSED
41083 ix86_encode_section_info (tree decl, rtx rtl, int first)
41084 {
41085 default_encode_section_info (decl, rtl, first);
41086
41087 if (TREE_CODE (decl) == VAR_DECL
41088 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
41089 && ix86_in_large_data_p (decl))
41090 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
41091 }
41092
41093 /* Worker function for REVERSE_CONDITION. */
41094
41095 enum rtx_code
41096 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
41097 {
41098 return (mode != CCFPmode && mode != CCFPUmode
41099 ? reverse_condition (code)
41100 : reverse_condition_maybe_unordered (code));
41101 }
41102
41103 /* Output code to perform an x87 FP register move, from OPERANDS[1]
41104 to OPERANDS[0]. */
41105
41106 const char *
41107 output_387_reg_move (rtx insn, rtx *operands)
41108 {
41109 if (REG_P (operands[0]))
41110 {
41111 if (REG_P (operands[1])
41112 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41113 {
41114 if (REGNO (operands[0]) == FIRST_STACK_REG)
41115 return output_387_ffreep (operands, 0);
41116 return "fstp\t%y0";
41117 }
41118 if (STACK_TOP_P (operands[0]))
41119 return "fld%Z1\t%y1";
41120 return "fst\t%y0";
41121 }
41122 else if (MEM_P (operands[0]))
41123 {
41124 gcc_assert (REG_P (operands[1]));
41125 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
41126 return "fstp%Z0\t%y0";
41127 else
41128 {
41129 /* There is no non-popping store to memory for XFmode.
41130 So if we need one, follow the store with a load. */
41131 if (GET_MODE (operands[0]) == XFmode)
41132 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
41133 else
41134 return "fst%Z0\t%y0";
41135 }
41136 }
41137 else
41138 gcc_unreachable();
41139 }
41140
41141 /* Output code to perform a conditional jump to LABEL, if C2 flag in
41142 FP status register is set. */
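/* After fnstsw, bit 2 of the high byte of the status word is C2; with
   sahf that bit lands in PF, so the unordered branch tests the flags
   register, while the non-sahf fallback tests the 0x04 bit of the
   status-word byte directly.  */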
41143
41144 void
41145 ix86_emit_fp_unordered_jump (rtx label)
41146 {
41147 rtx reg = gen_reg_rtx (HImode);
41148 rtx temp;
41149
41150 emit_insn (gen_x86_fnstsw_1 (reg));
41151
41152 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
41153 {
41154 emit_insn (gen_x86_sahf_1 (reg));
41155
41156 temp = gen_rtx_REG (CCmode, FLAGS_REG);
41157 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
41158 }
41159 else
41160 {
41161 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
41162
41163 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
41164 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
41165 }
41166
41167 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
41168 gen_rtx_LABEL_REF (VOIDmode, label),
41169 pc_rtx);
41170 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
41171
41172 emit_jump_insn (temp);
41173 predict_jump (REG_BR_PROB_BASE * 10 / 100);
41174 }
41175
41176 /* Output code to perform a log1p XFmode calculation. */
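/* In outline: log1p (x) = log2 (1 + x) * ln (2).  fyl2xp1 is only
   specified for |x| < 1 - sqrt (2) / 2 (about 0.2928932), which is the
   threshold tested below; larger magnitudes fall back to fyl2x applied
   to 1.0 + x.  */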
41177
41178 void ix86_emit_i387_log1p (rtx op0, rtx op1)
41179 {
41180 rtx label1 = gen_label_rtx ();
41181 rtx label2 = gen_label_rtx ();
41182
41183 rtx tmp = gen_reg_rtx (XFmode);
41184 rtx tmp2 = gen_reg_rtx (XFmode);
41185 rtx test;
41186
41187 emit_insn (gen_absxf2 (tmp, op1));
41188 test = gen_rtx_GE (VOIDmode, tmp,
41189 CONST_DOUBLE_FROM_REAL_VALUE (
41190 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
41191 XFmode));
41192 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
41193
41194 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41195 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
41196 emit_jump (label2);
41197
41198 emit_label (label1);
41199 emit_move_insn (tmp, CONST1_RTX (XFmode));
41200 emit_insn (gen_addxf3 (tmp, op1, tmp));
41201 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
41202 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
41203
41204 emit_label (label2);
41205 }
41206
41207 /* Emit x87 code for a round calculation: OP0 = round (OP1). */
41208 void ix86_emit_i387_round (rtx op0, rtx op1)
41209 {
41210 enum machine_mode inmode = GET_MODE (op1);
41211 enum machine_mode outmode = GET_MODE (op0);
41212 rtx e1, e2, res, tmp, tmp1, half;
41213 rtx scratch = gen_reg_rtx (HImode);
41214 rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
41215 rtx jump_label = gen_label_rtx ();
41216 rtx insn;
41217 rtx (*gen_abs) (rtx, rtx);
41218 rtx (*gen_neg) (rtx, rtx);
41219
41220 switch (inmode)
41221 {
41222 case SFmode:
41223 gen_abs = gen_abssf2;
41224 break;
41225 case DFmode:
41226 gen_abs = gen_absdf2;
41227 break;
41228 case XFmode:
41229 gen_abs = gen_absxf2;
41230 break;
41231 default:
41232 gcc_unreachable ();
41233 }
41234
41235 switch (outmode)
41236 {
41237 case SFmode:
41238 gen_neg = gen_negsf2;
41239 break;
41240 case DFmode:
41241 gen_neg = gen_negdf2;
41242 break;
41243 case XFmode:
41244 gen_neg = gen_negxf2;
41245 break;
41246 case HImode:
41247 gen_neg = gen_neghi2;
41248 break;
41249 case SImode:
41250 gen_neg = gen_negsi2;
41251 break;
41252 case DImode:
41253 gen_neg = gen_negdi2;
41254 break;
41255 default:
41256 gcc_unreachable ();
41257 }
41258
41259 e1 = gen_reg_rtx (inmode);
41260 e2 = gen_reg_rtx (inmode);
41261 res = gen_reg_rtx (outmode);
41262
41263 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
41264
41265 /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
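/* For example, round (-2.5): fabs gives 2.5, adding 0.5 gives 3.0,
   the floor is 3.0, and the sign test below negates it to -3.0, i.e.
   halfway cases round away from zero as the C round () family does.  */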
41266
41267 /* scratch = fxam(op1) */
41268 emit_insn (gen_rtx_SET (VOIDmode, scratch,
41269 gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
41270 UNSPEC_FXAM)));
41271 /* e1 = fabs(op1) */
41272 emit_insn (gen_abs (e1, op1));
41273
41274 /* e2 = e1 + 0.5 */
41275 half = force_reg (inmode, half);
41276 emit_insn (gen_rtx_SET (VOIDmode, e2,
41277 gen_rtx_PLUS (inmode, e1, half)));
41278
41279 /* res = floor(e2) */
41280 if (inmode != XFmode)
41281 {
41282 tmp1 = gen_reg_rtx (XFmode);
41283
41284 emit_insn (gen_rtx_SET (VOIDmode, tmp1,
41285 gen_rtx_FLOAT_EXTEND (XFmode, e2)));
41286 }
41287 else
41288 tmp1 = e2;
41289
41290 switch (outmode)
41291 {
41292 case SFmode:
41293 case DFmode:
41294 {
41295 rtx tmp0 = gen_reg_rtx (XFmode);
41296
41297 emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
41298
41299 emit_insn (gen_rtx_SET (VOIDmode, res,
41300 gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
41301 UNSPEC_TRUNC_NOOP)));
41302 }
41303 break;
41304 case XFmode:
41305 emit_insn (gen_frndintxf2_floor (res, tmp1));
41306 break;
41307 case HImode:
41308 emit_insn (gen_lfloorxfhi2 (res, tmp1));
41309 break;
41310 case SImode:
41311 emit_insn (gen_lfloorxfsi2 (res, tmp1));
41312 break;
41313 case DImode:
41314 emit_insn (gen_lfloorxfdi2 (res, tmp1));
41315 break;
41316 default:
41317 gcc_unreachable ();
41318 }
41319
41320 /* flags = signbit(a) */
41321 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
41322
41323 /* if (flags) then res = -res */
41324 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
41325 gen_rtx_EQ (VOIDmode, flags, const0_rtx),
41326 gen_rtx_LABEL_REF (VOIDmode, jump_label),
41327 pc_rtx);
41328 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41329 predict_jump (REG_BR_PROB_BASE * 50 / 100);
41330 JUMP_LABEL (insn) = jump_label;
41331
41332 emit_insn (gen_neg (res, res));
41333
41334 emit_label (jump_label);
41335 LABEL_NUSES (jump_label) = 1;
41336
41337 emit_move_insn (op0, res);
41338 }
41339
41340 /* Output code to perform a Newton-Raphson approximation of a single precision
41341 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
41342
41343 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
41344 {
41345 rtx x0, x1, e0, e1;
41346
41347 x0 = gen_reg_rtx (mode);
41348 e0 = gen_reg_rtx (mode);
41349 e1 = gen_reg_rtx (mode);
41350 x1 = gen_reg_rtx (mode);
41351
41352 /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
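/* This is one Newton-Raphson step for 1/b: with x0 = rcp (b),
   x1 = x0 * (2 - b * x0) = (x0 + x0) - (b * x0 * x0), which roughly
   doubles the number of correct bits in the estimate; the final
   multiply by a then yields the quotient.  */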
41353
41354 b = force_reg (mode, b);
41355
41356 /* x0 = rcp(b) estimate */
41357 if (mode == V16SFmode || mode == V8DFmode)
41358 emit_insn (gen_rtx_SET (VOIDmode, x0,
41359 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41360 UNSPEC_RCP14)));
41361 else
41362 emit_insn (gen_rtx_SET (VOIDmode, x0,
41363 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
41364 UNSPEC_RCP)));
41365
41366 /* e0 = x0 * b */
41367 emit_insn (gen_rtx_SET (VOIDmode, e0,
41368 gen_rtx_MULT (mode, x0, b)));
41369
41370 /* e0 = x0 * e0 */
41371 emit_insn (gen_rtx_SET (VOIDmode, e0,
41372 gen_rtx_MULT (mode, x0, e0)));
41373
41374 /* e1 = x0 + x0 */
41375 emit_insn (gen_rtx_SET (VOIDmode, e1,
41376 gen_rtx_PLUS (mode, x0, x0)));
41377
41378 /* x1 = e1 - e0 */
41379 emit_insn (gen_rtx_SET (VOIDmode, x1,
41380 gen_rtx_MINUS (mode, e1, e0)));
41381
41382 /* res = a * x1 */
41383 emit_insn (gen_rtx_SET (VOIDmode, res,
41384 gen_rtx_MULT (mode, a, x1)));
41385 }
41386
41387 /* Output code to perform a Newton-Raphson approximation of a
41388 single precision floating point [reciprocal] square root. */
41389
41390 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
41391 bool recip)
41392 {
41393 rtx x0, e0, e1, e2, e3, mthree, mhalf;
41394 REAL_VALUE_TYPE r;
41395 int unspec;
41396
41397 x0 = gen_reg_rtx (mode);
41398 e0 = gen_reg_rtx (mode);
41399 e1 = gen_reg_rtx (mode);
41400 e2 = gen_reg_rtx (mode);
41401 e3 = gen_reg_rtx (mode);
41402
41403 real_from_integer (&r, VOIDmode, -3, SIGNED);
41404 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41405
41406 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
41407 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
41408 unspec = UNSPEC_RSQRT;
41409
41410 if (VECTOR_MODE_P (mode))
41411 {
41412 mthree = ix86_build_const_vector (mode, true, mthree);
41413 mhalf = ix86_build_const_vector (mode, true, mhalf);
41414 /* There is no 512-bit rsqrt. There is however rsqrt14. */
41415 if (GET_MODE_SIZE (mode) == 64)
41416 unspec = UNSPEC_RSQRT14;
41417 }
41418
41419 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
41420 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
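/* These follow from one Newton-Raphson step for 1/sqrt(a): with
   x0 = rsqrtss (a), x1 = -0.5 * x0 * (a * x0 * x0 - 3.0), and
   sqrt (a) is then a * x1 = -0.5 * (a * x0) * (a * x0 * x0 - 3.0).  */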
41421
41422 a = force_reg (mode, a);
41423
41424 /* x0 = rsqrt(a) estimate */
41425 emit_insn (gen_rtx_SET (VOIDmode, x0,
41426 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
41427 unspec)));
41428
41429 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt(0.0) = 0.0 * inf. */
41430 if (!recip)
41431 {
41432 rtx zero, mask;
41433
41434 zero = gen_reg_rtx (mode);
41435 mask = gen_reg_rtx (mode);
41436
41437 zero = force_reg (mode, CONST0_RTX(mode));
41438
41439 /* Handle masked compare. */
41440 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
41441 {
41442 mask = gen_reg_rtx (HImode);
41443 /* Imm value 0x4 corresponds to not-equal comparison. */
41444 emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
41445 emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
41446 }
41447 else
41448 {
41449 emit_insn (gen_rtx_SET (VOIDmode, mask,
41450 gen_rtx_NE (mode, zero, a)));
41451
41452 emit_insn (gen_rtx_SET (VOIDmode, x0,
41453 gen_rtx_AND (mode, x0, mask)));
41454 }
41455 }
41456
41457 /* e0 = x0 * a */
41458 emit_insn (gen_rtx_SET (VOIDmode, e0,
41459 gen_rtx_MULT (mode, x0, a)));
41460 /* e1 = e0 * x0 */
41461 emit_insn (gen_rtx_SET (VOIDmode, e1,
41462 gen_rtx_MULT (mode, e0, x0)));
41463
41464 /* e2 = e1 - 3. */
41465 mthree = force_reg (mode, mthree);
41466 emit_insn (gen_rtx_SET (VOIDmode, e2,
41467 gen_rtx_PLUS (mode, e1, mthree)));
41468
41469 mhalf = force_reg (mode, mhalf);
41470 if (recip)
41471 /* e3 = -.5 * x0 */
41472 emit_insn (gen_rtx_SET (VOIDmode, e3,
41473 gen_rtx_MULT (mode, x0, mhalf)));
41474 else
41475 /* e3 = -.5 * e0 */
41476 emit_insn (gen_rtx_SET (VOIDmode, e3,
41477 gen_rtx_MULT (mode, e0, mhalf)));
41478 /* ret = e2 * e3 */
41479 emit_insn (gen_rtx_SET (VOIDmode, res,
41480 gen_rtx_MULT (mode, e2, e3)));
41481 }
41482
41483 #ifdef TARGET_SOLARIS
41484 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
41485
41486 static void
41487 i386_solaris_elf_named_section (const char *name, unsigned int flags,
41488 tree decl)
41489 {
41490 /* With Binutils 2.15, the "@unwind" marker must be specified on
41491 every occurrence of the ".eh_frame" section, not just the first
41492 one. */
41493 if (TARGET_64BIT
41494 && strcmp (name, ".eh_frame") == 0)
41495 {
41496 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
41497 flags & SECTION_WRITE ? "aw" : "a");
41498 return;
41499 }
41500
41501 #ifndef USE_GAS
41502 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
41503 {
41504 solaris_elf_asm_comdat_section (name, flags, decl);
41505 return;
41506 }
41507 #endif
41508
41509 default_elf_asm_named_section (name, flags, decl);
41510 }
41511 #endif /* TARGET_SOLARIS */
41512
41513 /* Return the mangling of TYPE if it is an extended fundamental type. */
41514
41515 static const char *
41516 ix86_mangle_type (const_tree type)
41517 {
41518 type = TYPE_MAIN_VARIANT (type);
41519
41520 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
41521 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
41522 return NULL;
41523
41524 switch (TYPE_MODE (type))
41525 {
41526 case TFmode:
41527 /* __float128 is "g". */
41528 return "g";
41529 case XFmode:
41530 /* "long double" or __float80 is "e". */
41531 return "e";
41532 default:
41533 return NULL;
41534 }
41535 }
41536
41537 /* For 32-bit code we can save PIC register setup by using
41538 the __stack_chk_fail_local hidden function instead of calling
41539 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
41540 register, so it is better to call __stack_chk_fail directly. */
41541
41542 static tree ATTRIBUTE_UNUSED
41543 ix86_stack_protect_fail (void)
41544 {
41545 return TARGET_64BIT
41546 ? default_external_stack_protect_fail ()
41547 : default_hidden_stack_protect_fail ();
41548 }
41549
41550 /* Select a format to encode pointers in exception handling data. CODE
41551 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
41552 true if the symbol may be affected by dynamic relocations.
41553
41554 ??? All x86 object file formats are capable of representing this.
41555 After all, the relocation needed is the same as for the call insn.
41556 Whether or not a particular assembler allows us to enter such, I
41557 guess we'll have to see. */
41558 int
41559 asm_preferred_eh_data_format (int code, int global)
41560 {
41561 if (flag_pic)
41562 {
41563 int type = DW_EH_PE_sdata8;
41564 if (!TARGET_64BIT
41565 || ix86_cmodel == CM_SMALL_PIC
41566 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
41567 type = DW_EH_PE_sdata4;
41568 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
41569 }
41570 if (ix86_cmodel == CM_SMALL
41571 || (ix86_cmodel == CM_MEDIUM && code))
41572 return DW_EH_PE_udata4;
41573 return DW_EH_PE_absptr;
41574 }
41575 \f
41576 /* Expand copysign from SIGN to the positive value ABS_VALUE
41577 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
41578 the sign-bit. */
41579 static void
41580 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
41581 {
41582 enum machine_mode mode = GET_MODE (sign);
41583 rtx sgn = gen_reg_rtx (mode);
41584 if (mask == NULL_RTX)
41585 {
41586 enum machine_mode vmode;
41587
41588 if (mode == SFmode)
41589 vmode = V4SFmode;
41590 else if (mode == DFmode)
41591 vmode = V2DFmode;
41592 else
41593 vmode = mode;
41594
41595 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
41596 if (!VECTOR_MODE_P (mode))
41597 {
41598 /* We need to generate a scalar mode mask in this case. */
41599 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41600 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41601 mask = gen_reg_rtx (mode);
41602 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41603 }
41604 }
41605 else
41606 mask = gen_rtx_NOT (mode, mask);
41607 emit_insn (gen_rtx_SET (VOIDmode, sgn,
41608 gen_rtx_AND (mode, mask, sign)));
41609 emit_insn (gen_rtx_SET (VOIDmode, result,
41610 gen_rtx_IOR (mode, abs_value, sgn)));
41611 }
41612
41613 /* Expand fabs (OP0) and return a new rtx that holds the result. The
41614 mask for masking out the sign-bit is stored in *SMASK, if that is
41615 non-null. */
41616 static rtx
41617 ix86_expand_sse_fabs (rtx op0, rtx *smask)
41618 {
41619 enum machine_mode vmode, mode = GET_MODE (op0);
41620 rtx xa, mask;
41621
41622 xa = gen_reg_rtx (mode);
41623 if (mode == SFmode)
41624 vmode = V4SFmode;
41625 else if (mode == DFmode)
41626 vmode = V2DFmode;
41627 else
41628 vmode = mode;
41629 mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
41630 if (!VECTOR_MODE_P (mode))
41631 {
41632 /* We need to generate a scalar mode mask in this case. */
41633 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
41634 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
41635 mask = gen_reg_rtx (mode);
41636 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
41637 }
41638 emit_insn (gen_rtx_SET (VOIDmode, xa,
41639 gen_rtx_AND (mode, op0, mask)));
41640
41641 if (smask)
41642 *smask = mask;
41643
41644 return xa;
41645 }
41646
41647 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
41648 swapping the operands if SWAP_OPERANDS is true. The expanded
41649 code is a forward jump to a newly created label in case the
41650 comparison is true. The generated label rtx is returned. */
41651 static rtx
41652 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
41653 bool swap_operands)
41654 {
41655 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
41656 rtx label, tmp;
41657
41658 if (swap_operands)
41659 {
41660 tmp = op0;
41661 op0 = op1;
41662 op1 = tmp;
41663 }
41664
41665 label = gen_label_rtx ();
41666 tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
41667 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41668 gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
41669 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
41670 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
41671 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
41672 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
41673 JUMP_LABEL (tmp) = label;
41674
41675 return label;
41676 }
41677
41678 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
41679 using comparison code CODE. Operands are swapped for the comparison if
41680 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
41681 static rtx
41682 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
41683 bool swap_operands)
41684 {
41685 rtx (*insn)(rtx, rtx, rtx, rtx);
41686 enum machine_mode mode = GET_MODE (op0);
41687 rtx mask = gen_reg_rtx (mode);
41688
41689 if (swap_operands)
41690 {
41691 rtx tmp = op0;
41692 op0 = op1;
41693 op1 = tmp;
41694 }
41695
41696 insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
41697
41698 emit_insn (insn (mask, op0, op1,
41699 gen_rtx_fmt_ee (code, mode, op0, op1)));
41700 return mask;
41701 }
41702
41703 /* Generate and return a rtx of mode MODE for 2**n where n is the number
41704 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
41705 static rtx
41706 ix86_gen_TWO52 (enum machine_mode mode)
41707 {
41708 REAL_VALUE_TYPE TWO52r;
41709 rtx TWO52;
41710
41711 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
41712 TWO52 = const_double_from_real_value (TWO52r, mode);
41713 TWO52 = force_reg (mode, TWO52);
41714
41715 return TWO52;
41716 }
41717
41718 /* Expand SSE sequence for computing lround from OP1 storing
41719 into OP0. */
41720 void
41721 ix86_expand_lround (rtx op0, rtx op1)
41722 {
41723 /* C code for the stuff we're doing below:
41724 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
41725 return (long)tmp;
41726 */
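/* The adjustment is nextafter (0.5, 0.0) rather than 0.5 so that
   values just below 0.5 do not round up to 1.0 in the addition and
   then truncate to 1; with the predecessor of 0.5 such sums stay
   strictly below 1.0.  */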
41727 enum machine_mode mode = GET_MODE (op1);
41728 const struct real_format *fmt;
41729 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
41730 rtx adj;
41731
41732 /* load nextafter (0.5, 0.0) */
41733 fmt = REAL_MODE_FORMAT (mode);
41734 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
41735 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
41736
41737 /* adj = copysign (0.5, op1) */
41738 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
41739 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
41740
41741 /* adj = op1 + adj */
41742 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
41743
41744 /* op0 = (imode)adj */
41745 expand_fix (op0, adj, 0);
41746 }
41747
41748 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
41749 into OP0. */
41750 void
41751 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
41752 {
41753 /* C code for the stuff we're doing below (for do_floor):
41754 xi = (long)op1;
41755 xi -= (double)xi > op1 ? 1 : 0;
41756 return xi;
41757 */
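/* For instance, with do_floor and op1 == -1.5: the conversion truncates
   towards zero, so xi == -1; (double) -1 > -1.5, hence 1 is subtracted
   and the result is -2, as expected for lfloor.  */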
41758 enum machine_mode fmode = GET_MODE (op1);
41759 enum machine_mode imode = GET_MODE (op0);
41760 rtx ireg, freg, label, tmp;
41761
41762 /* reg = (long)op1 */
41763 ireg = gen_reg_rtx (imode);
41764 expand_fix (ireg, op1, 0);
41765
41766 /* freg = (double)reg */
41767 freg = gen_reg_rtx (fmode);
41768 expand_float (freg, ireg, 0);
41769
41770 /* ireg = (freg > op1) ? ireg - 1 : ireg */
41771 label = ix86_expand_sse_compare_and_jump (UNLE,
41772 freg, op1, !do_floor);
41773 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
41774 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
41775 emit_move_insn (ireg, tmp);
41776
41777 emit_label (label);
41778 LABEL_NUSES (label) = 1;
41779
41780 emit_move_insn (op0, ireg);
41781 }
41782
41783 /* Expand rint (round OPERAND1 to an integral value in the current
41784 rounding mode) storing the result in OPERAND0. */
41785 void
41786 ix86_expand_rint (rtx operand0, rtx operand1)
41787 {
41788 /* C code for the stuff we're doing below:
41789 xa = fabs (operand1);
41790 if (!isless (xa, 2**52))
41791 return operand1;
41792 xa = xa + 2**52 - 2**52;
41793 return copysign (xa, operand1);
41794 */
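/* Operands with |x| >= 2**52 (which includes Inf and NaN) are already
   integral, so the compare-and-jump below skips the computation and RES
   keeps the original value.  Copying the sign back at the end makes
   rint (-0.0) yield -0.0 and keeps negative inputs that round to zero
   negative.  */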
41795 enum machine_mode mode = GET_MODE (operand0);
41796 rtx res, xa, label, TWO52, mask;
41797
41798 res = gen_reg_rtx (mode);
41799 emit_move_insn (res, operand1);
41800
41801 /* xa = abs (operand1) */
41802 xa = ix86_expand_sse_fabs (res, &mask);
41803
41804 /* if (!isless (xa, TWO52)) goto label; */
41805 TWO52 = ix86_gen_TWO52 (mode);
41806 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41807
41808 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41809 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41810
41811 ix86_sse_copysign_to_positive (res, xa, res, mask);
41812
41813 emit_label (label);
41814 LABEL_NUSES (label) = 1;
41815
41816 emit_move_insn (operand0, res);
41817 }
41818
41819 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41820 into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
41821 void
41822 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
41823 {
41824 /* C code for the stuff we expand below.
41825 double xa = fabs (x), x2;
41826 if (!isless (xa, TWO52))
41827 return x;
41828 xa = xa + TWO52 - TWO52;
41829 x2 = copysign (xa, x);
41830 Compensate. Floor:
41831 if (x2 > x)
41832 x2 -= 1;
41833 Compensate. Ceil:
41834 if (x2 < x)
41835 x2 -= -1;
41836 return x2;
41837 */
41838 enum machine_mode mode = GET_MODE (operand0);
41839 rtx xa, TWO52, tmp, label, one, res, mask;
41840
41841 TWO52 = ix86_gen_TWO52 (mode);
41842
41843 /* Temporary for holding the result, initialized to the input
41844 operand to ease control flow. */
41845 res = gen_reg_rtx (mode);
41846 emit_move_insn (res, operand1);
41847
41848 /* xa = abs (operand1) */
41849 xa = ix86_expand_sse_fabs (res, &mask);
41850
41851 /* if (!isless (xa, TWO52)) goto label; */
41852 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41853
41854 /* xa = xa + TWO52 - TWO52; */
41855 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41856 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
41857
41858 /* xa = copysign (xa, operand1) */
41859 ix86_sse_copysign_to_positive (xa, xa, res, mask);
41860
41861 /* generate 1.0 or -1.0 */
41862 one = force_reg (mode,
41863 const_double_from_real_value (do_floor
41864 ? dconst1 : dconstm1, mode));
41865
41866 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41867 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41868 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41869 gen_rtx_AND (mode, one, tmp)));
41870 /* We always need to subtract here to preserve signed zero. */
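/* With the default rounding mode, -0.0 + +0.0 yields +0.0 while
   -0.0 - +0.0 yields -0.0, so subtracting either 1.0 or -1.0 (or the
   zero produced when no adjustment is needed) keeps the sign of a zero
   result intact.  */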
41871 tmp = expand_simple_binop (mode, MINUS,
41872 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41873 emit_move_insn (res, tmp);
41874
41875 emit_label (label);
41876 LABEL_NUSES (label) = 1;
41877
41878 emit_move_insn (operand0, res);
41879 }
41880
41881 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
41882 into OPERAND0. */
41883 void
41884 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
41885 {
41886 /* C code for the stuff we expand below.
41887 double xa = fabs (x), x2;
41888 if (!isless (xa, TWO52))
41889 return x;
41890 x2 = (double)(long)x;
41891 Compensate. Floor:
41892 if (x2 > x)
41893 x2 -= 1;
41894 Compensate. Ceil:
41895 if (x2 < x)
41896 x2 += 1;
41897 if (HONOR_SIGNED_ZEROS (mode))
41898 return copysign (x2, x);
41899 return x2;
41900 */
41901 enum machine_mode mode = GET_MODE (operand0);
41902 rtx xa, xi, TWO52, tmp, label, one, res, mask;
41903
41904 TWO52 = ix86_gen_TWO52 (mode);
41905
41906 /* Temporary for holding the result, initialized to the input
41907 operand to ease control flow. */
41908 res = gen_reg_rtx (mode);
41909 emit_move_insn (res, operand1);
41910
41911 /* xa = abs (operand1) */
41912 xa = ix86_expand_sse_fabs (res, &mask);
41913
41914 /* if (!isless (xa, TWO52)) goto label; */
41915 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41916
41917 /* xa = (double)(long)x */
41918 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
41919 expand_fix (xi, res, 0);
41920 expand_float (xa, xi, 0);
41921
41922 /* generate 1.0 */
41923 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
41924
41925 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
41926 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
41927 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41928 gen_rtx_AND (mode, one, tmp)));
41929 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
41930 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
41931 emit_move_insn (res, tmp);
41932
41933 if (HONOR_SIGNED_ZEROS (mode))
41934 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
41935
41936 emit_label (label);
41937 LABEL_NUSES (label) = 1;
41938
41939 emit_move_insn (operand0, res);
41940 }
41941
41942 /* Expand SSE sequence for computing round from OPERAND1 storing
41943 into OPERAND0. A sequence that works without relying on DImode truncation
41944 via cvttsd2siq, which is only available on 64-bit targets. */
41945 void
41946 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
41947 {
41948 /* C code for the stuff we expand below.
41949 double xa = fabs (x), xa2, x2;
41950 if (!isless (xa, TWO52))
41951 return x;
41952 Using the absolute value and copying back sign makes
41953 -0.0 -> -0.0 correct.
41954 xa2 = xa + TWO52 - TWO52;
41955 Compensate.
41956 dxa = xa2 - xa;
41957 if (dxa <= -0.5)
41958 xa2 += 1;
41959 else if (dxa > 0.5)
41960 xa2 -= 1;
41961 x2 = copysign (xa2, x);
41962 return x2;
41963 */
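/* For instance, x == 2.5 gives xa2 == 2.0 (the addition of TWO52 rounds
   to even), so dxa == -0.5 and the dxa <= -0.5 compensation bumps the
   result to 3.0 -- the round-half-away-from-zero semantics of round.  */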
41964 enum machine_mode mode = GET_MODE (operand0);
41965 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
41966
41967 TWO52 = ix86_gen_TWO52 (mode);
41968
41969 /* Temporary for holding the result, initialized to the input
41970 operand to ease control flow. */
41971 res = gen_reg_rtx (mode);
41972 emit_move_insn (res, operand1);
41973
41974 /* xa = abs (operand1) */
41975 xa = ix86_expand_sse_fabs (res, &mask);
41976
41977 /* if (!isless (xa, TWO52)) goto label; */
41978 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
41979
41980 /* xa2 = xa + TWO52 - TWO52; */
41981 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
41982 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
41983
41984 /* dxa = xa2 - xa; */
41985 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
41986
41987 /* generate 0.5, 1.0 and -0.5 */
41988 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
41989 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
41990 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
41991 0, OPTAB_DIRECT);
41992
41993 /* Compensate. */
41994 tmp = gen_reg_rtx (mode);
41995 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
41996 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
41997 emit_insn (gen_rtx_SET (VOIDmode, tmp,
41998 gen_rtx_AND (mode, one, tmp)));
41999 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42000 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
42001 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
42002 emit_insn (gen_rtx_SET (VOIDmode, tmp,
42003 gen_rtx_AND (mode, one, tmp)));
42004 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
42005
42006 /* res = copysign (xa2, operand1) */
42007 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
42008
42009 emit_label (label);
42010 LABEL_NUSES (label) = 1;
42011
42012 emit_move_insn (operand0, res);
42013 }
42014
42015 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42016 into OPERAND0. */
42017 void
42018 ix86_expand_trunc (rtx operand0, rtx operand1)
42019 {
42020 /* C code for SSE variant we expand below.
42021 double xa = fabs (x), x2;
42022 if (!isless (xa, TWO52))
42023 return x;
42024 x2 = (double)(long)x;
42025 if (HONOR_SIGNED_ZEROS (mode))
42026 return copysign (x2, x);
42027 return x2;
42028 */
42029 enum machine_mode mode = GET_MODE (operand0);
42030 rtx xa, xi, TWO52, label, res, mask;
42031
42032 TWO52 = ix86_gen_TWO52 (mode);
42033
42034 /* Temporary for holding the result, initialized to the input
42035 operand to ease control flow. */
42036 res = gen_reg_rtx (mode);
42037 emit_move_insn (res, operand1);
42038
42039 /* xa = abs (operand1) */
42040 xa = ix86_expand_sse_fabs (res, &mask);
42041
42042 /* if (!isless (xa, TWO52)) goto label; */
42043 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42044
42045 /* x = (double)(long)x */
42046 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42047 expand_fix (xi, res, 0);
42048 expand_float (res, xi, 0);
42049
42050 if (HONOR_SIGNED_ZEROS (mode))
42051 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
42052
42053 emit_label (label);
42054 LABEL_NUSES (label) = 1;
42055
42056 emit_move_insn (operand0, res);
42057 }
42058
42059 /* Expand SSE sequence for computing trunc from OPERAND1 storing
42060 into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
42061 void
42062 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
42063 {
42064 enum machine_mode mode = GET_MODE (operand0);
42065 rtx xa, mask, TWO52, label, one, res, smask, tmp;
42066
42067 /* C code for SSE variant we expand below.
42068 double xa = fabs (x), x2;
42069 if (!isless (xa, TWO52))
42070 return x;
42071 xa2 = xa + TWO52 - TWO52;
42072 Compensate:
42073 if (xa2 > xa)
42074 xa2 -= 1.0;
42075 x2 = copysign (xa2, x);
42076 return x2;
42077 */
42078
42079 TWO52 = ix86_gen_TWO52 (mode);
42080
42081 /* Temporary for holding the result, initialized to the input
42082 operand to ease control flow. */
42083 res = gen_reg_rtx (mode);
42084 emit_move_insn (res, operand1);
42085
42086 /* xa = abs (operand1) */
42087 xa = ix86_expand_sse_fabs (res, &smask);
42088
42089 /* if (!isless (xa, TWO52)) goto label; */
42090 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42091
42092 /* res = xa + TWO52 - TWO52; */
42093 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
42094 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
42095 emit_move_insn (res, tmp);
42096
42097 /* generate 1.0 */
42098 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
42099
42100 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
42101 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
42102 emit_insn (gen_rtx_SET (VOIDmode, mask,
42103 gen_rtx_AND (mode, mask, one)));
42104 tmp = expand_simple_binop (mode, MINUS,
42105 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
42106 emit_move_insn (res, tmp);
42107
42108 /* res = copysign (res, operand1) */
42109 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
42110
42111 emit_label (label);
42112 LABEL_NUSES (label) = 1;
42113
42114 emit_move_insn (operand0, res);
42115 }
42116
42117 /* Expand SSE sequence for computing round from OPERAND1 storing
42118 into OPERAND0. */
42119 void
42120 ix86_expand_round (rtx operand0, rtx operand1)
42121 {
42122 /* C code for the stuff we're doing below:
42123 double xa = fabs (x);
42124 if (!isless (xa, TWO52))
42125 return x;
42126 xa = (double)(long)(xa + nextafter (0.5, 0.0));
42127 return copysign (xa, x);
42128 */
42129 enum machine_mode mode = GET_MODE (operand0);
42130 rtx res, TWO52, xa, label, xi, half, mask;
42131 const struct real_format *fmt;
42132 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42133
42134 /* Temporary for holding the result, initialized to the input
42135 operand to ease control flow. */
42136 res = gen_reg_rtx (mode);
42137 emit_move_insn (res, operand1);
42138
42139 TWO52 = ix86_gen_TWO52 (mode);
42140 xa = ix86_expand_sse_fabs (res, &mask);
42141 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
42142
42143 /* load nextafter (0.5, 0.0) */
42144 fmt = REAL_MODE_FORMAT (mode);
42145 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42146 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42147
42148 /* xa = xa + 0.5 */
42149 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
42150 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
42151
42152 /* xa = (double)(int64_t)xa */
42153 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
42154 expand_fix (xi, xa, 0);
42155 expand_float (xa, xi, 0);
42156
42157 /* res = copysign (xa, operand1) */
42158 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
42159
42160 emit_label (label);
42161 LABEL_NUSES (label) = 1;
42162
42163 emit_move_insn (operand0, res);
42164 }
42165
42166 /* Expand SSE sequence for computing round
42167 from OP1 storing into OP0 using sse4 round insn. */
42168 void
42169 ix86_expand_round_sse4 (rtx op0, rtx op1)
42170 {
42171 enum machine_mode mode = GET_MODE (op0);
42172 rtx e1, e2, res, half;
42173 const struct real_format *fmt;
42174 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
42175 rtx (*gen_copysign) (rtx, rtx, rtx);
42176 rtx (*gen_round) (rtx, rtx, rtx);
42177
42178 switch (mode)
42179 {
42180 case SFmode:
42181 gen_copysign = gen_copysignsf3;
42182 gen_round = gen_sse4_1_roundsf2;
42183 break;
42184 case DFmode:
42185 gen_copysign = gen_copysigndf3;
42186 gen_round = gen_sse4_1_rounddf2;
42187 break;
42188 default:
42189 gcc_unreachable ();
42190 }
42191
42192 /* round (a) = trunc (a + copysign (0.5, a)) */
42193
42194 /* load nextafter (0.5, 0.0) */
42195 fmt = REAL_MODE_FORMAT (mode);
42196 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
42197 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
42198 half = const_double_from_real_value (pred_half, mode);
42199
42200 /* e1 = copysign (0.5, op1) */
42201 e1 = gen_reg_rtx (mode);
42202 emit_insn (gen_copysign (e1, half, op1));
42203
42204 /* e2 = op1 + e1 */
42205 e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
42206
42207 /* res = trunc (e2) */
42208 res = gen_reg_rtx (mode);
42209 emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
42210
42211 emit_move_insn (op0, res);
42212 }
42213 \f
42214
42215 /* Table of valid machine attributes. */
42216 static const struct attribute_spec ix86_attribute_table[] =
42217 {
42218 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
42219 affects_type_identity } */
42220 /* Stdcall attribute says callee is responsible for popping arguments
42221 if they are not variable. */
42222 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42223 true },
42224 /* Fastcall attribute says callee is responsible for popping arguments
42225 if they are not variable. */
42226 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42227 true },
42228 /* Thiscall attribute says callee is responsible for popping arguments
42229 if they are not variable. */
42230 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42231 true },
42232 /* Cdecl attribute says the callee is a normal C declaration */
42233 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42234 true },
42235 /* Regparm attribute specifies how many integer arguments are to be
42236 passed in registers. */
42237 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute,
42238 true },
42239 /* Sseregparm attribute says we are using x86_64 calling conventions
42240 for FP arguments. */
42241 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
42242 true },
42243 /* The transactional memory builtins are implicitly regparm or fastcall
42244 depending on the ABI. Override the generic do-nothing attribute that
42245 these builtins were declared with. */
42246 { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
42247 true },
42248 /* force_align_arg_pointer says this function realigns the stack at entry. */
42249 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
42250 false, true, true, ix86_handle_cconv_attribute, false },
42251 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
42252 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
42253 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
42254 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute,
42255 false },
42256 #endif
42257 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42258 false },
42259 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute,
42260 false },
42261 #ifdef SUBTARGET_ATTRIBUTE_TABLE
42262 SUBTARGET_ATTRIBUTE_TABLE,
42263 #endif
42264 /* ms_abi and sysv_abi calling convention function attributes. */
42265 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42266 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
42267 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
42268 false },
42269 { "callee_pop_aggregate_return", 1, 1, false, true, true,
42270 ix86_handle_callee_pop_aggregate_return, true },
42271 /* End element. */
42272 { NULL, 0, 0, false, false, false, NULL, false }
42273 };
42274
42275 /* Implement targetm.vectorize.builtin_vectorization_cost. */
42276 static int
42277 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
42278 tree vectype,
42279 int misalign ATTRIBUTE_UNUSED)
42280 {
42281 unsigned elements;
42282
42283 switch (type_of_cost)
42284 {
42285 case scalar_stmt:
42286 return ix86_cost->scalar_stmt_cost;
42287
42288 case scalar_load:
42289 return ix86_cost->scalar_load_cost;
42290
42291 case scalar_store:
42292 return ix86_cost->scalar_store_cost;
42293
42294 case vector_stmt:
42295 return ix86_cost->vec_stmt_cost;
42296
42297 case vector_load:
42298 return ix86_cost->vec_align_load_cost;
42299
42300 case vector_store:
42301 return ix86_cost->vec_store_cost;
42302
42303 case vec_to_scalar:
42304 return ix86_cost->vec_to_scalar_cost;
42305
42306 case scalar_to_vec:
42307 return ix86_cost->scalar_to_vec_cost;
42308
42309 case unaligned_load:
42310 case unaligned_store:
42311 return ix86_cost->vec_unalign_load_cost;
42312
42313 case cond_branch_taken:
42314 return ix86_cost->cond_taken_branch_cost;
42315
42316 case cond_branch_not_taken:
42317 return ix86_cost->cond_not_taken_branch_cost;
42318
42319 case vec_perm:
42320 case vec_promote_demote:
42321 return ix86_cost->vec_stmt_cost;
42322
42323 case vec_construct:
42324 elements = TYPE_VECTOR_SUBPARTS (vectype);
42325 return elements / 2 + 1;
42326
42327 default:
42328 gcc_unreachable ();
42329 }
42330 }
42331
42332 /* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
42333 insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
42334 insn every time. */
42335
42336 static GTY(()) rtx vselect_insn;
42337
42338 /* Initialize vselect_insn. */
42339
42340 static void
42341 init_vselect_insn (void)
42342 {
42343 unsigned i;
42344 rtx x;
42345
42346 x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
42347 for (i = 0; i < MAX_VECT_LEN; ++i)
42348 XVECEXP (x, 0, i) = const0_rtx;
42349 x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
42350 const0_rtx), x);
42351 x = gen_rtx_SET (VOIDmode, const0_rtx, x);
42352 start_sequence ();
42353 vselect_insn = emit_insn (x);
42354 end_sequence ();
42355 }
42356
42357 /* Construct (set target (vec_select op0 (parallel perm))) and
42358 return true if that's a valid instruction in the active ISA. */
42359
42360 static bool
42361 expand_vselect (rtx target, rtx op0, const unsigned char *perm,
42362 unsigned nelt, bool testing_p)
42363 {
42364 unsigned int i;
42365 rtx x, save_vconcat;
42366 int icode;
42367
42368 if (vselect_insn == NULL_RTX)
42369 init_vselect_insn ();
42370
42371 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
42372 PUT_NUM_ELEM (XVEC (x, 0), nelt);
42373 for (i = 0; i < nelt; ++i)
42374 XVECEXP (x, 0, i) = GEN_INT (perm[i]);
42375 save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42376 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
42377 PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
42378 SET_DEST (PATTERN (vselect_insn)) = target;
42379 icode = recog_memoized (vselect_insn);
42380
42381 if (icode >= 0 && !testing_p)
42382 emit_insn (copy_rtx (PATTERN (vselect_insn)));
42383
42384 SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
42385 XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
42386 INSN_CODE (vselect_insn) = -1;
42387
42388 return icode >= 0;
42389 }
42390
42391 /* Similar, but generate a vec_concat from op0 and op1 as well. */
42392
42393 static bool
42394 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
42395 const unsigned char *perm, unsigned nelt,
42396 bool testing_p)
42397 {
42398 enum machine_mode v2mode;
42399 rtx x;
42400 bool ok;
42401
42402 if (vselect_insn == NULL_RTX)
42403 init_vselect_insn ();
42404
42405 v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
42406 x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
42407 PUT_MODE (x, v2mode);
42408 XEXP (x, 0) = op0;
42409 XEXP (x, 1) = op1;
42410 ok = expand_vselect (target, x, perm, nelt, testing_p);
42411 XEXP (x, 0) = const0_rtx;
42412 XEXP (x, 1) = const0_rtx;
42413 return ok;
42414 }
42415
42416 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42417 in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
42418
42419 static bool
42420 expand_vec_perm_blend (struct expand_vec_perm_d *d)
42421 {
42422 enum machine_mode vmode = d->vmode;
42423 unsigned i, mask, nelt = d->nelt;
42424 rtx target, op0, op1, x;
42425 rtx rperm[32], vperm;
42426
42427 if (d->one_operand_p)
42428 return false;
42429 if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
42430 ;
42431 else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
42432 ;
42433 else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
42434 ;
42435 else
42436 return false;
42437
42438 /* This is a blend, not a permute. Elements must stay in their
42439 respective lanes. */
42440 for (i = 0; i < nelt; ++i)
42441 {
42442 unsigned e = d->perm[i];
42443 if (!(e == i || e == i + nelt))
42444 return false;
42445 }
42446
42447 if (d->testing_p)
42448 return true;
42449
42450 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
42451 decision should be extracted elsewhere, so that we only try that
42452 sequence once all budget==3 options have been tried. */
42453 target = d->target;
42454 op0 = d->op0;
42455 op1 = d->op1;
42456 mask = 0;
42457
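/* For the modes handled directly below, the blend immediate simply has
   bit I set when element I of the result is taken from OP1.  E.g. a
   V8HImode permutation { 0, 9, 2, 11, 4, 13, 6, 15 } yields the pblendw
   immediate 0xaa.  */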
42458 switch (vmode)
42459 {
42460 case V4DFmode:
42461 case V8SFmode:
42462 case V2DFmode:
42463 case V4SFmode:
42464 case V8HImode:
42465 case V8SImode:
42466 for (i = 0; i < nelt; ++i)
42467 mask |= (d->perm[i] >= nelt) << i;
42468 break;
42469
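/* The remaining modes have no native blend with an immediate of their own
   element width; they are either re-viewed in a mode that does (V8HImode
   or V8SImode, widening each element's choice into a group of mask bits)
   or fall back to pblendvb / vpblendvb with a constant mask vector.  */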
42470 case V2DImode:
42471 for (i = 0; i < 2; ++i)
42472 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
42473 vmode = V8HImode;
42474 goto do_subreg;
42475
42476 case V4SImode:
42477 for (i = 0; i < 4; ++i)
42478 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42479 vmode = V8HImode;
42480 goto do_subreg;
42481
42482 case V16QImode:
42483 /* See if bytes move in pairs so we can use pblendw with
42484 an immediate argument, rather than pblendvb with a vector
42485 argument. */
42486 for (i = 0; i < 16; i += 2)
42487 if (d->perm[i] + 1 != d->perm[i + 1])
42488 {
42489 use_pblendvb:
42490 for (i = 0; i < nelt; ++i)
42491 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
42492
42493 finish_pblendvb:
42494 vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
42495 vperm = force_reg (vmode, vperm);
42496
42497 if (GET_MODE_SIZE (vmode) == 16)
42498 emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
42499 else
42500 emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
42501 if (target != d->target)
42502 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42503 return true;
42504 }
42505
42506 for (i = 0; i < 8; ++i)
42507 mask |= (d->perm[i * 2] >= 16) << i;
42508 vmode = V8HImode;
42509 /* FALLTHRU */
42510
42511 do_subreg:
42512 target = gen_reg_rtx (vmode);
42513 op0 = gen_lowpart (vmode, op0);
42514 op1 = gen_lowpart (vmode, op1);
42515 break;
42516
42517 case V32QImode:
42518 /* See if bytes move in pairs. If not, vpblendvb must be used. */
42519 for (i = 0; i < 32; i += 2)
42520 if (d->perm[i] + 1 != d->perm[i + 1])
42521 goto use_pblendvb;
42522 /* See if bytes move in quadruplets. If yes, vpblendd
42523 with immediate can be used. */
42524 for (i = 0; i < 32; i += 4)
42525 if (d->perm[i] + 2 != d->perm[i + 2])
42526 break;
42527 if (i < 32)
42528 {
42529 /* See if bytes move the same in both lanes. If yes,
42530 vpblendw with immediate can be used. */
42531 for (i = 0; i < 16; i += 2)
42532 if (d->perm[i] + 16 != d->perm[i + 16])
42533 goto use_pblendvb;
42534
42535 /* Use vpblendw. */
42536 for (i = 0; i < 16; ++i)
42537 mask |= (d->perm[i * 2] >= 32) << i;
42538 vmode = V16HImode;
42539 goto do_subreg;
42540 }
42541
42542 /* Use vpblendd. */
42543 for (i = 0; i < 8; ++i)
42544 mask |= (d->perm[i * 4] >= 32) << i;
42545 vmode = V8SImode;
42546 goto do_subreg;
42547
42548 case V16HImode:
42549 /* See if words move in pairs. If yes, vpblendd can be used. */
42550 for (i = 0; i < 16; i += 2)
42551 if (d->perm[i] + 1 != d->perm[i + 1])
42552 break;
42553 if (i < 16)
42554 {
42555 /* See if words move the same in both lanes. If not,
42556 vpblendvb must be used. */
42557 for (i = 0; i < 8; i++)
42558 if (d->perm[i] + 8 != d->perm[i + 8])
42559 {
42560 /* Use vpblendvb. */
42561 for (i = 0; i < 32; ++i)
42562 rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
42563
42564 vmode = V32QImode;
42565 nelt = 32;
42566 target = gen_reg_rtx (vmode);
42567 op0 = gen_lowpart (vmode, op0);
42568 op1 = gen_lowpart (vmode, op1);
42569 goto finish_pblendvb;
42570 }
42571
42572 /* Use vpblendw. */
42573 for (i = 0; i < 16; ++i)
42574 mask |= (d->perm[i] >= 16) << i;
42575 break;
42576 }
42577
42578 /* Use vpblendd. */
42579 for (i = 0; i < 8; ++i)
42580 mask |= (d->perm[i * 2] >= 16) << i;
42581 vmode = V8SImode;
42582 goto do_subreg;
42583
42584 case V4DImode:
42585 /* Use vpblendd. */
42586 for (i = 0; i < 4; ++i)
42587 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
42588 vmode = V8SImode;
42589 goto do_subreg;
42590
42591 default:
42592 gcc_unreachable ();
42593 }
42594
42595 /* This matches five different patterns with the different modes. */
42596 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
42597 x = gen_rtx_SET (VOIDmode, target, x);
42598 emit_insn (x);
42599 if (target != d->target)
42600 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42601
42602 return true;
42603 }
42604
42605 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42606 in terms of the variable form of vpermilps.
42607
42608 Note that we will have already failed the immediate input vpermilps,
42609 which requires that the high and low part shuffle be identical; the
42610 variable form doesn't require that. */
42611
42612 static bool
42613 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
42614 {
42615 rtx rperm[8], vperm;
42616 unsigned i;
42617
42618 if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
42619 return false;
42620
42621 /* We can only permute within the 128-bit lane. */
42622 for (i = 0; i < 8; ++i)
42623 {
42624 unsigned e = d->perm[i];
42625 if (i < 4 ? e >= 4 : e < 4)
42626 return false;
42627 }
42628
42629 if (d->testing_p)
42630 return true;
42631
42632 for (i = 0; i < 8; ++i)
42633 {
42634 unsigned e = d->perm[i];
42635
42636 /* Within each 128-bit lane, the elements of op0 are numbered
42637 from 0 and the elements of op1 are numbered from 4. */
42638 if (e >= 8 + 4)
42639 e -= 8;
42640 else if (e >= 4)
42641 e -= 4;
42642
42643 rperm[i] = GEN_INT (e);
42644 }
42645
42646 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
42647 vperm = force_reg (V8SImode, vperm);
42648 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
42649
42650 return true;
42651 }
42652
42653 /* Return true if permutation D can be performed as VMODE permutation
42654 instead. */
42655
42656 static bool
42657 valid_perm_using_mode_p (enum machine_mode vmode, struct expand_vec_perm_d *d)
42658 {
42659 unsigned int i, j, chunk;
42660
42661 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
42662 || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
42663 || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
42664 return false;
42665
42666 if (GET_MODE_NUNITS (vmode) >= d->nelt)
42667 return true;
42668
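/* Otherwise each group of CHUNK consecutive elements must start at a
   multiple of CHUNK and be consecutive.  E.g. the V16QImode permutation
   { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 } is representable as the
   V4SImode permutation { 1, 0, 3, 2 }.  */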
42669 chunk = d->nelt / GET_MODE_NUNITS (vmode);
42670 for (i = 0; i < d->nelt; i += chunk)
42671 if (d->perm[i] & (chunk - 1))
42672 return false;
42673 else
42674 for (j = 1; j < chunk; ++j)
42675 if (d->perm[i] + j != d->perm[i + j])
42676 return false;
42677
42678 return true;
42679 }
42680
42681 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42682 in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128. */
42683
42684 static bool
42685 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
42686 {
42687 unsigned i, nelt, eltsz, mask;
42688 unsigned char perm[32];
42689 enum machine_mode vmode = V16QImode;
42690 rtx rperm[32], vperm, target, op0, op1;
42691
42692 nelt = d->nelt;
42693
42694 if (!d->one_operand_p)
42695 {
42696 if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
42697 {
42698 if (TARGET_AVX2
42699 && valid_perm_using_mode_p (V2TImode, d))
42700 {
42701 if (d->testing_p)
42702 return true;
42703
42704 /* Use vperm2i128 insn. The pattern uses
42705 V4DImode instead of V2TImode. */
42706 target = d->target;
42707 if (d->vmode != V4DImode)
42708 target = gen_reg_rtx (V4DImode);
42709 op0 = gen_lowpart (V4DImode, d->op0);
42710 op1 = gen_lowpart (V4DImode, d->op1);
42711 rperm[0]
42712 = GEN_INT ((d->perm[0] / (nelt / 2))
42713 | ((d->perm[nelt / 2] / (nelt / 2)) << 4));
42714 emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
42715 if (target != d->target)
42716 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42717 return true;
42718 }
42719 return false;
42720 }
42721 }
42722 else
42723 {
42724 if (GET_MODE_SIZE (d->vmode) == 16)
42725 {
42726 if (!TARGET_SSSE3)
42727 return false;
42728 }
42729 else if (GET_MODE_SIZE (d->vmode) == 32)
42730 {
42731 if (!TARGET_AVX2)
42732 return false;
42733
42734 /* V4DImode should already have been handled through
42735 expand_vselect by the vpermq instruction. */
42736 gcc_assert (d->vmode != V4DImode);
42737
42738 vmode = V32QImode;
42739 if (d->vmode == V8SImode
42740 || d->vmode == V16HImode
42741 || d->vmode == V32QImode)
42742 {
42743 /* First see if vpermq can be used for
42744 V8SImode/V16HImode/V32QImode. */
42745 if (valid_perm_using_mode_p (V4DImode, d))
42746 {
42747 for (i = 0; i < 4; i++)
42748 perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
42749 if (d->testing_p)
42750 return true;
42751 target = gen_reg_rtx (V4DImode);
42752 if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
42753 perm, 4, false))
42754 {
42755 emit_move_insn (d->target,
42756 gen_lowpart (d->vmode, target));
42757 return true;
42758 }
42759 return false;
42760 }
42761
42762 /* Next see if vpermd can be used. */
42763 if (valid_perm_using_mode_p (V8SImode, d))
42764 vmode = V8SImode;
42765 }
42766 /* Or if vpermps can be used. */
42767 else if (d->vmode == V8SFmode)
42768 vmode = V8SImode;
42769
42770 if (vmode == V32QImode)
42771 {
42772 /* vpshufb only works intra lane; it cannot shuffle
42773 bytes between the 128-bit lanes. */
42774 for (i = 0; i < nelt; ++i)
42775 if ((d->perm[i] ^ i) & (nelt / 2))
42776 return false;
42777 }
42778 }
42779 else
42780 return false;
42781 }
42782
42783 if (d->testing_p)
42784 return true;
42785
42786 if (vmode == V8SImode)
42787 for (i = 0; i < 8; ++i)
42788 rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
42789 else
42790 {
42791 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
42792 if (!d->one_operand_p)
42793 mask = 2 * nelt - 1;
42794 else if (vmode == V16QImode)
42795 mask = nelt - 1;
42796 else
42797 mask = nelt / 2 - 1;
42798
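/* Expand each element index into ELTSZ consecutive byte selectors for the
   pshufb/vpperm control; e.g. a dword index E becomes the bytes
   { 4*E, 4*E+1, 4*E+2, 4*E+3 }.  */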
42799 for (i = 0; i < nelt; ++i)
42800 {
42801 unsigned j, e = d->perm[i] & mask;
42802 for (j = 0; j < eltsz; ++j)
42803 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
42804 }
42805 }
42806
42807 vperm = gen_rtx_CONST_VECTOR (vmode,
42808 gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
42809 vperm = force_reg (vmode, vperm);
42810
42811 target = d->target;
42812 if (d->vmode != vmode)
42813 target = gen_reg_rtx (vmode);
42814 op0 = gen_lowpart (vmode, d->op0);
42815 if (d->one_operand_p)
42816 {
42817 if (vmode == V16QImode)
42818 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
42819 else if (vmode == V32QImode)
42820 emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
42821 else if (vmode == V8SFmode)
42822 emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
42823 else
42824 emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
42825 }
42826 else
42827 {
42828 op1 = gen_lowpart (vmode, d->op1);
42829 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
42830 }
42831 if (target != d->target)
42832 emit_move_insn (d->target, gen_lowpart (d->vmode, target));
42833
42834 return true;
42835 }
42836
42837 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
42838 in a single instruction. */
42839
42840 static bool
42841 expand_vec_perm_1 (struct expand_vec_perm_d *d)
42842 {
42843 unsigned i, nelt = d->nelt;
42844 unsigned char perm2[MAX_VECT_LEN];
42845
42846 /* Check plain VEC_SELECT first, because AVX has instructions that could
42847 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
42848 input where SEL+CONCAT may not. */
42849 if (d->one_operand_p)
42850 {
42851 int mask = nelt - 1;
42852 bool identity_perm = true;
42853 bool broadcast_perm = true;
42854
42855 for (i = 0; i < nelt; i++)
42856 {
42857 perm2[i] = d->perm[i] & mask;
42858 if (perm2[i] != i)
42859 identity_perm = false;
42860 if (perm2[i])
42861 broadcast_perm = false;
42862 }
42863
42864 if (identity_perm)
42865 {
42866 if (!d->testing_p)
42867 emit_move_insn (d->target, d->op0);
42868 return true;
42869 }
42870 else if (broadcast_perm && TARGET_AVX2)
42871 {
42872 /* Use vpbroadcast{b,w,d}. */
42873 rtx (*gen) (rtx, rtx) = NULL;
42874 switch (d->vmode)
42875 {
42876 case V32QImode:
42877 gen = gen_avx2_pbroadcastv32qi_1;
42878 break;
42879 case V16HImode:
42880 gen = gen_avx2_pbroadcastv16hi_1;
42881 break;
42882 case V8SImode:
42883 gen = gen_avx2_pbroadcastv8si_1;
42884 break;
42885 case V16QImode:
42886 gen = gen_avx2_pbroadcastv16qi;
42887 break;
42888 case V8HImode:
42889 gen = gen_avx2_pbroadcastv8hi;
42890 break;
42891 case V8SFmode:
42892 gen = gen_avx2_vec_dupv8sf_1;
42893 break;
42894 /* For other modes prefer other shuffles this function creates. */
42895 default: break;
42896 }
42897 if (gen != NULL)
42898 {
42899 if (!d->testing_p)
42900 emit_insn (gen (d->target, d->op0));
42901 return true;
42902 }
42903 }
42904
42905 if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
42906 return true;
42907
42908 /* There are plenty of patterns in sse.md that are written for
42909 SEL+CONCAT and are not replicated for a single op. Perhaps
42910 that should be changed, to avoid the nastiness here. */
42911
42912 /* Recognize interleave style patterns, which means incrementing
42913 every other permutation operand. */
42914 for (i = 0; i < nelt; i += 2)
42915 {
42916 perm2[i] = d->perm[i] & mask;
42917 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
42918 }
42919 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42920 d->testing_p))
42921 return true;
42922
42923 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
42924 if (nelt >= 4)
42925 {
42926 for (i = 0; i < nelt; i += 4)
42927 {
42928 perm2[i + 0] = d->perm[i + 0] & mask;
42929 perm2[i + 1] = d->perm[i + 1] & mask;
42930 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
42931 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
42932 }
42933
42934 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
42935 d->testing_p))
42936 return true;
42937 }
42938 }
42939
42940 /* Finally, try the fully general two operand permute. */
42941 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
42942 d->testing_p))
42943 return true;
42944
42945 /* Recognize interleave style patterns with reversed operands. */
42946 if (!d->one_operand_p)
42947 {
42948 for (i = 0; i < nelt; ++i)
42949 {
42950 unsigned e = d->perm[i];
42951 if (e >= nelt)
42952 e -= nelt;
42953 else
42954 e += nelt;
42955 perm2[i] = e;
42956 }
42957
42958 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
42959 d->testing_p))
42960 return true;
42961 }
42962
42963 /* Try the SSE4.1 blend variable merge instructions. */
42964 if (expand_vec_perm_blend (d))
42965 return true;
42966
42967 /* Try one of the AVX vpermil variable permutations. */
42968 if (expand_vec_perm_vpermil (d))
42969 return true;
42970
42971 /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
42972 vpshufb, vpermd, vpermps or vpermq variable permutation. */
42973 if (expand_vec_perm_pshufb (d))
42974 return true;
42975
42976 /* Try the AVX512F vpermi2 instructions. */
42977 rtx vec[64];
42978 enum machine_mode mode = d->vmode;
42979 if (mode == V8DFmode)
42980 mode = V8DImode;
42981 else if (mode == V16SFmode)
42982 mode = V16SImode;
42983 for (i = 0; i < nelt; ++i)
42984 vec[i] = GEN_INT (d->perm[i]);
42985 rtx mask = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt, vec));
42986 if (ix86_expand_vec_perm_vpermi2 (d->target, d->op0, mask, d->op1))
42987 return true;
42988
42989 return false;
42990 }
42991
42992 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
42993 in terms of a pair of pshuflw + pshufhw instructions. */
42994
42995 static bool
42996 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
42997 {
42998 unsigned char perm2[MAX_VECT_LEN];
42999 unsigned i;
43000 bool ok;
43001
43002 if (d->vmode != V8HImode || !d->one_operand_p)
43003 return false;
43004
43005 /* The two permutations only operate in 64-bit lanes. */
43006 for (i = 0; i < 4; ++i)
43007 if (d->perm[i] >= 4)
43008 return false;
43009 for (i = 4; i < 8; ++i)
43010 if (d->perm[i] < 4)
43011 return false;
43012
43013 if (d->testing_p)
43014 return true;
43015
43016 /* Emit the pshuflw. */
43017 memcpy (perm2, d->perm, 4);
43018 for (i = 4; i < 8; ++i)
43019 perm2[i] = i;
43020 ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
43021 gcc_assert (ok);
43022
43023 /* Emit the pshufhw. */
43024 memcpy (perm2 + 4, d->perm + 4, 4);
43025 for (i = 0; i < 4; ++i)
43026 perm2[i] = i;
43027 ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
43028 gcc_assert (ok);
43029
43030 return true;
43031 }
43032
43033 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43034 the permutation using the SSSE3 palignr instruction. This succeeds
43035 when all of the elements in PERM fit within one vector and we merely
43036 need to shift them down so that a single vector permutation has a
43037 chance to succeed. */
43038
43039 static bool
43040 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
43041 {
43042 unsigned i, nelt = d->nelt;
43043 unsigned min, max;
43044 bool in_order, ok;
43045 rtx shift, target;
43046 struct expand_vec_perm_d dcopy;
43047
43048 /* Even with AVX, palignr only operates on 128-bit vectors. */
43049 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43050 return false;
43051
43052 min = nelt, max = 0;
43053 for (i = 0; i < nelt; ++i)
43054 {
43055 unsigned e = d->perm[i];
43056 if (e < min)
43057 min = e;
43058 if (e > max)
43059 max = e;
43060 }
43061 if (min == 0 || max - min >= nelt)
43062 return false;
43063
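/* E.g. the V8HImode permutation { 3,4,5,6,7,8,9,10 } has min == 3 and
   max == 10; shifting the concatenated operands right by three elements
   turns the remaining single-operand permutation into the identity.  */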
43064 /* Given that we have SSSE3, we know we'll be able to implement the
43065 single operand permutation after the palignr with pshufb. */
43066 if (d->testing_p)
43067 return true;
43068
43069 dcopy = *d;
43070 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
43071 target = gen_reg_rtx (TImode);
43072 emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
43073 gen_lowpart (TImode, d->op0), shift));
43074
43075 dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
43076 dcopy.one_operand_p = true;
43077
43078 in_order = true;
43079 for (i = 0; i < nelt; ++i)
43080 {
43081 unsigned e = dcopy.perm[i] - min;
43082 if (e != i)
43083 in_order = false;
43084 dcopy.perm[i] = e;
43085 }
43086
43087 /* Test for the degenerate case where the alignment by itself
43088 produces the desired permutation. */
43089 if (in_order)
43090 {
43091 emit_move_insn (d->target, dcopy.op0);
43092 return true;
43093 }
43094
43095 ok = expand_vec_perm_1 (&dcopy);
43096 gcc_assert (ok);
43097
43098 return ok;
43099 }
43100
43101 static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
43102
43103 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43104 a two vector permutation into a single vector permutation by using
43105 an interleave operation to merge the vectors. */
43106
43107 static bool
43108 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
43109 {
43110 struct expand_vec_perm_d dremap, dfinal;
43111 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
43112 unsigned HOST_WIDE_INT contents;
43113 unsigned char remap[2 * MAX_VECT_LEN];
43114 rtx seq;
43115 bool ok, same_halves = false;
43116
43117 if (GET_MODE_SIZE (d->vmode) == 16)
43118 {
43119 if (d->one_operand_p)
43120 return false;
43121 }
43122 else if (GET_MODE_SIZE (d->vmode) == 32)
43123 {
43124 if (!TARGET_AVX)
43125 return false;
43126 /* For 32-byte modes allow even d->one_operand_p.
43127 The lack of cross-lane shuffling in some instructions
43128 might prevent a single insn shuffle. */
43129 dfinal = *d;
43130 dfinal.testing_p = true;
43131 /* If expand_vec_perm_interleave3 can expand this into
43132 a 3 insn sequence, give up and let it be expanded as
43133 a 3 insn sequence. While that is one insn longer,
43134 it doesn't need a memory operand, and in the common
43135 case that both the interleave low and interleave high
43136 permutations with the same operands are adjacent, only
43137 4 insns are needed for both after CSE. */
43138 if (expand_vec_perm_interleave3 (&dfinal))
43139 return false;
43140 }
43141 else
43142 return false;
43143
43144 /* Examine from whence the elements come. */
43145 contents = 0;
43146 for (i = 0; i < nelt; ++i)
43147 contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
43148
43149 memset (remap, 0xff, sizeof (remap));
43150 dremap = *d;
43151
43152 if (GET_MODE_SIZE (d->vmode) == 16)
43153 {
43154 unsigned HOST_WIDE_INT h1, h2, h3, h4;
43155
43156 /* Split the two input vectors into 4 halves. */
43157 h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
43158 h2 = h1 << nelt2;
43159 h3 = h2 << nelt2;
43160 h4 = h3 << nelt2;
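/* h1 covers the low half of op0, h2 its high half, h3 the low half of
   op1 and h4 its high half.  */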
43161
43162 /* If the elements come from the low halves, use interleave low; similarly,
43163 use interleave high for the high halves. If the elements are from
43164 mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode shuffle. */
43165 if ((contents & (h1 | h3)) == contents)
43166 {
43167 /* punpckl* */
43168 for (i = 0; i < nelt2; ++i)
43169 {
43170 remap[i] = i * 2;
43171 remap[i + nelt] = i * 2 + 1;
43172 dremap.perm[i * 2] = i;
43173 dremap.perm[i * 2 + 1] = i + nelt;
43174 }
43175 if (!TARGET_SSE2 && d->vmode == V4SImode)
43176 dremap.vmode = V4SFmode;
43177 }
43178 else if ((contents & (h2 | h4)) == contents)
43179 {
43180 /* punpckh* */
43181 for (i = 0; i < nelt2; ++i)
43182 {
43183 remap[i + nelt2] = i * 2;
43184 remap[i + nelt + nelt2] = i * 2 + 1;
43185 dremap.perm[i * 2] = i + nelt2;
43186 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
43187 }
43188 if (!TARGET_SSE2 && d->vmode == V4SImode)
43189 dremap.vmode = V4SFmode;
43190 }
43191 else if ((contents & (h1 | h4)) == contents)
43192 {
43193 /* shufps */
43194 for (i = 0; i < nelt2; ++i)
43195 {
43196 remap[i] = i;
43197 remap[i + nelt + nelt2] = i + nelt2;
43198 dremap.perm[i] = i;
43199 dremap.perm[i + nelt2] = i + nelt + nelt2;
43200 }
43201 if (nelt != 4)
43202 {
43203 /* shufpd */
43204 dremap.vmode = V2DImode;
43205 dremap.nelt = 2;
43206 dremap.perm[0] = 0;
43207 dremap.perm[1] = 3;
43208 }
43209 }
43210 else if ((contents & (h2 | h3)) == contents)
43211 {
43212 /* shufps */
43213 for (i = 0; i < nelt2; ++i)
43214 {
43215 remap[i + nelt2] = i;
43216 remap[i + nelt] = i + nelt2;
43217 dremap.perm[i] = i + nelt2;
43218 dremap.perm[i + nelt2] = i + nelt;
43219 }
43220 if (nelt != 4)
43221 {
43222 /* shufpd */
43223 dremap.vmode = V2DImode;
43224 dremap.nelt = 2;
43225 dremap.perm[0] = 1;
43226 dremap.perm[1] = 2;
43227 }
43228 }
43229 else
43230 return false;
43231 }
43232 else
43233 {
43234 unsigned int nelt4 = nelt / 4, nzcnt = 0;
43235 unsigned HOST_WIDE_INT q[8];
43236 unsigned int nonzero_halves[4];
43237
43238 /* Split the two input vectors into 8 quarters. */
43239 q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
43240 for (i = 1; i < 8; ++i)
43241 q[i] = q[0] << (nelt4 * i);
43242 for (i = 0; i < 4; ++i)
43243 if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
43244 {
43245 nonzero_halves[nzcnt] = i;
43246 ++nzcnt;
43247 }
43248
43249 if (nzcnt == 1)
43250 {
43251 gcc_assert (d->one_operand_p);
43252 nonzero_halves[1] = nonzero_halves[0];
43253 same_halves = true;
43254 }
43255 else if (d->one_operand_p)
43256 {
43257 gcc_assert (nonzero_halves[0] == 0);
43258 gcc_assert (nonzero_halves[1] == 1);
43259 }
43260
43261 if (nzcnt <= 2)
43262 {
43263 if (d->perm[0] / nelt2 == nonzero_halves[1])
43264 {
43265 /* Attempt to increase the likelihood that dfinal
43266 shuffle will be intra-lane. */
43267 char tmph = nonzero_halves[0];
43268 nonzero_halves[0] = nonzero_halves[1];
43269 nonzero_halves[1] = tmph;
43270 }
43271
43272 /* vperm2f128 or vperm2i128. */
43273 for (i = 0; i < nelt2; ++i)
43274 {
43275 remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
43276 remap[i + nonzero_halves[0] * nelt2] = i;
43277 dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
43278 dremap.perm[i] = i + nonzero_halves[0] * nelt2;
43279 }
43280
43281 if (d->vmode != V8SFmode
43282 && d->vmode != V4DFmode
43283 && d->vmode != V8SImode)
43284 {
43285 dremap.vmode = V8SImode;
43286 dremap.nelt = 8;
43287 for (i = 0; i < 4; ++i)
43288 {
43289 dremap.perm[i] = i + nonzero_halves[0] * 4;
43290 dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
43291 }
43292 }
43293 }
43294 else if (d->one_operand_p)
43295 return false;
43296 else if (TARGET_AVX2
43297 && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
43298 {
43299 /* vpunpckl* */
43300 for (i = 0; i < nelt4; ++i)
43301 {
43302 remap[i] = i * 2;
43303 remap[i + nelt] = i * 2 + 1;
43304 remap[i + nelt2] = i * 2 + nelt2;
43305 remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
43306 dremap.perm[i * 2] = i;
43307 dremap.perm[i * 2 + 1] = i + nelt;
43308 dremap.perm[i * 2 + nelt2] = i + nelt2;
43309 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
43310 }
43311 }
43312 else if (TARGET_AVX2
43313 && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
43314 {
43315 /* vpunpckh* */
43316 for (i = 0; i < nelt4; ++i)
43317 {
43318 remap[i + nelt4] = i * 2;
43319 remap[i + nelt + nelt4] = i * 2 + 1;
43320 remap[i + nelt2 + nelt4] = i * 2 + nelt2;
43321 remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
43322 dremap.perm[i * 2] = i + nelt4;
43323 dremap.perm[i * 2 + 1] = i + nelt + nelt4;
43324 dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
43325 dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
43326 }
43327 }
43328 else
43329 return false;
43330 }
43331
43332 /* Use the remapping array set up above to move the elements from their
43333 swizzled locations into their final destinations. */
43334 dfinal = *d;
43335 for (i = 0; i < nelt; ++i)
43336 {
43337 unsigned e = remap[d->perm[i]];
43338 gcc_assert (e < nelt);
43339 /* If same_halves is true, both halves of the remapped vector are the
43340 same. Avoid cross-lane accesses if possible. */
43341 if (same_halves && i >= nelt2)
43342 {
43343 gcc_assert (e < nelt2);
43344 dfinal.perm[i] = e + nelt2;
43345 }
43346 else
43347 dfinal.perm[i] = e;
43348 }
43349 if (!d->testing_p)
43350 {
43351 dremap.target = gen_reg_rtx (dremap.vmode);
43352 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43353 }
43354 dfinal.op1 = dfinal.op0;
43355 dfinal.one_operand_p = true;
43356
43357 /* Test if the final remap can be done with a single insn. For V4SFmode or
43358 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
43359 start_sequence ();
43360 ok = expand_vec_perm_1 (&dfinal);
43361 seq = get_insns ();
43362 end_sequence ();
43363
43364 if (!ok)
43365 return false;
43366
43367 if (d->testing_p)
43368 return true;
43369
43370 if (dremap.vmode != dfinal.vmode)
43371 {
43372 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
43373 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
43374 }
43375
43376 ok = expand_vec_perm_1 (&dremap);
43377 gcc_assert (ok);
43378
43379 emit_insn (seq);
43380 return true;
43381 }
43382
43383 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43384 a single vector cross-lane permutation into vpermq followed
43385 by any of the single insn permutations. */
43386
43387 static bool
43388 expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
43389 {
43390 struct expand_vec_perm_d dremap, dfinal;
43391 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
43392 unsigned contents[2];
43393 bool ok;
43394
43395 if (!(TARGET_AVX2
43396 && (d->vmode == V32QImode || d->vmode == V16HImode)
43397 && d->one_operand_p))
43398 return false;
43399
43400 contents[0] = 0;
43401 contents[1] = 0;
43402 for (i = 0; i < nelt2; ++i)
43403 {
43404 contents[0] |= 1u << (d->perm[i] / nelt4);
43405 contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
43406 }
43407
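/* Each half of the result may draw from at most two of the four 64-bit
   quarters of the input; only then can a single vpermq bring the needed
   quarters into place for the final intra-lane shuffle.  */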
43408 for (i = 0; i < 2; ++i)
43409 {
43410 unsigned int cnt = 0;
43411 for (j = 0; j < 4; ++j)
43412 if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
43413 return false;
43414 }
43415
43416 if (d->testing_p)
43417 return true;
43418
43419 dremap = *d;
43420 dremap.vmode = V4DImode;
43421 dremap.nelt = 4;
43422 dremap.target = gen_reg_rtx (V4DImode);
43423 dremap.op0 = gen_lowpart (V4DImode, d->op0);
43424 dremap.op1 = dremap.op0;
43425 dremap.one_operand_p = true;
43426 for (i = 0; i < 2; ++i)
43427 {
43428 unsigned int cnt = 0;
43429 for (j = 0; j < 4; ++j)
43430 if ((contents[i] & (1u << j)) != 0)
43431 dremap.perm[2 * i + cnt++] = j;
43432 for (; cnt < 2; ++cnt)
43433 dremap.perm[2 * i + cnt] = 0;
43434 }
43435
43436 dfinal = *d;
43437 dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
43438 dfinal.op1 = dfinal.op0;
43439 dfinal.one_operand_p = true;
43440 for (i = 0, j = 0; i < nelt; ++i)
43441 {
43442 if (i == nelt2)
43443 j = 2;
43444 dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
43445 if ((d->perm[i] / nelt4) == dremap.perm[j])
43446 ;
43447 else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
43448 dfinal.perm[i] |= nelt4;
43449 else
43450 gcc_unreachable ();
43451 }
43452
43453 ok = expand_vec_perm_1 (&dremap);
43454 gcc_assert (ok);
43455
43456 ok = expand_vec_perm_1 (&dfinal);
43457 gcc_assert (ok);
43458
43459 return true;
43460 }
43461
43462 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to expand
43463 a vector permutation using two instructions: vperm2f128 (or
43464 vperm2i128) followed by any single in-lane permutation. */
43465
43466 static bool
43467 expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
43468 {
43469 struct expand_vec_perm_d dfirst, dsecond;
43470 unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
43471 bool ok;
43472
43473 if (!TARGET_AVX
43474 || GET_MODE_SIZE (d->vmode) != 32
43475 || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
43476 return false;
43477
43478 dsecond = *d;
43479 dsecond.one_operand_p = false;
43480 dsecond.testing_p = true;
43481
43482 /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
43483 immediate. For perm < 16 the second permutation uses
43484 d->op0 as first operand, for perm >= 16 it uses d->op1
43485 as first operand. The second operand is the result of
43486 vperm2[fi]128. */
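/* For instance, perm == 6 selects the low lane of d->op1 for the result
   low lane and the high lane of d->op0 for the result high lane, and the
   immediate becomes ((6 << 2) | 6) & 0x33 == 0x12.  */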
43487 for (perm = 0; perm < 32; perm++)
43488 {
43489 /* Ignore permutations which do not move anything cross-lane. */
43490 if (perm < 16)
43491 {
43492 /* The second shuffle for e.g. V4DFmode has
43493 0123 and ABCD operands.
43494 Ignore AB23, as 23 is already in the second lane
43495 of the first operand. */
43496 if ((perm & 0xc) == (1 << 2)) continue;
43497 /* And 01CD, as 01 is in the first lane of the first
43498 operand. */
43499 if ((perm & 3) == 0) continue;
43500 /* And 4567, as then the vperm2[fi]128 doesn't change
43501 anything on the original 4567 second operand. */
43502 if ((perm & 0xf) == ((3 << 2) | 2)) continue;
43503 }
43504 else
43505 {
43506 /* The second shuffle for e.g. V4DFmode has
43507 4567 and ABCD operands.
43508 Ignore AB67, as 67 is already in the second lane
43509 of the first operand. */
43510 if ((perm & 0xc) == (3 << 2)) continue;
43511 /* And 45CD, as 45 is in the first lane of the first
43512 operand. */
43513 if ((perm & 3) == 2) continue;
43514 /* And 0123, as then the vperm2[fi]128 doesn't change
43515 anything on the original 0123 first operand. */
43516 if ((perm & 0xf) == (1 << 2)) continue;
43517 }
43518
43519 for (i = 0; i < nelt; i++)
43520 {
43521 j = d->perm[i] / nelt2;
43522 if (j == ((perm >> (2 * (i >= nelt2))) & 3))
43523 dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
43524 else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
43525 dsecond.perm[i] = d->perm[i] & (nelt - 1);
43526 else
43527 break;
43528 }
43529
43530 if (i == nelt)
43531 {
43532 start_sequence ();
43533 ok = expand_vec_perm_1 (&dsecond);
43534 end_sequence ();
43535 }
43536 else
43537 ok = false;
43538
43539 if (ok)
43540 {
43541 if (d->testing_p)
43542 return true;
43543
43544 /* Found a usable second shuffle. dfirst will be
43545 vperm2f128 on d->op0 and d->op1. */
43546 dsecond.testing_p = false;
43547 dfirst = *d;
43548 dfirst.target = gen_reg_rtx (d->vmode);
43549 for (i = 0; i < nelt; i++)
43550 dfirst.perm[i] = (i & (nelt2 - 1))
43551 + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
43552
43553 ok = expand_vec_perm_1 (&dfirst);
43554 gcc_assert (ok);
43555
43556 /* And dsecond is some single insn shuffle, taking
43557 d->op0 and result of vperm2f128 (if perm < 16) or
43558 d->op1 and result of vperm2f128 (otherwise). */
43559 dsecond.op1 = dfirst.target;
43560 if (perm >= 16)
43561 dsecond.op0 = dfirst.op1;
43562
43563 ok = expand_vec_perm_1 (&dsecond);
43564 gcc_assert (ok);
43565
43566 return true;
43567 }
43568
43569 /* For one operand, the only useful vperm2f128 permutation is 0x10. */
43570 if (d->one_operand_p)
43571 return false;
43572 }
43573
43574 return false;
43575 }
43576
43577 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
43578 a two vector permutation using 2 intra-lane interleave insns
43579 and cross-lane shuffle for 32-byte vectors. */
43580
43581 static bool
43582 expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
43583 {
43584 unsigned i, nelt;
43585 rtx (*gen) (rtx, rtx, rtx);
43586
43587 if (d->one_operand_p)
43588 return false;
43589 if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
43590 ;
43591 else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
43592 ;
43593 else
43594 return false;
43595
43596 nelt = d->nelt;
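/* Accept only the interleave-low pattern { 0, nelt, 1, nelt + 1, ... }
   or the interleave-high pattern { nelt/2, nelt + nelt/2,
   nelt/2 + 1, ... }.  */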
43597 if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
43598 return false;
43599 for (i = 0; i < nelt; i += 2)
43600 if (d->perm[i] != d->perm[0] + i / 2
43601 || d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
43602 return false;
43603
43604 if (d->testing_p)
43605 return true;
43606
43607 switch (d->vmode)
43608 {
43609 case V32QImode:
43610 if (d->perm[0])
43611 gen = gen_vec_interleave_highv32qi;
43612 else
43613 gen = gen_vec_interleave_lowv32qi;
43614 break;
43615 case V16HImode:
43616 if (d->perm[0])
43617 gen = gen_vec_interleave_highv16hi;
43618 else
43619 gen = gen_vec_interleave_lowv16hi;
43620 break;
43621 case V8SImode:
43622 if (d->perm[0])
43623 gen = gen_vec_interleave_highv8si;
43624 else
43625 gen = gen_vec_interleave_lowv8si;
43626 break;
43627 case V4DImode:
43628 if (d->perm[0])
43629 gen = gen_vec_interleave_highv4di;
43630 else
43631 gen = gen_vec_interleave_lowv4di;
43632 break;
43633 case V8SFmode:
43634 if (d->perm[0])
43635 gen = gen_vec_interleave_highv8sf;
43636 else
43637 gen = gen_vec_interleave_lowv8sf;
43638 break;
43639 case V4DFmode:
43640 if (d->perm[0])
43641 gen = gen_vec_interleave_highv4df;
43642 else
43643 gen = gen_vec_interleave_lowv4df;
43644 break;
43645 default:
43646 gcc_unreachable ();
43647 }
43648
43649 emit_insn (gen (d->target, d->op0, d->op1));
43650 return true;
43651 }
43652
43653 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement
43654 a single vector permutation using a single intra-lane vector
43655 permutation, vperm2f128 swapping the lanes and vblend* insn blending
43656 the non-swapped and swapped vectors together. */
43657
43658 static bool
43659 expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
43660 {
43661 struct expand_vec_perm_d dfirst, dsecond;
43662 unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
43663 rtx seq;
43664 bool ok;
43665 rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
43666
43667 if (!TARGET_AVX
43668 || TARGET_AVX2
43669 || (d->vmode != V8SFmode && d->vmode != V4DFmode)
43670 || !d->one_operand_p)
43671 return false;
43672
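  /* Strategy (illustrative summary): DFIRST performs an intra-lane
     permutation that places every requested element in the 128-bit lane it
     comes from, either at its final position i or at i ^ nelt2.  DSECOND
     then swaps the two lanes, and the final vblend picks, for each element,
     whichever of the two copies holds the right value; MSK records the
     elements that must come from the lane-swapped copy.  */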
43673 dfirst = *d;
43674 for (i = 0; i < nelt; i++)
43675 dfirst.perm[i] = 0xff;
43676 for (i = 0, msk = 0; i < nelt; i++)
43677 {
43678 j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
43679 if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
43680 return false;
43681 dfirst.perm[j] = d->perm[i];
43682 if (j != i)
43683 msk |= (1 << i);
43684 }
43685 for (i = 0; i < nelt; i++)
43686 if (dfirst.perm[i] == 0xff)
43687 dfirst.perm[i] = i;
43688
43689 if (!d->testing_p)
43690 dfirst.target = gen_reg_rtx (dfirst.vmode);
43691
43692 start_sequence ();
43693 ok = expand_vec_perm_1 (&dfirst);
43694 seq = get_insns ();
43695 end_sequence ();
43696
43697 if (!ok)
43698 return false;
43699
43700 if (d->testing_p)
43701 return true;
43702
43703 emit_insn (seq);
43704
43705 dsecond = *d;
43706 dsecond.op0 = dfirst.target;
43707 dsecond.op1 = dfirst.target;
43708 dsecond.one_operand_p = true;
43709 dsecond.target = gen_reg_rtx (dsecond.vmode);
43710 for (i = 0; i < nelt; i++)
43711 dsecond.perm[i] = i ^ nelt2;
43712
43713 ok = expand_vec_perm_1 (&dsecond);
43714 gcc_assert (ok);
43715
43716 blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
43717 emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
43718 return true;
43719 }
43720
43721 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement a V4DF
43722 permutation using two vperm2f128, followed by a vshufpd insn blending
43723 the two vectors together. */
43724
43725 static bool
43726 expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
43727 {
43728 struct expand_vec_perm_d dfirst, dsecond, dthird;
43729 bool ok;
43730
43731 if (!TARGET_AVX || (d->vmode != V4DFmode))
43732 return false;
43733
43734 if (d->testing_p)
43735 return true;
43736
43737 dfirst = *d;
43738 dsecond = *d;
43739 dthird = *d;
43740
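  /* How the split works (illustrative): DFIRST gathers, with a vperm2f128,
     the two 128-bit chunks containing the elements wanted at result
     positions 0 and 2, DSECOND does the same for result positions 1 and 3,
     and DTHIRD is an in-lane vshufpd-style selection of the low or high
     double from each chunk.  E.g. for d->perm = { 2, 5, 0, 7 } this yields
     dfirst = { 2, 3, 0, 1 }, dsecond = { 4, 5, 6, 7 } and
     dthird = { 0, 5, 2, 7 }.  */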
43741 dfirst.perm[0] = (d->perm[0] & ~1);
43742 dfirst.perm[1] = (d->perm[0] & ~1) + 1;
43743 dfirst.perm[2] = (d->perm[2] & ~1);
43744 dfirst.perm[3] = (d->perm[2] & ~1) + 1;
43745 dsecond.perm[0] = (d->perm[1] & ~1);
43746 dsecond.perm[1] = (d->perm[1] & ~1) + 1;
43747 dsecond.perm[2] = (d->perm[3] & ~1);
43748 dsecond.perm[3] = (d->perm[3] & ~1) + 1;
43749 dthird.perm[0] = (d->perm[0] % 2);
43750 dthird.perm[1] = (d->perm[1] % 2) + 4;
43751 dthird.perm[2] = (d->perm[2] % 2) + 2;
43752 dthird.perm[3] = (d->perm[3] % 2) + 6;
43753
43754 dfirst.target = gen_reg_rtx (dfirst.vmode);
43755 dsecond.target = gen_reg_rtx (dsecond.vmode);
43756 dthird.op0 = dfirst.target;
43757 dthird.op1 = dsecond.target;
43758 dthird.one_operand_p = false;
43759
43760 canonicalize_perm (&dfirst);
43761 canonicalize_perm (&dsecond);
43762
43763 ok = expand_vec_perm_1 (&dfirst)
43764 && expand_vec_perm_1 (&dsecond)
43765 && expand_vec_perm_1 (&dthird);
43766
43767 gcc_assert (ok);
43768
43769 return true;
43770 }
43771
43772 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
43773 permutation with two pshufb insns and an ior. We should have already
43774 failed all two-instruction sequences. */
43775
43776 static bool
43777 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
43778 {
43779 rtx rperm[2][16], vperm, l, h, op, m128;
43780 unsigned int i, nelt, eltsz;
43781
43782 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
43783 return false;
43784 gcc_assert (!d->one_operand_p);
43785
43786 if (d->testing_p)
43787 return true;
43788
43789 nelt = d->nelt;
43790 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43791
43792 /* Generate two permutation masks. If the required element is within
43793 the given vector it is shuffled into the proper lane. If the required
43794 element is in the other vector, force a zero into the lane by setting
43795 bit 7 in the permutation mask. */
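  /* E.g. (illustrative) for V16QImode and the interleave-low selector
     { 0, 16, 1, 17, ..., 7, 23 } the two masks become
       { 0, -128, 1, -128, ..., 7, -128 }  for d->op0 and
       { -128, 0, -128, 1, ..., -128, 7 }  for d->op1,
     so every byte of the final ior is nonzero in at most one of the two
     pshufb results.  */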
43796 m128 = GEN_INT (-128);
43797 for (i = 0; i < nelt; ++i)
43798 {
43799 unsigned j, e = d->perm[i];
43800 unsigned which = (e >= nelt);
43801 if (e >= nelt)
43802 e -= nelt;
43803
43804 for (j = 0; j < eltsz; ++j)
43805 {
43806 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
43807 rperm[1-which][i*eltsz + j] = m128;
43808 }
43809 }
43810
43811 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
43812 vperm = force_reg (V16QImode, vperm);
43813
43814 l = gen_reg_rtx (V16QImode);
43815 op = gen_lowpart (V16QImode, d->op0);
43816 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
43817
43818 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
43819 vperm = force_reg (V16QImode, vperm);
43820
43821 h = gen_reg_rtx (V16QImode);
43822 op = gen_lowpart (V16QImode, d->op1);
43823 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
43824
43825 op = d->target;
43826 if (d->vmode != V16QImode)
43827 op = gen_reg_rtx (V16QImode);
43828 emit_insn (gen_iorv16qi3 (op, l, h));
43829 if (op != d->target)
43830 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43831
43832 return true;
43833 }
43834
43835 /* Implement arbitrary permutation of one V32QImode or V16HImode operand
43836 with two vpshufb insns, vpermq and vpor. We should have already failed
43837 all two- or three-instruction sequences. */
43838
43839 static bool
43840 expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
43841 {
43842 rtx rperm[2][32], vperm, l, h, hp, op, m128;
43843 unsigned int i, nelt, eltsz;
43844
43845 if (!TARGET_AVX2
43846 || !d->one_operand_p
43847 || (d->vmode != V32QImode && d->vmode != V16HImode))
43848 return false;
43849
43850 if (d->testing_p)
43851 return true;
43852
43853 nelt = d->nelt;
43854 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43855
43856 /* Generate two permutation masks. If the required element is within
43857 the same lane, it is shuffled in. If the required element is in the
43858 other lane, force a zero by setting bit 7 in the permutation mask.
43859 The other mask has a non-negative element wherever an element is
43860 requested from the other lane; that element is also moved to the
43861 other lane, so that the result of vpshufb can have its two V2TImode
43862 halves swapped. */
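  /* Put differently (illustrative): the first mask builds L with every
     same-lane element already in place and zeros elsewhere, while the second
     mask builds H with every cross-lane element placed within its source
     lane, so that the vpermq below, which swaps the two 128-bit halves,
     moves it to its final position.  Each destination byte is zero in
     exactly one of L and the swapped H, hence the final vpor.  */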
43863 m128 = GEN_INT (-128);
43864 for (i = 0; i < nelt; ++i)
43865 {
43866 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43867 unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
43868
43869 for (j = 0; j < eltsz; ++j)
43870 {
43871 rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
43872 rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
43873 }
43874 }
43875
43876 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43877 vperm = force_reg (V32QImode, vperm);
43878
43879 h = gen_reg_rtx (V32QImode);
43880 op = gen_lowpart (V32QImode, d->op0);
43881 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43882
43883 /* Swap the 128-bit lanes of h into hp. */
43884 hp = gen_reg_rtx (V4DImode);
43885 op = gen_lowpart (V4DImode, h);
43886 emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
43887 const1_rtx));
43888
43889 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43890 vperm = force_reg (V32QImode, vperm);
43891
43892 l = gen_reg_rtx (V32QImode);
43893 op = gen_lowpart (V32QImode, d->op0);
43894 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43895
43896 op = d->target;
43897 if (d->vmode != V32QImode)
43898 op = gen_reg_rtx (V32QImode);
43899 emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
43900 if (op != d->target)
43901 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43902
43903 return true;
43904 }
43905
43906 /* A subroutine of expand_vec_perm_even_odd_1. Implement extract-even
43907 and extract-odd permutations of two V32QImode or V16HImode operands
43908 with two vpshufb insns, vpor and vpermq. We should have already
43909 failed all two- or three-instruction sequences. */
43910
43911 static bool
43912 expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
43913 {
43914 rtx rperm[2][32], vperm, l, h, ior, op, m128;
43915 unsigned int i, nelt, eltsz;
43916
43917 if (!TARGET_AVX2
43918 || d->one_operand_p
43919 || (d->vmode != V32QImode && d->vmode != V16HImode))
43920 return false;
43921
43922 for (i = 0; i < d->nelt; ++i)
43923 if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
43924 return false;
43925
43926 if (d->testing_p)
43927 return true;
43928
43929 nelt = d->nelt;
43930 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
43931
43932 /* Generate two permutation masks. In the first permutation mask
43933 the first quarter will contain indexes for the first half
43934 of op0, the second quarter will contain bit 7 set, the third quarter
43935 will contain indexes for the second half of op0, and the
43936 last quarter bit 7 set. In the second permutation mask
43937 the first quarter will contain bit 7 set, the second quarter
43938 indexes for the first half of op1, the third quarter bit 7 set,
43939 and the last quarter indexes for the second half of op1.
43940 I.e. the first mask e.g. for V32QImode extract even will be:
43941 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
43942 (all values masked with 0xf except for -128) and second mask
43943 for extract even will be
43944 -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe. */
43945 m128 = GEN_INT (-128);
43946 for (i = 0; i < nelt; ++i)
43947 {
43948 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
43949 unsigned which = d->perm[i] >= nelt;
43950 unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
43951
43952 for (j = 0; j < eltsz; ++j)
43953 {
43954 rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
43955 rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
43956 }
43957 }
43958
43959 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
43960 vperm = force_reg (V32QImode, vperm);
43961
43962 l = gen_reg_rtx (V32QImode);
43963 op = gen_lowpart (V32QImode, d->op0);
43964 emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
43965
43966 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
43967 vperm = force_reg (V32QImode, vperm);
43968
43969 h = gen_reg_rtx (V32QImode);
43970 op = gen_lowpart (V32QImode, d->op1);
43971 emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
43972
43973 ior = gen_reg_rtx (V32QImode);
43974 emit_insn (gen_iorv32qi3 (ior, l, h));
43975
43976 /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation. */
43977 op = gen_reg_rtx (V4DImode);
43978 ior = gen_lowpart (V4DImode, ior);
43979 emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
43980 const1_rtx, GEN_INT (3)));
43981 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
43982
43983 return true;
43984 }
43985
43986 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
43987 and extract-odd permutations. */
43988
43989 static bool
43990 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
43991 {
43992 rtx t1, t2, t3, t4, t5;
43993
43994 switch (d->vmode)
43995 {
43996 case V4DFmode:
43997 if (d->testing_p)
43998 break;
43999 t1 = gen_reg_rtx (V4DFmode);
44000 t2 = gen_reg_rtx (V4DFmode);
44001
44002 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44003 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
44004 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
44005
44006 /* Now an unpck[lh]pd will produce the result required. */
44007 if (odd)
44008 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
44009 else
44010 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
44011 emit_insn (t3);
44012 break;
44013
44014 case V8SFmode:
44015 {
44016 int mask = odd ? 0xdd : 0x88;
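	/* As a reading aid: the shufps immediate selects, within each
	   128-bit lane, two elements from the first operand (imm bits 0-3)
	   followed by two from the second (imm bits 4-7), so 0x88 selects
	   { 0, 2, 0, 2 } (the even elements) and 0xdd selects { 1, 3, 1, 3 }
	   (the odd ones).  */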
44017
44018 if (d->testing_p)
44019 break;
44020 t1 = gen_reg_rtx (V8SFmode);
44021 t2 = gen_reg_rtx (V8SFmode);
44022 t3 = gen_reg_rtx (V8SFmode);
44023
44024 /* Shuffle within the 128-bit lanes to produce:
44025 { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }. */
44026 emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
44027 GEN_INT (mask)));
44028
44029 /* Shuffle the lanes around to produce:
44030 { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }. */
44031 emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
44032 GEN_INT (0x3)));
44033
44034 /* Shuffle within the 128-bit lanes to produce:
44035 { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }. */
44036 emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
44037
44038 /* Shuffle within the 128-bit lanes to produce:
44039 { 8 a c e c e 8 a } | { 9 b d f d f 9 b }. */
44040 emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
44041
44042 /* Shuffle the lanes around to produce:
44043 { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }. */
44044 emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
44045 GEN_INT (0x20)));
44046 }
44047 break;
44048
44049 case V2DFmode:
44050 case V4SFmode:
44051 case V2DImode:
44052 case V4SImode:
44053 /* These are always directly implementable by expand_vec_perm_1. */
44054 gcc_unreachable ();
44055
44056 case V8HImode:
44057 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44058 return expand_vec_perm_pshufb2 (d);
44059 else
44060 {
44061 if (d->testing_p)
44062 break;
44063 /* We need 2*log2(N)-1 operations to achieve odd/even
44064 with interleave. */
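	  /* Illustration for V8HImode with op0 = a0..a7, op1 = b0..b7 and an
	     even extraction:
	       t1     = a4 b4 a5 b5 a6 b6 a7 b7   (high interleave)
	       target = a0 b0 a1 b1 a2 b2 a3 b3   (low interleave)
	       t2     = a2 a6 b2 b6 a3 a7 b3 b7
	       target = a0 a4 b0 b4 a1 a5 b1 b5
	       target = a0 a2 a4 a6 b0 b2 b4 b6   (final low interleave)
	     The odd case ends with a high interleave instead.  */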
44065 t1 = gen_reg_rtx (V8HImode);
44066 t2 = gen_reg_rtx (V8HImode);
44067 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
44068 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
44069 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
44070 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
44071 if (odd)
44072 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
44073 else
44074 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
44075 emit_insn (t3);
44076 }
44077 break;
44078
44079 case V16QImode:
44080 if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
44081 return expand_vec_perm_pshufb2 (d);
44082 else
44083 {
44084 if (d->testing_p)
44085 break;
44086 t1 = gen_reg_rtx (V16QImode);
44087 t2 = gen_reg_rtx (V16QImode);
44088 t3 = gen_reg_rtx (V16QImode);
44089 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
44090 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
44091 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
44092 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
44093 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
44094 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
44095 if (odd)
44096 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
44097 else
44098 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
44099 emit_insn (t3);
44100 }
44101 break;
44102
44103 case V16HImode:
44104 case V32QImode:
44105 return expand_vec_perm_vpshufb2_vpermq_even_odd (d);
44106
44107 case V4DImode:
44108 if (!TARGET_AVX2)
44109 {
44110 struct expand_vec_perm_d d_copy = *d;
44111 d_copy.vmode = V4DFmode;
44112 if (d->testing_p)
44113 d_copy.target = gen_lowpart (V4DFmode, d->target);
44114 else
44115 d_copy.target = gen_reg_rtx (V4DFmode);
44116 d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
44117 d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
44118 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44119 {
44120 if (!d->testing_p)
44121 emit_move_insn (d->target,
44122 gen_lowpart (V4DImode, d_copy.target));
44123 return true;
44124 }
44125 return false;
44126 }
44127
44128 if (d->testing_p)
44129 break;
44130
44131 t1 = gen_reg_rtx (V4DImode);
44132 t2 = gen_reg_rtx (V4DImode);
44133
44134 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
44135 emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
44136 emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
44137
44138 /* Now a vpunpck[lh]qdq will produce the result required. */
44139 if (odd)
44140 t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
44141 else
44142 t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
44143 emit_insn (t3);
44144 break;
44145
44146 case V8SImode:
44147 if (!TARGET_AVX2)
44148 {
44149 struct expand_vec_perm_d d_copy = *d;
44150 d_copy.vmode = V8SFmode;
44151 if (d->testing_p)
44152 d_copy.target = gen_lowpart (V8SFmode, d->target);
44153 else
44154 d_copy.target = gen_reg_rtx (V8SFmode);
44155 d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
44156 d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
44157 if (expand_vec_perm_even_odd_1 (&d_copy, odd))
44158 {
44159 if (!d->testing_p)
44160 emit_move_insn (d->target,
44161 gen_lowpart (V8SImode, d_copy.target));
44162 return true;
44163 }
44164 return false;
44165 }
44166
44167 if (d->testing_p)
44168 break;
44169
44170 t1 = gen_reg_rtx (V8SImode);
44171 t2 = gen_reg_rtx (V8SImode);
44172 t3 = gen_reg_rtx (V4DImode);
44173 t4 = gen_reg_rtx (V4DImode);
44174 t5 = gen_reg_rtx (V4DImode);
44175
44176 /* Shuffle the lanes around into
44177 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }. */
44178 emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
44179 gen_lowpart (V4DImode, d->op1),
44180 GEN_INT (0x20)));
44181 emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
44182 gen_lowpart (V4DImode, d->op1),
44183 GEN_INT (0x31)));
44184
44185 /* Swap the 2nd and 3rd position in each lane into
44186 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
44187 emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
44188 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44189 emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
44190 GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
44191
44192 /* Now a vpunpck[lh]qdq will produce
44193 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }. */
44194 if (odd)
44195 t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
44196 gen_lowpart (V4DImode, t2));
44197 else
44198 t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
44199 gen_lowpart (V4DImode, t2));
44200 emit_insn (t3);
44201 emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
44202 break;
44203
44204 default:
44205 gcc_unreachable ();
44206 }
44207
44208 return true;
44209 }
44210
44211 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44212 extract-even and extract-odd permutations. */
44213
44214 static bool
44215 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
44216 {
44217 unsigned i, odd, nelt = d->nelt;
44218
44219 odd = d->perm[0];
44220 if (odd != 0 && odd != 1)
44221 return false;
44222
44223 for (i = 1; i < nelt; ++i)
44224 if (d->perm[i] != 2 * i + odd)
44225 return false;
44226
44227 return expand_vec_perm_even_odd_1 (d, odd);
44228 }
44229
44230 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
44231 permutations. We assume that expand_vec_perm_1 has already failed. */
44232
44233 static bool
44234 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
44235 {
44236 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
44237 enum machine_mode vmode = d->vmode;
44238 unsigned char perm2[4];
44239 rtx op0 = d->op0, dest;
44240 bool ok;
44241
44242 switch (vmode)
44243 {
44244 case V4DFmode:
44245 case V8SFmode:
44246 /* These are special-cased in sse.md so that we can optionally
44247 use the vbroadcast instruction. They expand to two insns
44248 if the input happens to be in a register. */
44249 gcc_unreachable ();
44250
44251 case V2DFmode:
44252 case V2DImode:
44253 case V4SFmode:
44254 case V4SImode:
44255 /* These are always implementable using standard shuffle patterns. */
44256 gcc_unreachable ();
44257
44258 case V8HImode:
44259 case V16QImode:
44260 /* These can be implemented via interleave. We save one insn by
44261 stopping once we have promoted to V4SImode and then using pshufd. */
44262 if (d->testing_p)
44263 return true;
44264 do
44265 {
44266 rtx dest;
44267 rtx (*gen) (rtx, rtx, rtx)
44268 = vmode == V16QImode ? gen_vec_interleave_lowv16qi
44269 : gen_vec_interleave_lowv8hi;
44270
44271 if (elt >= nelt2)
44272 {
44273 gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
44274 : gen_vec_interleave_highv8hi;
44275 elt -= nelt2;
44276 }
44277 nelt2 /= 2;
44278
44279 dest = gen_reg_rtx (vmode);
44280 emit_insn (gen (dest, op0, op0));
44281 vmode = get_mode_wider_vector (vmode);
44282 op0 = gen_lowpart (vmode, dest);
44283 }
44284 while (vmode != V4SImode);
44285
44286 memset (perm2, elt, 4);
44287 dest = gen_reg_rtx (V4SImode);
44288 ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
44289 gcc_assert (ok);
44290 if (!d->testing_p)
44291 emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
44292 return true;
44293
44294 case V32QImode:
44295 case V16HImode:
44296 case V8SImode:
44297 case V4DImode:
44298 /* For AVX2, broadcasts of the first element should be handled by
44299 vpbroadcast* or vpermq in expand_vec_perm_1. */
44300 gcc_assert (!TARGET_AVX2 || d->perm[0]);
44301 return false;
44302
44303 default:
44304 gcc_unreachable ();
44305 }
44306 }
44307
44308 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
44309 broadcast permutations. */
44310
44311 static bool
44312 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
44313 {
44314 unsigned i, elt, nelt = d->nelt;
44315
44316 if (!d->one_operand_p)
44317 return false;
44318
44319 elt = d->perm[0];
44320 for (i = 1; i < nelt; ++i)
44321 if (d->perm[i] != elt)
44322 return false;
44323
44324 return expand_vec_perm_broadcast_1 (d);
44325 }
44326
44327 /* Implement arbitrary permutation of two V32QImode or V16HImode operands
44328 with 4 vpshufb insns, 2 vpermq and 3 vpor. We should have already failed
44329 all the shorter instruction sequences. */
44330
44331 static bool
44332 expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
44333 {
44334 rtx rperm[4][32], vperm, l[2], h[2], op, m128;
44335 unsigned int i, nelt, eltsz;
44336 bool used[4];
44337
44338 if (!TARGET_AVX2
44339 || d->one_operand_p
44340 || (d->vmode != V32QImode && d->vmode != V16HImode))
44341 return false;
44342
44343 if (d->testing_p)
44344 return true;
44345
44346 nelt = d->nelt;
44347 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
44348
44349 /* Generate 4 permutation masks. If the required element is within
44350 the same lane, it is shuffled in. If the required element is in the
44351 other lane, force a zero by setting bit 7 in the permutation mask.
44352 The cross-lane mask for each operand has a non-negative element
44353 wherever an element is requested from the other lane; that element is
44354 also moved to the other lane, so that the result of vpshufb can have
44355 its two V2TImode halves swapped. */
44356 m128 = GEN_INT (-128);
44357 for (i = 0; i < 32; ++i)
44358 {
44359 rperm[0][i] = m128;
44360 rperm[1][i] = m128;
44361 rperm[2][i] = m128;
44362 rperm[3][i] = m128;
44363 }
44364 used[0] = false;
44365 used[1] = false;
44366 used[2] = false;
44367 used[3] = false;
44368 for (i = 0; i < nelt; ++i)
44369 {
44370 unsigned j, e = d->perm[i] & (nelt / 2 - 1);
44371 unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
44372 unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
44373
44374 for (j = 0; j < eltsz; ++j)
44375 rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
44376 used[which] = true;
44377 }
44378
44379 for (i = 0; i < 2; ++i)
44380 {
44381 if (!used[2 * i + 1])
44382 {
44383 h[i] = NULL_RTX;
44384 continue;
44385 }
44386 vperm = gen_rtx_CONST_VECTOR (V32QImode,
44387 gen_rtvec_v (32, rperm[2 * i + 1]));
44388 vperm = force_reg (V32QImode, vperm);
44389 h[i] = gen_reg_rtx (V32QImode);
44390 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44391 emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
44392 }
44393
44394 /* Swap the 128-bit lanes of h[X]. */
44395 for (i = 0; i < 2; ++i)
44396 {
44397 if (h[i] == NULL_RTX)
44398 continue;
44399 op = gen_reg_rtx (V4DImode);
44400 emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
44401 const2_rtx, GEN_INT (3), const0_rtx,
44402 const1_rtx));
44403 h[i] = gen_lowpart (V32QImode, op);
44404 }
44405
44406 for (i = 0; i < 2; ++i)
44407 {
44408 if (!used[2 * i])
44409 {
44410 l[i] = NULL_RTX;
44411 continue;
44412 }
44413 vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
44414 vperm = force_reg (V32QImode, vperm);
44415 l[i] = gen_reg_rtx (V32QImode);
44416 op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
44417 emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
44418 }
44419
44420 for (i = 0; i < 2; ++i)
44421 {
44422 if (h[i] && l[i])
44423 {
44424 op = gen_reg_rtx (V32QImode);
44425 emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
44426 l[i] = op;
44427 }
44428 else if (h[i])
44429 l[i] = h[i];
44430 }
44431
44432 gcc_assert (l[0] && l[1]);
44433 op = d->target;
44434 if (d->vmode != V32QImode)
44435 op = gen_reg_rtx (V32QImode);
44436 emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
44437 if (op != d->target)
44438 emit_move_insn (d->target, gen_lowpart (d->vmode, op));
44439 return true;
44440 }
44441
44442 /* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
44443 With all of the interface bits taken care of, perform the expansion
44444 in D and return true on success. */
44445
44446 static bool
44447 ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
44448 {
44449 /* Try a single instruction expansion. */
44450 if (expand_vec_perm_1 (d))
44451 return true;
44452
44453 /* Try sequences of two instructions. */
44454
44455 if (expand_vec_perm_pshuflw_pshufhw (d))
44456 return true;
44457
44458 if (expand_vec_perm_palignr (d))
44459 return true;
44460
44461 if (expand_vec_perm_interleave2 (d))
44462 return true;
44463
44464 if (expand_vec_perm_broadcast (d))
44465 return true;
44466
44467 if (expand_vec_perm_vpermq_perm_1 (d))
44468 return true;
44469
44470 if (expand_vec_perm_vperm2f128 (d))
44471 return true;
44472
44473 /* Try sequences of three instructions. */
44474
44475 if (expand_vec_perm_2vperm2f128_vshuf (d))
44476 return true;
44477
44478 if (expand_vec_perm_pshufb2 (d))
44479 return true;
44480
44481 if (expand_vec_perm_interleave3 (d))
44482 return true;
44483
44484 if (expand_vec_perm_vperm2f128_vblend (d))
44485 return true;
44486
44487 /* Try sequences of four instructions. */
44488
44489 if (expand_vec_perm_vpshufb2_vpermq (d))
44490 return true;
44491
44492 if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
44493 return true;
44494
44495 /* ??? Look for narrow permutations whose element orderings would
44496 allow the promotion to a wider mode. */
44497
44498 /* ??? Look for sequences of interleave or a wider permute that place
44499 the data into the correct lanes for a half-vector shuffle like
44500 pshuf[lh]w or vpermilps. */
44501
44502 /* ??? Look for sequences of interleave that produce the desired results.
44503 The combinatorics of punpck[lh] get pretty ugly... */
44504
44505 if (expand_vec_perm_even_odd (d))
44506 return true;
44507
44508 /* Even longer sequences. */
44509 if (expand_vec_perm_vpshufb4_vpermq2 (d))
44510 return true;
44511
44512 return false;
44513 }
44514
44515 /* If a permutation only uses one operand, make it clear. Returns true
44516 if the permutation references both operands. */
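/* For example (illustrative): with nelt == 4, the selector { 4, 5, 6, 7 }
   only references the second operand (WHICH == 2), so it is folded to
   { 0, 1, 2, 3 } on op1 with one_operand_p set, whereas { 0, 5, 2, 7 }
   references both operands (WHICH == 3) and is folded only if op0 and op1
   are the same register.  */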
44517
44518 static bool
44519 canonicalize_perm (struct expand_vec_perm_d *d)
44520 {
44521 int i, which, nelt = d->nelt;
44522
44523 for (i = which = 0; i < nelt; ++i)
44524 which |= (d->perm[i] < nelt ? 1 : 2);
44525
44526 d->one_operand_p = true;
44527 switch (which)
44528 {
44529 default:
44530 gcc_unreachable();
44531
44532 case 3:
44533 if (!rtx_equal_p (d->op0, d->op1))
44534 {
44535 d->one_operand_p = false;
44536 break;
44537 }
44538 /* The elements of PERM do not suggest that only the first operand
44539 is used, but both operands are identical. Allow easier matching
44540 of the permutation by folding the permutation into the single
44541 input vector. */
44542 /* FALLTHRU */
44543
44544 case 2:
44545 for (i = 0; i < nelt; ++i)
44546 d->perm[i] &= nelt - 1;
44547 d->op0 = d->op1;
44548 break;
44549
44550 case 1:
44551 d->op1 = d->op0;
44552 break;
44553 }
44554
44555 return (which == 3);
44556 }
44557
44558 bool
44559 ix86_expand_vec_perm_const (rtx operands[4])
44560 {
44561 struct expand_vec_perm_d d;
44562 unsigned char perm[MAX_VECT_LEN];
44563 int i, nelt;
44564 bool two_args;
44565 rtx sel;
44566
44567 d.target = operands[0];
44568 d.op0 = operands[1];
44569 d.op1 = operands[2];
44570 sel = operands[3];
44571
44572 d.vmode = GET_MODE (d.target);
44573 gcc_assert (VECTOR_MODE_P (d.vmode));
44574 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44575 d.testing_p = false;
44576
44577 gcc_assert (GET_CODE (sel) == CONST_VECTOR);
44578 gcc_assert (XVECLEN (sel, 0) == nelt);
44579 gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
44580
44581 for (i = 0; i < nelt; ++i)
44582 {
44583 rtx e = XVECEXP (sel, 0, i);
44584 int ei = INTVAL (e) & (2 * nelt - 1);
44585 d.perm[i] = ei;
44586 perm[i] = ei;
44587 }
44588
44589 two_args = canonicalize_perm (&d);
44590
44591 if (ix86_expand_vec_perm_const_1 (&d))
44592 return true;
44593
44594 /* If the selector says both arguments are needed, but the operands are the
44595 same, the above tried to expand with one_operand_p and a flattened selector.
44596 If that didn't work, retry without one_operand_p; we succeeded with that
44597 during testing. */
44598 if (two_args && d.one_operand_p)
44599 {
44600 d.one_operand_p = false;
44601 memcpy (d.perm, perm, sizeof (perm));
44602 return ix86_expand_vec_perm_const_1 (&d);
44603 }
44604
44605 return false;
44606 }
44607
44608 /* Implement targetm.vectorize.vec_perm_const_ok. */
44609
44610 static bool
44611 ix86_vectorize_vec_perm_const_ok (enum machine_mode vmode,
44612 const unsigned char *sel)
44613 {
44614 struct expand_vec_perm_d d;
44615 unsigned int i, nelt, which;
44616 bool ret;
44617
44618 d.vmode = vmode;
44619 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44620 d.testing_p = true;
44621
44622 /* Given sufficient ISA support we can just return true here
44623 for selected vector modes. */
44624 if (d.vmode == V16SImode || d.vmode == V16SFmode
44625 || d.vmode == V8DFmode || d.vmode == V8DImode)
44626 /* All implementable with a single vpermi2 insn. */
44627 return true;
44628 if (GET_MODE_SIZE (d.vmode) == 16)
44629 {
44630 /* All implementable with a single vpperm insn. */
44631 if (TARGET_XOP)
44632 return true;
44633 /* All implementable with 2 pshufb + 1 ior. */
44634 if (TARGET_SSSE3)
44635 return true;
44636 /* All implementable with shufpd or unpck[lh]pd. */
44637 if (d.nelt == 2)
44638 return true;
44639 }
44640
44641 /* Extract the values from the vector CST into the permutation
44642 array in D. */
44643 memcpy (d.perm, sel, nelt);
44644 for (i = which = 0; i < nelt; ++i)
44645 {
44646 unsigned char e = d.perm[i];
44647 gcc_assert (e < 2 * nelt);
44648 which |= (e < nelt ? 1 : 2);
44649 }
44650
44651 /* For all elements from the second vector, fold the elements to the first. */
44652 if (which == 2)
44653 for (i = 0; i < nelt; ++i)
44654 d.perm[i] -= nelt;
44655
44656 /* Check whether the mask can be applied to the vector type. */
44657 d.one_operand_p = (which != 3);
44658
44659 /* Implementable with shufps or pshufd. */
44660 if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
44661 return true;
44662
44663 /* Otherwise we have to go through the motions and see if we can
44664 figure out how to generate the requested permutation. */
44665 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
44666 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
44667 if (!d.one_operand_p)
44668 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
44669
44670 start_sequence ();
44671 ret = ix86_expand_vec_perm_const_1 (&d);
44672 end_sequence ();
44673
44674 return ret;
44675 }
44676
44677 void
44678 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
44679 {
44680 struct expand_vec_perm_d d;
44681 unsigned i, nelt;
44682
44683 d.target = targ;
44684 d.op0 = op0;
44685 d.op1 = op1;
44686 d.vmode = GET_MODE (targ);
44687 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44688 d.one_operand_p = false;
44689 d.testing_p = false;
44690
44691 for (i = 0; i < nelt; ++i)
44692 d.perm[i] = i * 2 + odd;
44693
44694 /* We'll either be able to implement the permutation directly... */
44695 if (expand_vec_perm_1 (&d))
44696 return;
44697
44698 /* ... or we use the special-case patterns. */
44699 expand_vec_perm_even_odd_1 (&d, odd);
44700 }
44701
44702 static void
44703 ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
44704 {
44705 struct expand_vec_perm_d d;
44706 unsigned i, nelt, base;
44707 bool ok;
44708
44709 d.target = targ;
44710 d.op0 = op0;
44711 d.op1 = op1;
44712 d.vmode = GET_MODE (targ);
44713 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
44714 d.one_operand_p = false;
44715 d.testing_p = false;
44716
44717 base = high_p ? nelt / 2 : 0;
44718 for (i = 0; i < nelt / 2; ++i)
44719 {
44720 d.perm[i * 2] = i + base;
44721 d.perm[i * 2 + 1] = i + base + nelt;
44722 }
44723
44724 /* Note that for AVX this isn't one instruction. */
44725 ok = ix86_expand_vec_perm_const_1 (&d);
44726 gcc_assert (ok);
44727 }
44728
44729
44730 /* Expand a vector operation CODE for a V*QImode in terms of the
44731 same operation on V*HImode. */
44732
44733 void
44734 ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
44735 {
44736 enum machine_mode qimode = GET_MODE (dest);
44737 enum machine_mode himode;
44738 rtx (*gen_il) (rtx, rtx, rtx);
44739 rtx (*gen_ih) (rtx, rtx, rtx);
44740 rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
44741 struct expand_vec_perm_d d;
44742 bool ok, full_interleave;
44743 bool uns_p = false;
44744 int i;
44745
44746 switch (qimode)
44747 {
44748 case V16QImode:
44749 himode = V8HImode;
44750 gen_il = gen_vec_interleave_lowv16qi;
44751 gen_ih = gen_vec_interleave_highv16qi;
44752 break;
44753 case V32QImode:
44754 himode = V16HImode;
44755 gen_il = gen_avx2_interleave_lowv32qi;
44756 gen_ih = gen_avx2_interleave_highv32qi;
44757 break;
44758 default:
44759 gcc_unreachable ();
44760 }
44761
44762 op2_l = op2_h = op2;
44763 switch (code)
44764 {
44765 case MULT:
44766 /* Unpack data such that we've got a source byte in each low byte of
44767 each word. We don't care what goes into the high byte of each word.
44768 Rather than trying to get zero in there, it is most convenient to let
44769 it be a copy of the low byte. */
44770 op2_l = gen_reg_rtx (qimode);
44771 op2_h = gen_reg_rtx (qimode);
44772 emit_insn (gen_il (op2_l, op2, op2));
44773 emit_insn (gen_ih (op2_h, op2, op2));
44774 /* FALLTHRU */
44775
44776 op1_l = gen_reg_rtx (qimode);
44777 op1_h = gen_reg_rtx (qimode);
44778 emit_insn (gen_il (op1_l, op1, op1));
44779 emit_insn (gen_ih (op1_h, op1, op1));
44780 full_interleave = qimode == V16QImode;
44781 break;
44782
44783 case ASHIFT:
44784 case LSHIFTRT:
44785 uns_p = true;
44786 /* FALLTHRU */
44787 case ASHIFTRT:
44788 op1_l = gen_reg_rtx (himode);
44789 op1_h = gen_reg_rtx (himode);
44790 ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
44791 ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
44792 full_interleave = true;
44793 break;
44794 default:
44795 gcc_unreachable ();
44796 }
44797
44798 /* Perform the operation. */
44799 res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
44800 1, OPTAB_DIRECT);
44801 res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
44802 1, OPTAB_DIRECT);
44803 gcc_assert (res_l && res_h);
44804
44805 /* Merge the data back into the right place. */
44806 d.target = dest;
44807 d.op0 = gen_lowpart (qimode, res_l);
44808 d.op1 = gen_lowpart (qimode, res_h);
44809 d.vmode = qimode;
44810 d.nelt = GET_MODE_NUNITS (qimode);
44811 d.one_operand_p = false;
44812 d.testing_p = false;
44813
44814 if (full_interleave)
44815 {
44816 /* For SSE2, we used a full interleave, so the desired
44817 results are in the even elements. */
44818 for (i = 0; i < 32; ++i)
44819 d.perm[i] = i * 2;
44820 }
44821 else
44822 {
44823 /* For AVX, the interleave used above was not cross-lane, so the
44824 extraction picks the even elements but with the second and third quarters swapped.
44825 Happily, that is even one insn shorter than a plain even extraction. */
44826 for (i = 0; i < 32; ++i)
44827 d.perm[i] = i * 2 + ((i & 24) == 8 ? 16 : (i & 24) == 16 ? -16 : 0);
44828 }
44829
44830 ok = ix86_expand_vec_perm_const_1 (&d);
44831 gcc_assert (ok);
44832
44833 set_unique_reg_note (get_last_insn (), REG_EQUAL,
44834 gen_rtx_fmt_ee (code, qimode, op1, op2));
44835 }
44836
44837 /* Helper function of ix86_expand_mul_widen_evenodd. Return true
44838 if op is CONST_VECTOR with all odd elements equal to their
44839 preceding element. */
44840
44841 static bool
44842 const_vector_equal_evenodd_p (rtx op)
44843 {
44844 enum machine_mode mode = GET_MODE (op);
44845 int i, nunits = GET_MODE_NUNITS (mode);
44846 if (GET_CODE (op) != CONST_VECTOR
44847 || nunits != CONST_VECTOR_NUNITS (op))
44848 return false;
44849 for (i = 0; i < nunits; i += 2)
44850 if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
44851 return false;
44852 return true;
44853 }
44854
44855 void
44856 ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
44857 bool uns_p, bool odd_p)
44858 {
44859 enum machine_mode mode = GET_MODE (op1);
44860 enum machine_mode wmode = GET_MODE (dest);
44861 rtx x;
44862 rtx orig_op1 = op1, orig_op2 = op2;
44863
44864 if (!nonimmediate_operand (op1, mode))
44865 op1 = force_reg (mode, op1);
44866 if (!nonimmediate_operand (op2, mode))
44867 op2 = force_reg (mode, op2);
44868
44869 /* We only play even/odd games with vectors of SImode. */
44870 gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
44871
44872 /* If we're looking for the odd results, shift those elements down to
44873 the even slots. For some CPUs this is faster than a PSHUFD. */
44874 if (odd_p)
44875 {
44876 /* For XOP use vpmacsdqh, but only for smult, as it is only
44877 signed. */
44878 if (TARGET_XOP && mode == V4SImode && !uns_p)
44879 {
44880 x = force_reg (wmode, CONST0_RTX (wmode));
44881 emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
44882 return;
44883 }
44884
44885 x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
44886 if (!const_vector_equal_evenodd_p (orig_op1))
44887 op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
44888 x, NULL, 1, OPTAB_DIRECT);
44889 if (!const_vector_equal_evenodd_p (orig_op2))
44890 op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
44891 x, NULL, 1, OPTAB_DIRECT);
44892 op1 = gen_lowpart (mode, op1);
44893 op2 = gen_lowpart (mode, op2);
44894 }
44895
44896 if (mode == V16SImode)
44897 {
44898 if (uns_p)
44899 x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
44900 else
44901 x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
44902 }
44903 else if (mode == V8SImode)
44904 {
44905 if (uns_p)
44906 x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
44907 else
44908 x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
44909 }
44910 else if (uns_p)
44911 x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
44912 else if (TARGET_SSE4_1)
44913 x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
44914 else
44915 {
44916 rtx s1, s2, t0, t1, t2;
44917
44918 /* The easiest way to implement this without PMULDQ is to go through
44919 the motions as if we were performing a full 64-bit multiply, with
44920 the exception that we need to do less shuffling of the elements. */
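      /* Identity used here (illustrative sketch): for 32-bit signed a and b,
	 sign_extend64 (a) == (uint64) a_lo - 2^32 * (a < 0), hence modulo 2^64
	   a * b == a_lo * b_lo - 2^32 * (a_lo * (b < 0) + b_lo * (a < 0)).
	 The SSE compares below produce all-ones masks where an operand is
	 negative; multiplying such a mask by the other operand and shifting
	 the sum left by 32 yields exactly the two correction terms, since
	 only the low 32 bits (which equal minus the operand) survive the
	 shift.  */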
44921
44922 /* Compute the sign-extension, aka highparts, of the two operands. */
44923 s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44924 op1, pc_rtx, pc_rtx);
44925 s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
44926 op2, pc_rtx, pc_rtx);
44927
44928 /* Multiply LO(A) * HI(B), and vice-versa. */
44929 t1 = gen_reg_rtx (wmode);
44930 t2 = gen_reg_rtx (wmode);
44931 emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
44932 emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
44933
44934 /* Multiply LO(A) * LO(B). */
44935 t0 = gen_reg_rtx (wmode);
44936 emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
44937
44938 /* Combine and shift the highparts into place. */
44939 t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
44940 t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
44941 1, OPTAB_DIRECT);
44942
44943 /* Combine high and low parts. */
44944 force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
44945 return;
44946 }
44947 emit_insn (x);
44948 }
44949
44950 void
44951 ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
44952 bool uns_p, bool high_p)
44953 {
44954 enum machine_mode wmode = GET_MODE (dest);
44955 enum machine_mode mode = GET_MODE (op1);
44956 rtx t1, t2, t3, t4, mask;
44957
44958 switch (mode)
44959 {
44960 case V4SImode:
44961 t1 = gen_reg_rtx (mode);
44962 t2 = gen_reg_rtx (mode);
44963 if (TARGET_XOP && !uns_p)
44964 {
44965 /* With XOP, we have pmacsdqh, aka mul_widen_odd. In this case,
44966 shuffle the elements once so that all elements are in the right
44967 place for immediate use: { A C B D }. */
44968 emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
44969 const1_rtx, GEN_INT (3)));
44970 emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
44971 const1_rtx, GEN_INT (3)));
44972 }
44973 else
44974 {
44975 /* Put the elements into place for the multiply. */
44976 ix86_expand_vec_interleave (t1, op1, op1, high_p);
44977 ix86_expand_vec_interleave (t2, op2, op2, high_p);
44978 high_p = false;
44979 }
44980 ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
44981 break;
44982
44983 case V8SImode:
44984 /* Shuffle the elements between the lanes. After this we
44985 have { A B E F | C D G H } for each operand. */
44986 t1 = gen_reg_rtx (V4DImode);
44987 t2 = gen_reg_rtx (V4DImode);
44988 emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
44989 const0_rtx, const2_rtx,
44990 const1_rtx, GEN_INT (3)));
44991 emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
44992 const0_rtx, const2_rtx,
44993 const1_rtx, GEN_INT (3)));
44994
44995 /* Shuffle the elements within the lanes. After this we
44996 have { A A B B | C C D D } or { E E F F | G G H H }. */
44997 t3 = gen_reg_rtx (V8SImode);
44998 t4 = gen_reg_rtx (V8SImode);
44999 mask = GEN_INT (high_p
45000 ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
45001 : 0 + (0 << 2) + (1 << 4) + (1 << 6));
45002 emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
45003 emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
45004
45005 ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
45006 break;
45007
45008 case V8HImode:
45009 case V16HImode:
45010 t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
45011 uns_p, OPTAB_DIRECT);
45012 t2 = expand_binop (mode,
45013 uns_p ? umul_highpart_optab : smul_highpart_optab,
45014 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
45015 gcc_assert (t1 && t2);
45016
45017 t3 = gen_reg_rtx (mode);
45018 ix86_expand_vec_interleave (t3, t1, t2, high_p);
45019 emit_move_insn (dest, gen_lowpart (wmode, t3));
45020 break;
45021
45022 case V16QImode:
45023 case V32QImode:
45024 t1 = gen_reg_rtx (wmode);
45025 t2 = gen_reg_rtx (wmode);
45026 ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
45027 ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
45028
45029 emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
45030 break;
45031
45032 default:
45033 gcc_unreachable ();
45034 }
45035 }
45036
45037 void
45038 ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
45039 {
45040 rtx res_1, res_2, res_3, res_4;
45041
45042 res_1 = gen_reg_rtx (V4SImode);
45043 res_2 = gen_reg_rtx (V4SImode);
45044 res_3 = gen_reg_rtx (V2DImode);
45045 res_4 = gen_reg_rtx (V2DImode);
45046 ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
45047 ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
45048
45049 /* Move the results in element 2 down to element 1; we don't care
45050 what goes in elements 2 and 3. Then we can merge the parts
45051 back together with an interleave.
45052
45053 Note that two other sequences were tried:
45054 (1) Use interleaves at the start instead of psrldq, which allows
45055 us to use a single shufps to merge things back at the end.
45056 (2) Use shufps here to combine the two vectors, then pshufd to
45057 put the elements in the correct order.
45058 In both cases the cost of the reformatting stall was too high
45059 and the overall sequence slower. */
45060
45061 emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
45062 const0_rtx, const2_rtx,
45063 const0_rtx, const0_rtx));
45064 emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
45065 const0_rtx, const2_rtx,
45066 const0_rtx, const0_rtx));
45067 res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
45068
45069 set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
45070 }
45071
45072 void
45073 ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
45074 {
45075 enum machine_mode mode = GET_MODE (op0);
45076 rtx t1, t2, t3, t4, t5, t6;
45077
45078 if (TARGET_XOP && mode == V2DImode)
45079 {
45080 /* op1: A,B,C,D, op2: E,F,G,H */
45081 op1 = gen_lowpart (V4SImode, op1);
45082 op2 = gen_lowpart (V4SImode, op2);
45083
45084 t1 = gen_reg_rtx (V4SImode);
45085 t2 = gen_reg_rtx (V4SImode);
45086 t3 = gen_reg_rtx (V2DImode);
45087 t4 = gen_reg_rtx (V2DImode);
45088
45089 /* t1: B,A,D,C */
45090 emit_insn (gen_sse2_pshufd_1 (t1, op1,
45091 GEN_INT (1),
45092 GEN_INT (0),
45093 GEN_INT (3),
45094 GEN_INT (2)));
45095
45096 /* t2: (B*E),(A*F),(D*G),(C*H) */
45097 emit_insn (gen_mulv4si3 (t2, t1, op2));
45098
45099 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
45100 emit_insn (gen_xop_phadddq (t3, t2));
45101
45102 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
45103 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
45104
45105 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
45106 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
45107 }
45108 else
45109 {
45110 enum machine_mode nmode;
45111 rtx (*umul) (rtx, rtx, rtx);
45112
45113 if (mode == V2DImode)
45114 {
45115 umul = gen_vec_widen_umult_even_v4si;
45116 nmode = V4SImode;
45117 }
45118 else if (mode == V4DImode)
45119 {
45120 umul = gen_vec_widen_umult_even_v8si;
45121 nmode = V8SImode;
45122 }
45123 else if (mode == V8DImode)
45124 {
45125 umul = gen_vec_widen_umult_even_v16si;
45126 nmode = V16SImode;
45127 }
45128 else
45129 gcc_unreachable ();
45130
45131
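      /* Schoolbook decomposition used below (illustrative):
	   (hi1 * 2^32 + lo1) * (hi2 * 2^32 + lo2)
	     == lo1 * lo2 + 2^32 * (hi1 * lo2 + hi2 * lo1)  (mod 2^64),
	 and the hiN * loM products only need their low 32 bits, so the
	 widening unsigned even multiply covers every partial product.  */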
45132 /* Multiply low parts. */
45133 t1 = gen_reg_rtx (mode);
45134 emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
45135
45136 /* Shift input vectors right 32 bits so we can multiply high parts. */
45137 t6 = GEN_INT (32);
45138 t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
45139 t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
45140
45141 /* Multiply high parts by low parts. */
45142 t4 = gen_reg_rtx (mode);
45143 t5 = gen_reg_rtx (mode);
45144 emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
45145 emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
45146
45147 /* Combine and shift the highparts back. */
45148 t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
45149 t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
45150
45151 /* Combine high and low parts. */
45152 force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
45153 }
45154
45155 set_unique_reg_note (get_last_insn (), REG_EQUAL,
45156 gen_rtx_MULT (mode, op1, op2));
45157 }
45158
45159 /* Calculate integer abs() using only SSE2 instructions. */
45160
45161 void
45162 ix86_expand_sse2_abs (rtx target, rtx input)
45163 {
45164 enum machine_mode mode = GET_MODE (target);
45165 rtx tmp0, tmp1, x;
45166
45167 switch (mode)
45168 {
45169 /* For 32-bit signed integer X, the best way to calculate the absolute
45170 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
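    /* E.g. (illustrative) for X = -5 and W = 32: X >> 31 == -1,
       -1 ^ -5 == 4 and 4 - (-1) == 5; for X >= 0 the shift yields 0 and the
       expression reduces to X.  */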
45171 case V4SImode:
45172 tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
45173 GEN_INT (GET_MODE_BITSIZE
45174 (GET_MODE_INNER (mode)) - 1),
45175 NULL, 0, OPTAB_DIRECT);
45176 tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
45177 NULL, 0, OPTAB_DIRECT);
45178 x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
45179 target, 0, OPTAB_DIRECT);
45180 break;
45181
45182 /* For 16-bit signed integer X, the best way to calculate the absolute
45183 value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
45184 case V8HImode:
45185 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45186
45187 x = expand_simple_binop (mode, SMAX, tmp0, input,
45188 target, 0, OPTAB_DIRECT);
45189 break;
45190
45191 /* For 8-bit signed integer X, the best way to calculate the absolute
45192 value of X is min ((unsigned char) X, (unsigned char) (-X)),
45193 as SSE2 provides the PMINUB insn. */
45194 case V16QImode:
45195 tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
45196
45197 x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
45198 target, 0, OPTAB_DIRECT);
45199 break;
45200
45201 default:
45202 gcc_unreachable ();
45203 }
45204
45205 if (x != target)
45206 emit_move_insn (target, x);
45207 }
45208
45209 /* Expand an insert into a vector register through a pinsr insn.
45210 Return true if successful. */
45211
45212 bool
45213 ix86_expand_pinsr (rtx *operands)
45214 {
45215 rtx dst = operands[0];
45216 rtx src = operands[3];
45217
45218 unsigned int size = INTVAL (operands[1]);
45219 unsigned int pos = INTVAL (operands[2]);
45220
45221 if (GET_CODE (dst) == SUBREG)
45222 {
45223 pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
45224 dst = SUBREG_REG (dst);
45225 }
45226
45227 if (GET_CODE (src) == SUBREG)
45228 src = SUBREG_REG (src);
45229
45230 switch (GET_MODE (dst))
45231 {
45232 case V16QImode:
45233 case V8HImode:
45234 case V4SImode:
45235 case V2DImode:
45236 {
45237 enum machine_mode srcmode, dstmode;
45238 rtx (*pinsr)(rtx, rtx, rtx, rtx);
45239
45240 srcmode = mode_for_size (size, MODE_INT, 0);
45241
45242 switch (srcmode)
45243 {
45244 case QImode:
45245 if (!TARGET_SSE4_1)
45246 return false;
45247 dstmode = V16QImode;
45248 pinsr = gen_sse4_1_pinsrb;
45249 break;
45250
45251 case HImode:
45252 if (!TARGET_SSE2)
45253 return false;
45254 dstmode = V8HImode;
45255 pinsr = gen_sse2_pinsrw;
45256 break;
45257
45258 case SImode:
45259 if (!TARGET_SSE4_1)
45260 return false;
45261 dstmode = V4SImode;
45262 pinsr = gen_sse4_1_pinsrd;
45263 break;
45264
45265 case DImode:
45266 gcc_assert (TARGET_64BIT);
45267 if (!TARGET_SSE4_1)
45268 return false;
45269 dstmode = V2DImode;
45270 pinsr = gen_sse4_1_pinsrq;
45271 break;
45272
45273 default:
45274 return false;
45275 }
45276
45277 rtx d = dst;
45278 if (GET_MODE (dst) != dstmode)
45279 d = gen_reg_rtx (dstmode);
45280 src = gen_lowpart (srcmode, src);
45281
45282 pos /= size;
45283
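	/* Note (an assumption about the sse.md patterns): the *_pinsr*
	   insns are modeled as a vec_merge with a one-hot element mask, so
	   the immediate passed here is 1 << element_index rather than the
	   raw index the hardware insn encodes.  */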
45284 emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
45285 GEN_INT (1 << pos)));
45286 if (d != dst)
45287 emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
45288 return true;
45289 }
45290
45291 default:
45292 return false;
45293 }
45294 }
45295 \f
45296 /* This function returns the calling-ABI-specific va_list type node.
45297 It returns the FNDECL-specific va_list type. */
45298
45299 static tree
45300 ix86_fn_abi_va_list (tree fndecl)
45301 {
45302 if (!TARGET_64BIT)
45303 return va_list_type_node;
45304 gcc_assert (fndecl != NULL_TREE);
45305
45306 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
45307 return ms_va_list_type_node;
45308 else
45309 return sysv_va_list_type_node;
45310 }
45311
45312 /* Returns the canonical va_list type specified by TYPE. If there
45313 is no valid TYPE provided, it returns NULL_TREE. */
45314
45315 static tree
45316 ix86_canonical_va_list_type (tree type)
45317 {
45318 tree wtype, htype;
45319
45320 /* Resolve references and pointers to va_list type. */
45321 if (TREE_CODE (type) == MEM_REF)
45322 type = TREE_TYPE (type);
45323 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
45324 type = TREE_TYPE (type);
45325 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
45326 type = TREE_TYPE (type);
45327
45328 if (TARGET_64BIT && va_list_type_node != NULL_TREE)
45329 {
45330 wtype = va_list_type_node;
45331 gcc_assert (wtype != NULL_TREE);
45332 htype = type;
45333 if (TREE_CODE (wtype) == ARRAY_TYPE)
45334 {
45335 /* If va_list is an array type, the argument may have decayed
45336 to a pointer type, e.g. by being passed to another function.
45337 In that case, unwrap both types so that we can compare the
45338 underlying records. */
45339 if (TREE_CODE (htype) == ARRAY_TYPE
45340 || POINTER_TYPE_P (htype))
45341 {
45342 wtype = TREE_TYPE (wtype);
45343 htype = TREE_TYPE (htype);
45344 }
45345 }
45346 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45347 return va_list_type_node;
45348 wtype = sysv_va_list_type_node;
45349 gcc_assert (wtype != NULL_TREE);
45350 htype = type;
45351 if (TREE_CODE (wtype) == ARRAY_TYPE)
45352 {
45353 /* If va_list is an array type, the argument may have decayed
45354 to a pointer type, e.g. by being passed to another function.
45355 In that case, unwrap both types so that we can compare the
45356 underlying records. */
45357 if (TREE_CODE (htype) == ARRAY_TYPE
45358 || POINTER_TYPE_P (htype))
45359 {
45360 wtype = TREE_TYPE (wtype);
45361 htype = TREE_TYPE (htype);
45362 }
45363 }
45364 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45365 return sysv_va_list_type_node;
45366 wtype = ms_va_list_type_node;
45367 gcc_assert (wtype != NULL_TREE);
45368 htype = type;
45369 if (TREE_CODE (wtype) == ARRAY_TYPE)
45370 {
45371 /* If va_list is an array type, the argument may have decayed
45372 to a pointer type, e.g. by being passed to another function.
45373 In that case, unwrap both types so that we can compare the
45374 underlying records. */
45375 if (TREE_CODE (htype) == ARRAY_TYPE
45376 || POINTER_TYPE_P (htype))
45377 {
45378 wtype = TREE_TYPE (wtype);
45379 htype = TREE_TYPE (htype);
45380 }
45381 }
45382 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
45383 return ms_va_list_type_node;
45384 return NULL_TREE;
45385 }
45386 return std_canonical_va_list_type (type);
45387 }
45388
45389 /* Iterate through the target-specific builtin types for va_list.
45390 IDX denotes the iterator, *PTREE is set to the result type of
45391 the va_list builtin, and *PNAME to its internal type.
45392 Returns zero if there is no element for this index, otherwise
45393 IDX should be increased upon the next call.
45394 Note, do not iterate a base builtin's name like __builtin_va_list.
45395 Used from c_common_nodes_and_builtins. */
45396
45397 static int
45398 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
45399 {
45400 if (TARGET_64BIT)
45401 {
45402 switch (idx)
45403 {
45404 default:
45405 break;
45406
45407 case 0:
45408 *ptree = ms_va_list_type_node;
45409 *pname = "__builtin_ms_va_list";
45410 return 1;
45411
45412 case 1:
45413 *ptree = sysv_va_list_type_node;
45414 *pname = "__builtin_sysv_va_list";
45415 return 1;
45416 }
45417 }
45418
45419 return 0;
45420 }
45421
45422 #undef TARGET_SCHED_DISPATCH
45423 #define TARGET_SCHED_DISPATCH has_dispatch
45424 #undef TARGET_SCHED_DISPATCH_DO
45425 #define TARGET_SCHED_DISPATCH_DO do_dispatch
45426 #undef TARGET_SCHED_REASSOCIATION_WIDTH
45427 #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
45428 #undef TARGET_SCHED_REORDER
45429 #define TARGET_SCHED_REORDER ix86_sched_reorder
45430 #undef TARGET_SCHED_ADJUST_PRIORITY
45431 #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
45432 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
45433 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
45434 ix86_dependencies_evaluation_hook
45435
45436 /* The size of the dispatch window is the total number of bytes of
45437 object code allowed in a window. */
45438 #define DISPATCH_WINDOW_SIZE 16
45439
45440 /* Number of dispatch windows considered for scheduling. */
45441 #define MAX_DISPATCH_WINDOWS 3
45442
45443 /* Maximum number of instructions in a window. */
45444 #define MAX_INSN 4
45445
45446 /* Maximum number of immediate operands in a window. */
45447 #define MAX_IMM 4
45448
45449 /* Maximum number of immediate bits allowed in a window. */
45450 #define MAX_IMM_SIZE 128
45451
45452 /* Maximum number of 32 bit immediates allowed in a window. */
45453 #define MAX_IMM_32 4
45454
45455 /* Maximum number of 64 bit immediates allowed in a window. */
45456 #define MAX_IMM_64 2
45457
45458 /* Maximum total of loads or prefetches allowed in a window. */
45459 #define MAX_LOAD 2
45460
45461 /* Maximum total of stores allowed in a window. */
45462 #define MAX_STORE 1
45463
45464 #undef BIG
45465 #define BIG 100
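
/* Worked example (purely illustrative, not derived from real scheduling
   output): under the limits above, a single 16-byte window could hold at
   most four instructions such as

     mov    (%rsi), %rax       one load             (MAX_LOAD is 2)
     add    $8, %rsi           one 32-bit immediate (MAX_IMM_32 is 4)
     mov    %rax, (%rdi)       one store            (MAX_STORE is 1)
     jne    .L2                a branch, which ends the window

   as long as their encoded lengths sum to no more than
   DISPATCH_WINDOW_SIZE bytes.  */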
45466
45467
45468 /* Dispatch groups. Instructions that affect the mix in a dispatch window. */
45469 enum dispatch_group {
45470 disp_no_group = 0,
45471 disp_load,
45472 disp_store,
45473 disp_load_store,
45474 disp_prefetch,
45475 disp_imm,
45476 disp_imm_32,
45477 disp_imm_64,
45478 disp_branch,
45479 disp_cmp,
45480 disp_jcc,
45481 disp_last
45482 };
45483
45484 /* Number of allowable groups in a dispatch window. It is an array
45485 indexed by the dispatch_group enum. 100 is used as a big number,
45486 because the number of these kinds of operations does not have any
45487 effect on the dispatch window, but we need entries for them in
45488 the table for other reasons. */
45489 static unsigned int num_allowable_groups[disp_last] = {
45490 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
45491 };
45492
45493 char group_name[disp_last + 1][16] = {
45494 "disp_no_group", "disp_load", "disp_store", "disp_load_store",
45495 "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
45496 "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
45497 };
45498
45499 /* Instruction path. */
45500 enum insn_path {
45501 no_path = 0,
45502 path_single, /* Single micro op. */
45503 path_double, /* Double micro op. */
45504 path_multi, /* Instructions with more than 2 micro ops. */
45505 last_path
45506 };
45507
45508 /* sched_insn_info defines a window to the instructions scheduled in
45509 the basic block. It contains a pointer to the insn_info table and
45510 the instruction scheduled.
45511
45512 Windows are allocated for each basic block and are linked
45513 together. */
45514 typedef struct sched_insn_info_s {
45515 rtx insn;
45516 enum dispatch_group group;
45517 enum insn_path path;
45518 int byte_len;
45519 int imm_bytes;
45520 } sched_insn_info;
45521
45522 /* Linked list of dispatch windows. This is a two-way list of
45523 dispatch windows of a basic block. It contains information about
45524 the number of uops in the window and the total number of
45525 instructions and of bytes in the object code for this dispatch
45526 window. */
45527 typedef struct dispatch_windows_s {
45528 int num_insn; /* Number of insn in the window. */
45529 int num_uops; /* Number of uops in the window. */
45530 int window_size; /* Number of bytes in the window. */
45531 int window_num; /* Window number, either 0 or 1. */
45532 int num_imm; /* Number of immediates in an insn. */
45533 int num_imm_32; /* Number of 32 bit immediates in an insn. */
45534 int num_imm_64; /* Number of 64 bit immediates in an insn. */
45535 int imm_size; /* Total immediates in the window. */
45536 int num_loads; /* Total memory loads in the window. */
45537 int num_stores; /* Total memory stores in the window. */
45538 int violation; /* Violation exists in window. */
45539 sched_insn_info *window; /* Pointer to the window. */
45540 struct dispatch_windows_s *next;
45541 struct dispatch_windows_s *prev;
45542 } dispatch_windows;
45543
45544 /* Immediate values used in an insn. */
45545 typedef struct imm_info_s
45546 {
45547 int imm;
45548 int imm32;
45549 int imm64;
45550 } imm_info;
45551
45552 static dispatch_windows *dispatch_window_list;
45553 static dispatch_windows *dispatch_window_list1;
45554
45555 /* Get dispatch group of insn. */
45556
45557 static enum dispatch_group
45558 get_mem_group (rtx insn)
45559 {
45560 enum attr_memory memory;
45561
45562 if (INSN_CODE (insn) < 0)
45563 return disp_no_group;
45564 memory = get_attr_memory (insn);
45565 if (memory == MEMORY_STORE)
45566 return disp_store;
45567
45568 if (memory == MEMORY_LOAD)
45569 return disp_load;
45570
45571 if (memory == MEMORY_BOTH)
45572 return disp_load_store;
45573
45574 return disp_no_group;
45575 }
45576
45577 /* Return true if insn is a compare instruction. */
45578
45579 static bool
45580 is_cmp (rtx insn)
45581 {
45582 enum attr_type type;
45583
45584 type = get_attr_type (insn);
45585 return (type == TYPE_TEST
45586 || type == TYPE_ICMP
45587 || type == TYPE_FCMP
45588 || GET_CODE (PATTERN (insn)) == COMPARE);
45589 }
45590
45591 /* Return true if a dispatch violation was encountered. */
45592
45593 static bool
45594 dispatch_violation (void)
45595 {
45596 if (dispatch_window_list->next)
45597 return dispatch_window_list->next->violation;
45598 return dispatch_window_list->violation;
45599 }
45600
45601 /* Return true if insn is a branch instruction. */
45602
45603 static bool
45604 is_branch (rtx insn)
45605 {
45606 return (CALL_P (insn) || JUMP_P (insn));
45607 }
45608
45609 /* Return true if insn is a prefetch instruction. */
45610
45611 static bool
45612 is_prefetch (rtx insn)
45613 {
45614 return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
45615 }
45616
45617 /* This function initializes a dispatch window and the list container holding a
45618 pointer to the window. */
45619
45620 static void
45621 init_window (int window_num)
45622 {
45623 int i;
45624 dispatch_windows *new_list;
45625
45626 if (window_num == 0)
45627 new_list = dispatch_window_list;
45628 else
45629 new_list = dispatch_window_list1;
45630
45631 new_list->num_insn = 0;
45632 new_list->num_uops = 0;
45633 new_list->window_size = 0;
45634 new_list->next = NULL;
45635 new_list->prev = NULL;
45636 new_list->window_num = window_num;
45637 new_list->num_imm = 0;
45638 new_list->num_imm_32 = 0;
45639 new_list->num_imm_64 = 0;
45640 new_list->imm_size = 0;
45641 new_list->num_loads = 0;
45642 new_list->num_stores = 0;
45643 new_list->violation = false;
45644
45645 for (i = 0; i < MAX_INSN; i++)
45646 {
45647 new_list->window[i].insn = NULL;
45648 new_list->window[i].group = disp_no_group;
45649 new_list->window[i].path = no_path;
45650 new_list->window[i].byte_len = 0;
45651 new_list->window[i].imm_bytes = 0;
45652 }
45653 return;
45654 }
45655
45656 /* This function allocates and initializes a dispatch window and the
45657 list container holding a pointer to the window. */
45658
45659 static dispatch_windows *
45660 allocate_window (void)
45661 {
45662 dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
45663 new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
45664
45665 return new_list;
45666 }
45667
45668 /* This routine initializes the dispatch scheduling information. It
45669 initiates building dispatch scheduler tables and constructs the
45670 first dispatch window. */
45671
45672 static void
45673 init_dispatch_sched (void)
45674 {
45675 /* Allocate a dispatch list and a window. */
45676 dispatch_window_list = allocate_window ();
45677 dispatch_window_list1 = allocate_window ();
45678 init_window (0);
45679 init_window (1);
45680 }
45681
45682 /* This function returns true if a branch is detected. End of a basic block
45683 does not have to be a branch, but here we assume only branches end a
45684 window. */
45685
45686 static bool
45687 is_end_basic_block (enum dispatch_group group)
45688 {
45689 return group == disp_branch;
45690 }
45691
45692 /* This function is called when the end of a window's processing is reached. */
45693
45694 static void
45695 process_end_window (void)
45696 {
45697 gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
45698 if (dispatch_window_list->next)
45699 {
45700 gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
45701 gcc_assert (dispatch_window_list->window_size
45702 + dispatch_window_list1->window_size <= 48);
45703 init_window (1);
45704 }
45705 init_window (0);
45706 }
45707
45708 /* Allocates a new dispatch window and adds it to WINDOW_LIST.
45709 WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
45710 for 48 bytes of instructions. Note that these windows are not the
45711 dispatch windows whose size is DISPATCH_WINDOW_SIZE. */
45712
45713 static dispatch_windows *
45714 allocate_next_window (int window_num)
45715 {
45716 if (window_num == 0)
45717 {
45718 if (dispatch_window_list->next)
45719 init_window (1);
45720 init_window (0);
45721 return dispatch_window_list;
45722 }
45723
45724 dispatch_window_list->next = dispatch_window_list1;
45725 dispatch_window_list1->prev = dispatch_window_list;
45726
45727 return dispatch_window_list1;
45728 }
45729
45730 /* Increment the number of immediate operands of an instruction. */
45731
45732 static int
45733 find_constant_1 (rtx *in_rtx, imm_info *imm_values)
45734 {
45735 if (*in_rtx == 0)
45736 return 0;
45737
45738 switch (GET_CODE (*in_rtx))
45739 {
45740 case CONST:
45741 case SYMBOL_REF:
45742 case CONST_INT:
45743 (imm_values->imm)++;
45744 if (x86_64_immediate_operand (*in_rtx, SImode))
45745 (imm_values->imm32)++;
45746 else
45747 (imm_values->imm64)++;
45748 break;
45749
45750 case CONST_DOUBLE:
45751 (imm_values->imm)++;
45752 (imm_values->imm64)++;
45753 break;
45754
45755 case CODE_LABEL:
45756 if (LABEL_KIND (*in_rtx) == LABEL_NORMAL)
45757 {
45758 (imm_values->imm)++;
45759 (imm_values->imm32)++;
45760 }
45761 break;
45762
45763 default:
45764 break;
45765 }
45766
45767 return 0;
45768 }
45769
45770 /* Compute number of immediate operands of an instruction. */
45771
45772 static void
45773 find_constant (rtx in_rtx, imm_info *imm_values)
45774 {
45775 for_each_rtx (INSN_P (in_rtx) ? &PATTERN (in_rtx) : &in_rtx,
45776 (rtx_function) find_constant_1, (void *) imm_values);
45777 }
45778
45779 /* Return total size of immediate operands of an instruction along with number
45780 of corresponding immediate operands. It initializes its parameters to zero
45781 before calling FIND_CONSTANT.
45782 INSN is the input instruction. IMM is the total of immediates.
45783 IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
45784 bit immediates. */
45785
45786 static int
45787 get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
45788 {
45789 imm_info imm_values = {0, 0, 0};
45790
45791 find_constant (insn, &imm_values);
45792 *imm = imm_values.imm;
45793 *imm32 = imm_values.imm32;
45794 *imm64 = imm_values.imm64;
45795 return imm_values.imm32 * 4 + imm_values.imm64 * 8;
45796 }
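
/* Illustrative arithmetic (a restatement of the return value above): an
   insn with two 32-bit immediates and one 64-bit immediate yields
   *imm = 3, *imm32 = 2, *imm64 = 1 and a returned size of
   2 * 4 + 1 * 8 = 16 bytes.  */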
45797
45798 /* This function returns true if INSN has at least one immediate
45799 operand. */
45800
45801 static bool
45802 has_immediate (rtx insn)
45803 {
45804 int num_imm_operand;
45805 int num_imm32_operand;
45806 int num_imm64_operand;
45807
45808 if (insn)
45809 return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45810 &num_imm64_operand);
45811 return false;
45812 }
45813
45814 /* Return single or double path for instructions. */
45815
45816 static enum insn_path
45817 get_insn_path (rtx insn)
45818 {
45819 enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
45820
45821 if ((int)path == 0)
45822 return path_single;
45823
45824 if ((int)path == 1)
45825 return path_double;
45826
45827 return path_multi;
45828 }
45829
45830 /* Return insn dispatch group. */
45831
45832 static enum dispatch_group
45833 get_insn_group (rtx insn)
45834 {
45835 enum dispatch_group group = get_mem_group (insn);
45836 if (group)
45837 return group;
45838
45839 if (is_branch (insn))
45840 return disp_branch;
45841
45842 if (is_cmp (insn))
45843 return disp_cmp;
45844
45845 if (has_immediate (insn))
45846 return disp_imm;
45847
45848 if (is_prefetch (insn))
45849 return disp_prefetch;
45850
45851 return disp_no_group;
45852 }
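
/* Illustrative classification (hypothetical examples, following the order
   of the checks above): an insn that touches memory is classified by its
   memory group first, e.g. "movl (%rsi), %eax" -> disp_load; otherwise
   "call foo" -> disp_branch, "testl %eax, %eax" -> disp_cmp, and
   "addl $1, %eax" -> disp_imm; a prefetch insn becomes disp_prefetch
   (assuming it was not already classified as a load); anything else
   falls back to disp_no_group.  */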
45853
45854 /* Count number of GROUP restricted instructions in a dispatch
45855 window WINDOW_LIST. */
45856
45857 static int
45858 count_num_restricted (rtx insn, dispatch_windows *window_list)
45859 {
45860 enum dispatch_group group = get_insn_group (insn);
45861 int imm_size;
45862 int num_imm_operand;
45863 int num_imm32_operand;
45864 int num_imm64_operand;
45865
45866 if (group == disp_no_group)
45867 return 0;
45868
45869 if (group == disp_imm)
45870 {
45871 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45872 &num_imm64_operand);
45873 if (window_list->imm_size + imm_size > MAX_IMM_SIZE
45874 || num_imm_operand + window_list->num_imm > MAX_IMM
45875 || (num_imm32_operand > 0
45876 && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
45877 || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
45878 || (num_imm64_operand > 0
45879 && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
45880 || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
45881 || (window_list->imm_size + imm_size == MAX_IMM_SIZE
45882 && num_imm64_operand > 0
45883 && ((window_list->num_imm_64 > 0
45884 && window_list->num_insn >= 2)
45885 || window_list->num_insn >= 3)))
45886 return BIG;
45887
45888 return 1;
45889 }
45890
45891 if ((group == disp_load_store
45892 && (window_list->num_loads >= MAX_LOAD
45893 || window_list->num_stores >= MAX_STORE))
45894 || ((group == disp_load
45895 || group == disp_prefetch)
45896 && window_list->num_loads >= MAX_LOAD)
45897 || (group == disp_store
45898 && window_list->num_stores >= MAX_STORE))
45899 return BIG;
45900
45901 return 1;
45902 }
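
/* Illustrative consequence of the checks above (not an exhaustive rule
   set): if the current window already holds MAX_LOAD (2) loads, another
   load or prefetch scores BIG (100), which exceeds every entry of
   num_allowable_groups and therefore prevents the insn from fitting the
   window; an insn that still fits its per-window budget scores 1.  */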
45903
45904 /* This function returns true if insn satisfies dispatch rules on the
45905 last window scheduled. */
45906
45907 static bool
45908 fits_dispatch_window (rtx insn)
45909 {
45910 dispatch_windows *window_list = dispatch_window_list;
45911 dispatch_windows *window_list_next = dispatch_window_list->next;
45912 unsigned int num_restrict;
45913 enum dispatch_group group = get_insn_group (insn);
45914 enum insn_path path = get_insn_path (insn);
45915 int sum;
45916
45917 /* Make disp_cmp and disp_jcc get scheduled at the latest. These
45918 instructions should be given the lowest priority in the
45919 scheduling process in the Haifa scheduler to make sure they will be
45920 scheduled in the same dispatch window as the reference to them. */
45921 if (group == disp_jcc || group == disp_cmp)
45922 return false;
45923
45924 /* Check nonrestricted. */
45925 if (group == disp_no_group || group == disp_branch)
45926 return true;
45927
45928 /* Get last dispatch window. */
45929 if (window_list_next)
45930 window_list = window_list_next;
45931
45932 if (window_list->window_num == 1)
45933 {
45934 sum = window_list->prev->window_size + window_list->window_size;
45935
45936 if (sum == 32
45937 || (min_insn_size (insn) + sum) >= 48)
45938 /* Window 1 is full. Go for next window. */
45939 return true;
45940 }
45941
45942 num_restrict = count_num_restricted (insn, window_list);
45943
45944 if (num_restrict > num_allowable_groups[group])
45945 return false;
45946
45947 /* See if it fits in the first window. */
45948 if (window_list->window_num == 0)
45949 {
45950 /* The first window should have only single and double path
45951 uops. */
45952 if (path == path_double
45953 && (window_list->num_uops + 2) > MAX_INSN)
45954 return false;
45955 else if (path != path_single)
45956 return false;
45957 }
45958 return true;
45959 }
45960
45961 /* Add an instruction INSN with NUM_UOPS micro-operations to the
45962 dispatch window WINDOW_LIST. */
45963
45964 static void
45965 add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
45966 {
45967 int byte_len = min_insn_size (insn);
45968 int num_insn = window_list->num_insn;
45969 int imm_size;
45970 sched_insn_info *window = window_list->window;
45971 enum dispatch_group group = get_insn_group (insn);
45972 enum insn_path path = get_insn_path (insn);
45973 int num_imm_operand;
45974 int num_imm32_operand;
45975 int num_imm64_operand;
45976
45977 if (!window_list->violation && group != disp_cmp
45978 && !fits_dispatch_window (insn))
45979 window_list->violation = true;
45980
45981 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
45982 &num_imm64_operand);
45983
45984 /* Initialize window with new instruction. */
45985 window[num_insn].insn = insn;
45986 window[num_insn].byte_len = byte_len;
45987 window[num_insn].group = group;
45988 window[num_insn].path = path;
45989 window[num_insn].imm_bytes = imm_size;
45990
45991 window_list->window_size += byte_len;
45992 window_list->num_insn = num_insn + 1;
45993 window_list->num_uops = window_list->num_uops + num_uops;
45994 window_list->imm_size += imm_size;
45995 window_list->num_imm += num_imm_operand;
45996 window_list->num_imm_32 += num_imm32_operand;
45997 window_list->num_imm_64 += num_imm64_operand;
45998
45999 if (group == disp_store)
46000 window_list->num_stores += 1;
46001 else if (group == disp_load
46002 || group == disp_prefetch)
46003 window_list->num_loads += 1;
46004 else if (group == disp_load_store)
46005 {
46006 window_list->num_stores += 1;
46007 window_list->num_loads += 1;
46008 }
46009 }
46010
46011 /* Adds a scheduled instruction, INSN, to the current dispatch window.
46012 If the total bytes of instructions or the number of instructions in
46013 the window exceeds the allowable limit, it allocates a new window.
46014
46015 static void
46016 add_to_dispatch_window (rtx insn)
46017 {
46018 int byte_len;
46019 dispatch_windows *window_list;
46020 dispatch_windows *next_list;
46021 dispatch_windows *window0_list;
46022 enum insn_path path;
46023 enum dispatch_group insn_group;
46024 bool insn_fits;
46025 int num_insn;
46026 int num_uops;
46027 int window_num;
46028 int insn_num_uops;
46029 int sum;
46030
46031 if (INSN_CODE (insn) < 0)
46032 return;
46033
46034 byte_len = min_insn_size (insn);
46035 window_list = dispatch_window_list;
46036 next_list = window_list->next;
46037 path = get_insn_path (insn);
46038 insn_group = get_insn_group (insn);
46039
46040 /* Get the last dispatch window. */
46041 if (next_list)
46042 window_list = dispatch_window_list->next;
46043
46044 if (path == path_single)
46045 insn_num_uops = 1;
46046 else if (path == path_double)
46047 insn_num_uops = 2;
46048 else
46049 insn_num_uops = (int) path;
46050
46051 /* If the current window is full, get a new window.
46052 Window number zero is full if MAX_INSN uops are scheduled in it.
46053 Window number one is full if window zero's bytes plus window
46054 one's bytes reach 32, or if adding the bytes of the new instruction
46055 to the total makes it greater than 48, or if it already has MAX_INSN
46056 instructions in it. */
46057 num_insn = window_list->num_insn;
46058 num_uops = window_list->num_uops;
46059 window_num = window_list->window_num;
46060 insn_fits = fits_dispatch_window (insn);
46061
46062 if (num_insn >= MAX_INSN
46063 || num_uops + insn_num_uops > MAX_INSN
46064 || !(insn_fits))
46065 {
46066 window_num = ~window_num & 1;
46067 window_list = allocate_next_window (window_num);
46068 }
46069
46070 if (window_num == 0)
46071 {
46072 add_insn_window (insn, window_list, insn_num_uops);
46073 if (window_list->num_insn >= MAX_INSN
46074 && insn_group == disp_branch)
46075 {
46076 process_end_window ();
46077 return;
46078 }
46079 }
46080 else if (window_num == 1)
46081 {
46082 window0_list = window_list->prev;
46083 sum = window0_list->window_size + window_list->window_size;
46084 if (sum == 32
46085 || (byte_len + sum) >= 48)
46086 {
46087 process_end_window ();
46088 window_list = dispatch_window_list;
46089 }
46090
46091 add_insn_window (insn, window_list, insn_num_uops);
46092 }
46093 else
46094 gcc_unreachable ();
46095
46096 if (is_end_basic_block (insn_group))
46097 {
46098 /* The end of the basic block is reached; do end-of-basic-block processing. */
46099 process_end_window ();
46100 return;
46101 }
46102 }
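
/* Illustrative walk-through (a sketch, not traced from real RTL): with an
   empty window 0, four single-uop instructions fill it (MAX_INSN); the
   next instruction flips window_num via "~window_num & 1" and lands in
   window 1.  Once window 0's and window 1's byte counts reach 32, or the
   next instruction would push the pair past 48 bytes, process_end_window
   resets both windows and the accounting starts over.  */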
46103
46104 /* Print the dispatch window, WINDOW_NUM, to FILE. */
46105
46106 DEBUG_FUNCTION static void
46107 debug_dispatch_window_file (FILE *file, int window_num)
46108 {
46109 dispatch_windows *list;
46110 int i;
46111
46112 if (window_num == 0)
46113 list = dispatch_window_list;
46114 else
46115 list = dispatch_window_list1;
46116
46117 fprintf (file, "Window #%d:\n", list->window_num);
46118 fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
46119 list->num_insn, list->num_uops, list->window_size);
46120 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46121 list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
46122
46123 fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
46124 list->num_stores);
46125 fprintf (file, " insn info:\n");
46126
46127 for (i = 0; i < MAX_INSN; i++)
46128 {
46129 if (!list->window[i].insn)
46130 break;
46131 fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
46132 i, group_name[list->window[i].group],
46133 i, (void *)list->window[i].insn,
46134 i, list->window[i].path,
46135 i, list->window[i].byte_len,
46136 i, list->window[i].imm_bytes);
46137 }
46138 }
46139
46140 /* Print to stdout a dispatch window. */
46141
46142 DEBUG_FUNCTION void
46143 debug_dispatch_window (int window_num)
46144 {
46145 debug_dispatch_window_file (stdout, window_num);
46146 }
46147
46148 /* Print INSN dispatch information to FILE. */
46149
46150 DEBUG_FUNCTION static void
46151 debug_insn_dispatch_info_file (FILE *file, rtx insn)
46152 {
46153 int byte_len;
46154 enum insn_path path;
46155 enum dispatch_group group;
46156 int imm_size;
46157 int num_imm_operand;
46158 int num_imm32_operand;
46159 int num_imm64_operand;
46160
46161 if (INSN_CODE (insn) < 0)
46162 return;
46163
46164 byte_len = min_insn_size (insn);
46165 path = get_insn_path (insn);
46166 group = get_insn_group (insn);
46167 imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
46168 &num_imm64_operand);
46169
46170 fprintf (file, " insn info:\n");
46171 fprintf (file, " group = %s, path = %d, byte_len = %d\n",
46172 group_name[group], path, byte_len);
46173 fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
46174 num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
46175 }
46176
46177 /* Print to stdout the status of the ready list with respect to
46178 dispatch windows. */
46179
46180 DEBUG_FUNCTION void
46181 debug_ready_dispatch (void)
46182 {
46183 int i;
46184 int no_ready = number_in_ready ();
46185
46186 fprintf (stdout, "Number of ready: %d\n", no_ready);
46187
46188 for (i = 0; i < no_ready; i++)
46189 debug_insn_dispatch_info_file (stdout, get_ready_element (i));
46190 }
46191
46192 /* This routine is the driver of the dispatch scheduler. */
46193
46194 static void
46195 do_dispatch (rtx insn, int mode)
46196 {
46197 if (mode == DISPATCH_INIT)
46198 init_dispatch_sched ();
46199 else if (mode == ADD_TO_DISPATCH_WINDOW)
46200 add_to_dispatch_window (insn);
46201 }
46202
46203 /* Return TRUE if Dispatch Scheduling is supported. */
46204
46205 static bool
46206 has_dispatch (rtx insn, int action)
46207 {
46208 if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
46209 && flag_dispatch_scheduler)
46210 switch (action)
46211 {
46212 default:
46213 return false;
46214
46215 case IS_DISPATCH_ON:
46216 return true;
46217 break;
46218
46219 case IS_CMP:
46220 return is_cmp (insn);
46221
46222 case DISPATCH_VIOLATION:
46223 return dispatch_violation ();
46224
46225 case FITS_DISPATCH_WINDOW:
46226 return fits_dispatch_window (insn);
46227 }
46228
46229 return false;
46230 }
46231
46232 /* Implementation of reassociation_width target hook used by
46233 reassoc phase to identify parallelism level in reassociated
46234 tree. Statements tree_code is passed in OPC. Arguments type
46235 is passed in MODE.
46236
46237 Currently parallel reassociation is enabled for Atom
46238 processors only and we set reassociation width to be 2
46239 because Atom may issue up to 2 instructions per cycle.
46240
46241 Return value should be fixed if parallel reassociation is
46242 enabled for other processors. */
46243
46244 static int
46245 ix86_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
46246 enum machine_mode mode)
46247 {
46248 int res = 1;
46249
46250 if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
46251 res = 2;
46252 else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
46253 res = 2;
46254
46255 return res;
46256 }
46257
46258 /* ??? No autovectorization into MMX or 3DNOW until we can reliably
46259 place emms and femms instructions. */
46260
46261 static enum machine_mode
46262 ix86_preferred_simd_mode (enum machine_mode mode)
46263 {
46264 if (!TARGET_SSE)
46265 return word_mode;
46266
46267 switch (mode)
46268 {
46269 case QImode:
46270 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
46271 case HImode:
46272 return (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
46273 case SImode:
46274 return TARGET_AVX512F ? V16SImode :
46275 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
46276 case DImode:
46277 return TARGET_AVX512F ? V8DImode :
46278 (TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
46279
46280 case SFmode:
46281 if (TARGET_AVX512F)
46282 return V16SFmode;
46283 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46284 return V8SFmode;
46285 else
46286 return V4SFmode;
46287
46288 case DFmode:
46289 if (!TARGET_VECTORIZE_DOUBLE)
46290 return word_mode;
46291 else if (TARGET_AVX512F)
46292 return V8DFmode;
46293 else if (TARGET_AVX && !TARGET_PREFER_AVX128)
46294 return V4DFmode;
46295 else if (TARGET_SSE2)
46296 return V2DFmode;
46297 /* FALLTHRU */
46298
46299 default:
46300 return word_mode;
46301 }
46302 }
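
/* Illustrative results of the selection above (assuming the default
   tuning, i.e. TARGET_PREFER_AVX128 not set): for SImode the preferred
   SIMD mode is V16SImode with AVX512F, V8SImode with AVX, and V4SImode
   with plain SSE; DFmode similarly yields V8DFmode, V4DFmode or V2DFmode,
   and falls back to word_mode when double vectorization is disabled.  */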
46303
46304 /* If AVX is enabled then try vectorizing with both 256bit and 128bit
46305 vectors. If AVX512F is enabled then try vectorizing with 512bit,
46306 256bit and 128bit vectors. */
46307
46308 static unsigned int
46309 ix86_autovectorize_vector_sizes (void)
46310 {
46311 return TARGET_AVX512F ? 64 | 32 | 16 :
46312 (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
46313 }
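
/* Illustrative values (read directly off the expression above): with
   AVX512F the hook returns 64 | 32 | 16, i.e. the vectorizer may try
   512-, 256- and 128-bit vectors; with AVX (and 128-bit vectors not
   preferred) it returns 32 | 16; otherwise 0, meaning only the preferred
   SIMD mode is tried.  */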
46314
46315 \f
46316
46317 /* Return class of registers which could be used for pseudo of MODE
46318 and of class RCLASS for spilling instead of memory. Return NO_REGS
46319 if it is not possible or not profitable. */
46320 static reg_class_t
46321 ix86_spill_class (reg_class_t rclass, enum machine_mode mode)
46322 {
46323 if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
46324 && (mode == SImode || (TARGET_64BIT && mode == DImode))
46325 && INTEGER_CLASS_P (rclass))
46326 return ALL_SSE_REGS;
46327 return NO_REGS;
46328 }
46329
46330 /* Implement targetm.vectorize.init_cost. */
46331
46332 static void *
46333 ix86_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
46334 {
46335 unsigned *cost = XNEWVEC (unsigned, 3);
46336 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
46337 return cost;
46338 }
46339
46340 /* Implement targetm.vectorize.add_stmt_cost. */
46341
46342 static unsigned
46343 ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
46344 struct _stmt_vec_info *stmt_info, int misalign,
46345 enum vect_cost_model_location where)
46346 {
46347 unsigned *cost = (unsigned *) data;
46348 unsigned retval = 0;
46349
46350 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
46351 int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
46352
46353 /* Statements in an inner loop relative to the loop being
46354 vectorized are weighted more heavily. The value here is
46355 arbitrary and could potentially be improved with analysis. */
46356 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
46357 count *= 50; /* FIXME. */
46358
46359 retval = (unsigned) (count * stmt_cost);
46360
46361 /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
46362 for Silvermont, as it has an out-of-order integer pipeline and can execute
46363 2 scalar instructions per tick, but has an in-order SIMD pipeline. */
46364 if (TARGET_SILVERMONT || TARGET_INTEL)
46365 if (stmt_info && stmt_info->stmt)
46366 {
46367 tree lhs_op = gimple_get_lhs (stmt_info->stmt);
46368 if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
46369 retval = (retval * 17) / 10;
46370 }
46371
46372 cost[where] += retval;
46373
46374 return retval;
46375 }
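
/* Worked example (illustrative numbers): for a vector statement in the
   loop body with a per-statement cost of 4 and COUNT 1, retval is 4;
   on Silvermont with an integer-typed lhs this is scaled to
   4 * 17 / 10 = 6 (integer division), which is then accumulated into
   cost[vect_body].  */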
46376
46377 /* Implement targetm.vectorize.finish_cost. */
46378
46379 static void
46380 ix86_finish_cost (void *data, unsigned *prologue_cost,
46381 unsigned *body_cost, unsigned *epilogue_cost)
46382 {
46383 unsigned *cost = (unsigned *) data;
46384 *prologue_cost = cost[vect_prologue];
46385 *body_cost = cost[vect_body];
46386 *epilogue_cost = cost[vect_epilogue];
46387 }
46388
46389 /* Implement targetm.vectorize.destroy_cost_data. */
46390
46391 static void
46392 ix86_destroy_cost_data (void *data)
46393 {
46394 free (data);
46395 }
46396
46397 /* Validate target specific memory model bits in VAL. */
46398
46399 static unsigned HOST_WIDE_INT
46400 ix86_memmodel_check (unsigned HOST_WIDE_INT val)
46401 {
46402 unsigned HOST_WIDE_INT model = val & MEMMODEL_MASK;
46403 bool strong;
46404
46405 if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
46406 |MEMMODEL_MASK)
46407 || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
46408 {
46409 warning (OPT_Winvalid_memory_model,
46410 "Unknown architecture specific memory model");
46411 return MEMMODEL_SEQ_CST;
46412 }
46413 strong = (model == MEMMODEL_ACQ_REL || model == MEMMODEL_SEQ_CST);
46414 if (val & IX86_HLE_ACQUIRE && !(model == MEMMODEL_ACQUIRE || strong))
46415 {
46416 warning (OPT_Winvalid_memory_model,
46417 "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
46418 return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
46419 }
46420 if (val & IX86_HLE_RELEASE && !(model == MEMMODEL_RELEASE || strong))
46421 {
46422 warning (OPT_Winvalid_memory_model,
46423 "HLE_RELEASE not used with RELEASE or stronger memory model");
46424 return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
46425 }
46426 return val;
46427 }
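
/* Illustrative user-level usage of the HLE bits validated above (a hedged
   sketch; see the GCC manual for the __ATOMIC_HLE_* macros):

     while (__atomic_exchange_n (&lock, 1,
                                 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;
     ...critical section...
     __atomic_store_n (&lock, 0,
                       __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);

   Combining IX86_HLE_ACQUIRE with a model weaker than ACQUIRE (or
   IX86_HLE_RELEASE with one weaker than RELEASE) is rejected above and
   falls back to MEMMODEL_SEQ_CST.  */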
46428
46429 /* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
46430 CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
46431 CLONEI->simdlen. Return 0 if SIMD clones shouldn't be emitted,
46432 or number of vecsize_mangle variants that should be emitted. */
46433
46434 static int
46435 ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
46436 struct cgraph_simd_clone *clonei,
46437 tree base_type, int num)
46438 {
46439 int ret = 1;
46440
46441 if (clonei->simdlen
46442 && (clonei->simdlen < 2
46443 || clonei->simdlen > 16
46444 || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
46445 {
46446 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46447 "unsupported simdlen %d", clonei->simdlen);
46448 return 0;
46449 }
46450
46451 tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
46452 if (TREE_CODE (ret_type) != VOID_TYPE)
46453 switch (TYPE_MODE (ret_type))
46454 {
46455 case QImode:
46456 case HImode:
46457 case SImode:
46458 case DImode:
46459 case SFmode:
46460 case DFmode:
46461 /* case SCmode: */
46462 /* case DCmode: */
46463 break;
46464 default:
46465 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46466 "unsupported return type %qT for simd\n", ret_type);
46467 return 0;
46468 }
46469
46470 tree t;
46471 int i;
46472
46473 for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
46474 /* FIXME: Shouldn't we allow such arguments if they are uniform? */
46475 switch (TYPE_MODE (TREE_TYPE (t)))
46476 {
46477 case QImode:
46478 case HImode:
46479 case SImode:
46480 case DImode:
46481 case SFmode:
46482 case DFmode:
46483 /* case SCmode: */
46484 /* case DCmode: */
46485 break;
46486 default:
46487 warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
46488 "unsupported argument type %qT for simd\n", TREE_TYPE (t));
46489 return 0;
46490 }
46491
46492 if (clonei->cilk_elemental)
46493 {
46494 /* Parse the processor clause here. If not present, default to 'b'. */
46495 clonei->vecsize_mangle = 'b';
46496 }
46497 else if (!TREE_PUBLIC (node->decl))
46498 {
46499 /* If the function isn't exported, we can pick up just one ISA
46500 for the clones. */
46501 if (TARGET_AVX2)
46502 clonei->vecsize_mangle = 'd';
46503 else if (TARGET_AVX)
46504 clonei->vecsize_mangle = 'c';
46505 else
46506 clonei->vecsize_mangle = 'b';
46507 ret = 1;
46508 }
46509 else
46510 {
46511 clonei->vecsize_mangle = "bcd"[num];
46512 ret = 3;
46513 }
46514 switch (clonei->vecsize_mangle)
46515 {
46516 case 'b':
46517 clonei->vecsize_int = 128;
46518 clonei->vecsize_float = 128;
46519 break;
46520 case 'c':
46521 clonei->vecsize_int = 128;
46522 clonei->vecsize_float = 256;
46523 break;
46524 case 'd':
46525 clonei->vecsize_int = 256;
46526 clonei->vecsize_float = 256;
46527 break;
46528 }
46529 if (clonei->simdlen == 0)
46530 {
46531 if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
46532 clonei->simdlen = clonei->vecsize_int;
46533 else
46534 clonei->simdlen = clonei->vecsize_float;
46535 clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
46536 if (clonei->simdlen > 16)
46537 clonei->simdlen = 16;
46538 }
46539 return ret;
46540 }
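
/* Illustrative outcome (a sketch under the rules above): for an exported
   "#pragma omp declare simd" function whose characteristic type is double
   and which has no simdlen clause, three clones are emitted with
   vecsize_mangle 'b', 'c' and 'd'; the 'd' (AVX2) variant gets
   simdlen = vecsize_float / 64 = 256 / 64 = 4, while the 'b' (SSE2)
   variant gets 128 / 64 = 2.  */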
46541
46542 /* Add target attribute to SIMD clone NODE if needed. */
46543
46544 static void
46545 ix86_simd_clone_adjust (struct cgraph_node *node)
46546 {
46547 const char *str = NULL;
46548 gcc_assert (node->decl == cfun->decl);
46549 switch (node->simdclone->vecsize_mangle)
46550 {
46551 case 'b':
46552 if (!TARGET_SSE2)
46553 str = "sse2";
46554 break;
46555 case 'c':
46556 if (!TARGET_AVX)
46557 str = "avx";
46558 break;
46559 case 'd':
46560 if (!TARGET_AVX2)
46561 str = "avx2";
46562 break;
46563 default:
46564 gcc_unreachable ();
46565 }
46566 if (str == NULL)
46567 return;
46568 push_cfun (NULL);
46569 tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
46570 bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
46571 gcc_assert (ok);
46572 pop_cfun ();
46573 ix86_previous_fndecl = NULL_TREE;
46574 ix86_set_current_function (node->decl);
46575 }
46576
46577 /* If SIMD clone NODE can't be used in a vectorized loop
46578 in current function, return -1, otherwise return a badness of using it
46579 (0 if it is most desirable from vecsize_mangle point of view, 1
46580 slightly less desirable, etc.). */
46581
46582 static int
46583 ix86_simd_clone_usable (struct cgraph_node *node)
46584 {
46585 switch (node->simdclone->vecsize_mangle)
46586 {
46587 case 'b':
46588 if (!TARGET_SSE2)
46589 return -1;
46590 if (!TARGET_AVX)
46591 return 0;
46592 return TARGET_AVX2 ? 2 : 1;
46593 case 'c':
46594 if (!TARGET_AVX)
46595 return -1;
46596 return TARGET_AVX2 ? 1 : 0;
46597 break;
46598 case 'd':
46599 if (!TARGET_AVX2)
46600 return -1;
46601 return 0;
46602 default:
46603 gcc_unreachable ();
46604 }
46605 }
46606
46607 /* This function counts the number of memory references.
46608 The resulting count determines the unrolling factor for the
46609 bdver3 and bdver4 architectures. */
46610
46611 static int
46612 ix86_loop_memcount (rtx *x, unsigned *mem_count)
46613 {
46614 if (*x != NULL_RTX && MEM_P (*x))
46615 {
46616 enum machine_mode mode;
46617 unsigned int n_words;
46618
46619 mode = GET_MODE (*x);
46620 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
46621
46622 if (n_words > 4)
46623 (*mem_count) += 2;
46624 else
46625 (*mem_count) += 1;
46626 }
46627 return 0;
46628 }
46629
46630 /* This function adjusts the unroll factor based on
46631 the hardware capabilities. For example, bdver3 has
46632 a loop buffer which makes unrolling smaller
46633 loops less important. This function decides the
46634 unroll factor using the number of memory references
46635 (the value 32 is used) as a heuristic. */
46636
46637 static unsigned
46638 ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
46639 {
46640 basic_block *bbs;
46641 rtx insn;
46642 unsigned i;
46643 unsigned mem_count = 0;
46644
46645 if (!TARGET_ADJUST_UNROLL)
46646 return nunroll;
46647
46648 /* Count the number of memory references within the loop body. */
46649 bbs = get_loop_body (loop);
46650 for (i = 0; i < loop->num_nodes; i++)
46651 {
46652 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
46653 if (NONDEBUG_INSN_P (insn))
46654 for_each_rtx (&insn, (rtx_function) ix86_loop_memcount, &mem_count);
46655 }
46656 free (bbs);
46657
46658 if (mem_count && mem_count <= 32)
46659 return 32 / mem_count;
46660
46661 return nunroll;
46662 }
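
/* Worked example (illustrative): a loop body containing 8 counted memory
   references gets an unroll factor of 32 / 8 = 4 regardless of NUNROLL,
   whereas a loop with more than 32 counted references, or a tuning that
   does not request the adjustment, keeps the unroll factor proposed by
   the middle end.  */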
46663
46664
46665 /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P. */
46666
46667 static bool
46668 ix86_float_exceptions_rounding_supported_p (void)
46669 {
46670 /* For x87 floating point with standard excess precision handling,
46671 there is no adddf3 pattern (since x87 floating point only has
46672 XFmode operations) so the default hook implementation gets this
46673 wrong. */
46674 return TARGET_80387 || TARGET_SSE_MATH;
46675 }
46676
46677 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
46678
46679 static void
46680 ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
46681 {
46682 if (!TARGET_80387 && !TARGET_SSE_MATH)
46683 return;
46684 tree exceptions_var = create_tmp_var (integer_type_node, NULL);
46685 if (TARGET_80387)
46686 {
46687 tree fenv_index_type = build_index_type (size_int (6));
46688 tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
46689 tree fenv_var = create_tmp_var (fenv_type, NULL);
46690 mark_addressable (fenv_var);
46691 tree fenv_ptr = build_pointer_type (fenv_type);
46692 tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
46693 fenv_addr = fold_convert (ptr_type_node, fenv_addr);
46694 tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
46695 tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
46696 tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
46697 tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
46698 tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
46699 tree hold_fnclex = build_call_expr (fnclex, 0);
46700 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
46701 hold_fnclex);
46702 *clear = build_call_expr (fnclex, 0);
46703 tree sw_var = create_tmp_var (short_unsigned_type_node, NULL);
46704 mark_addressable (sw_var);
46705 tree su_ptr = build_pointer_type (short_unsigned_type_node);
46706 tree sw_addr = build1 (ADDR_EXPR, su_ptr, sw_var);
46707 tree fnstsw_call = build_call_expr (fnstsw, 1, sw_addr);
46708 tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
46709 tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
46710 exceptions_var, exceptions_x87);
46711 *update = build2 (COMPOUND_EXPR, integer_type_node,
46712 fnstsw_call, update_mod);
46713 tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
46714 *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
46715 }
46716 if (TARGET_SSE_MATH)
46717 {
46718 tree mxcsr_orig_var = create_tmp_var (unsigned_type_node, NULL);
46719 tree mxcsr_mod_var = create_tmp_var (unsigned_type_node, NULL);
46720 tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
46721 tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
46722 tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
46723 tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
46724 mxcsr_orig_var, stmxcsr_hold_call);
46725 tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
46726 mxcsr_orig_var,
46727 build_int_cst (unsigned_type_node, 0x1f80));
46728 hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
46729 build_int_cst (unsigned_type_node, 0xffffffc0));
46730 tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
46731 mxcsr_mod_var, hold_mod_val);
46732 tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46733 tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
46734 hold_assign_orig, hold_assign_mod);
46735 hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
46736 ldmxcsr_hold_call);
46737 if (*hold)
46738 *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
46739 else
46740 *hold = hold_all;
46741 tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
46742 if (*clear)
46743 *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
46744 ldmxcsr_clear_call);
46745 else
46746 *clear = ldmxcsr_clear_call;
46747 tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
46748 tree exceptions_sse = fold_convert (integer_type_node,
46749 stxmcsr_update_call);
46750 if (*update)
46751 {
46752 tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
46753 exceptions_var, exceptions_sse);
46754 tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
46755 exceptions_var, exceptions_mod);
46756 *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
46757 exceptions_assign);
46758 }
46759 else
46760 *update = build2 (MODIFY_EXPR, integer_type_node,
46761 exceptions_var, exceptions_sse);
46762 tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
46763 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46764 ldmxcsr_update_call);
46765 }
46766 tree atomic_feraiseexcept
46767 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
46768 tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
46769 1, exceptions_var);
46770 *update = build2 (COMPOUND_EXPR, void_type_node, *update,
46771 atomic_feraiseexcept_call);
46772 }
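
/* Illustrative effect of the hook above (a sketch, not literal output):
   for a C11 atomic compound assignment such as "x *= y" on an
   _Atomic double, *HOLD is emitted before the operation (fnstenv/fnclex
   and/or stmxcsr with exceptions masked, saving the environment),
   *CLEAR after each failed compare-exchange iteration (discarding
   spurious exception flags), and *UPDATE once the operation succeeds
   (fnstsw/stmxcsr to collect the flags, __atomic_feraiseexcept to
   re-raise them, and a restore of the saved environment).  */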
46773
46774 /* Initialize the GCC target structure. */
46775 #undef TARGET_RETURN_IN_MEMORY
46776 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
46777
46778 #undef TARGET_LEGITIMIZE_ADDRESS
46779 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
46780
46781 #undef TARGET_ATTRIBUTE_TABLE
46782 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
46783 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
46784 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
46785 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46786 # undef TARGET_MERGE_DECL_ATTRIBUTES
46787 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
46788 #endif
46789
46790 #undef TARGET_COMP_TYPE_ATTRIBUTES
46791 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
46792
46793 #undef TARGET_INIT_BUILTINS
46794 #define TARGET_INIT_BUILTINS ix86_init_builtins
46795 #undef TARGET_BUILTIN_DECL
46796 #define TARGET_BUILTIN_DECL ix86_builtin_decl
46797 #undef TARGET_EXPAND_BUILTIN
46798 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
46799
46800 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
46801 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
46802 ix86_builtin_vectorized_function
46803
46804 #undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
46805 #define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
46806
46807 #undef TARGET_VECTORIZE_BUILTIN_TM_STORE
46808 #define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
46809
46810 #undef TARGET_VECTORIZE_BUILTIN_GATHER
46811 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
46812
46813 #undef TARGET_BUILTIN_RECIPROCAL
46814 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
46815
46816 #undef TARGET_ASM_FUNCTION_EPILOGUE
46817 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
46818
46819 #undef TARGET_ENCODE_SECTION_INFO
46820 #ifndef SUBTARGET_ENCODE_SECTION_INFO
46821 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
46822 #else
46823 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
46824 #endif
46825
46826 #undef TARGET_ASM_OPEN_PAREN
46827 #define TARGET_ASM_OPEN_PAREN ""
46828 #undef TARGET_ASM_CLOSE_PAREN
46829 #define TARGET_ASM_CLOSE_PAREN ""
46830
46831 #undef TARGET_ASM_BYTE_OP
46832 #define TARGET_ASM_BYTE_OP ASM_BYTE
46833
46834 #undef TARGET_ASM_ALIGNED_HI_OP
46835 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
46836 #undef TARGET_ASM_ALIGNED_SI_OP
46837 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
46838 #ifdef ASM_QUAD
46839 #undef TARGET_ASM_ALIGNED_DI_OP
46840 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
46841 #endif
46842
46843 #undef TARGET_PROFILE_BEFORE_PROLOGUE
46844 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
46845
46846 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
46847 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
46848
46849 #undef TARGET_ASM_UNALIGNED_HI_OP
46850 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
46851 #undef TARGET_ASM_UNALIGNED_SI_OP
46852 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
46853 #undef TARGET_ASM_UNALIGNED_DI_OP
46854 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
46855
46856 #undef TARGET_PRINT_OPERAND
46857 #define TARGET_PRINT_OPERAND ix86_print_operand
46858 #undef TARGET_PRINT_OPERAND_ADDRESS
46859 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
46860 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
46861 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
46862 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
46863 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
46864
46865 #undef TARGET_SCHED_INIT_GLOBAL
46866 #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
46867 #undef TARGET_SCHED_ADJUST_COST
46868 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
46869 #undef TARGET_SCHED_ISSUE_RATE
46870 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
46871 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
46872 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
46873 ia32_multipass_dfa_lookahead
46874 #undef TARGET_SCHED_MACRO_FUSION_P
46875 #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
46876 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
46877 #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
46878
46879 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
46880 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
46881
46882 #undef TARGET_MEMMODEL_CHECK
46883 #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
46884
46885 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
46886 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
46887
46888 #ifdef HAVE_AS_TLS
46889 #undef TARGET_HAVE_TLS
46890 #define TARGET_HAVE_TLS true
46891 #endif
46892 #undef TARGET_CANNOT_FORCE_CONST_MEM
46893 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
46894 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
46895 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
46896
46897 #undef TARGET_DELEGITIMIZE_ADDRESS
46898 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
46899
46900 #undef TARGET_MS_BITFIELD_LAYOUT_P
46901 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
46902
46903 #if TARGET_MACHO
46904 #undef TARGET_BINDS_LOCAL_P
46905 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
46906 #endif
46907 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46908 #undef TARGET_BINDS_LOCAL_P
46909 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
46910 #endif
46911
46912 #undef TARGET_ASM_OUTPUT_MI_THUNK
46913 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
46914 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
46915 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
46916
46917 #undef TARGET_ASM_FILE_START
46918 #define TARGET_ASM_FILE_START x86_file_start
46919
46920 #undef TARGET_OPTION_OVERRIDE
46921 #define TARGET_OPTION_OVERRIDE ix86_option_override
46922
46923 #undef TARGET_REGISTER_MOVE_COST
46924 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
46925 #undef TARGET_MEMORY_MOVE_COST
46926 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
46927 #undef TARGET_RTX_COSTS
46928 #define TARGET_RTX_COSTS ix86_rtx_costs
46929 #undef TARGET_ADDRESS_COST
46930 #define TARGET_ADDRESS_COST ix86_address_cost
46931
46932 #undef TARGET_FIXED_CONDITION_CODE_REGS
46933 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
46934 #undef TARGET_CC_MODES_COMPATIBLE
46935 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
46936
46937 #undef TARGET_MACHINE_DEPENDENT_REORG
46938 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
46939
46940 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
46941 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
46942
46943 #undef TARGET_BUILD_BUILTIN_VA_LIST
46944 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
46945
46946 #undef TARGET_FOLD_BUILTIN
46947 #define TARGET_FOLD_BUILTIN ix86_fold_builtin
46948
46949 #undef TARGET_COMPARE_VERSION_PRIORITY
46950 #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
46951
46952 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
46953 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
46954 ix86_generate_version_dispatcher_body
46955
46956 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
46957 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
46958 ix86_get_function_versions_dispatcher
46959
46960 #undef TARGET_ENUM_VA_LIST_P
46961 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
46962
46963 #undef TARGET_FN_ABI_VA_LIST
46964 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
46965
46966 #undef TARGET_CANONICAL_VA_LIST_TYPE
46967 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
46968
46969 #undef TARGET_EXPAND_BUILTIN_VA_START
46970 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
46971
46972 #undef TARGET_MD_ASM_CLOBBERS
46973 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
46974
46975 #undef TARGET_PROMOTE_PROTOTYPES
46976 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
46977 #undef TARGET_SETUP_INCOMING_VARARGS
46978 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
46979 #undef TARGET_MUST_PASS_IN_STACK
46980 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
46981 #undef TARGET_FUNCTION_ARG_ADVANCE
46982 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
46983 #undef TARGET_FUNCTION_ARG
46984 #define TARGET_FUNCTION_ARG ix86_function_arg
46985 #undef TARGET_FUNCTION_ARG_BOUNDARY
46986 #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
46987 #undef TARGET_PASS_BY_REFERENCE
46988 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
46989 #undef TARGET_INTERNAL_ARG_POINTER
46990 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
46991 #undef TARGET_UPDATE_STACK_BOUNDARY
46992 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
46993 #undef TARGET_GET_DRAP_RTX
46994 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
46995 #undef TARGET_STRICT_ARGUMENT_NAMING
46996 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
46997 #undef TARGET_STATIC_CHAIN
46998 #define TARGET_STATIC_CHAIN ix86_static_chain
46999 #undef TARGET_TRAMPOLINE_INIT
47000 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
47001 #undef TARGET_RETURN_POPS_ARGS
47002 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
47003
47004 #undef TARGET_LEGITIMATE_COMBINED_INSN
47005 #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
47006
47007 #undef TARGET_ASAN_SHADOW_OFFSET
47008 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
47009
47010 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
47011 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
47012
47013 #undef TARGET_SCALAR_MODE_SUPPORTED_P
47014 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
47015
47016 #undef TARGET_VECTOR_MODE_SUPPORTED_P
47017 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
47018
47019 #undef TARGET_C_MODE_FOR_SUFFIX
47020 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
47021
47022 #ifdef HAVE_AS_TLS
47023 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
47024 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
47025 #endif
47026
47027 #ifdef SUBTARGET_INSERT_ATTRIBUTES
47028 #undef TARGET_INSERT_ATTRIBUTES
47029 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
47030 #endif
47031
47032 #undef TARGET_MANGLE_TYPE
47033 #define TARGET_MANGLE_TYPE ix86_mangle_type
47034
47035 #if !TARGET_MACHO
47036 #undef TARGET_STACK_PROTECT_FAIL
47037 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
47038 #endif
47039
47040 #undef TARGET_FUNCTION_VALUE
47041 #define TARGET_FUNCTION_VALUE ix86_function_value
47042
47043 #undef TARGET_FUNCTION_VALUE_REGNO_P
47044 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
47045
47046 #undef TARGET_PROMOTE_FUNCTION_MODE
47047 #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
47048
47049 #undef TARGET_MEMBER_TYPE_FORCES_BLK
47050 #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
47051
47052 #undef TARGET_INSTANTIATE_DECLS
47053 #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
47054
47055 #undef TARGET_SECONDARY_RELOAD
47056 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
47057
47058 #undef TARGET_CLASS_MAX_NREGS
47059 #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
47060
47061 #undef TARGET_PREFERRED_RELOAD_CLASS
47062 #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
47063 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
47064 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
47065 #undef TARGET_CLASS_LIKELY_SPILLED_P
47066 #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
47067
47068 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
47069 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
47070 ix86_builtin_vectorization_cost
47071 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
47072 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
47073 ix86_vectorize_vec_perm_const_ok
47074 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
47075 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
47076 ix86_preferred_simd_mode
47077 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
47078 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
47079 ix86_autovectorize_vector_sizes
47080 #undef TARGET_VECTORIZE_INIT_COST
47081 #define TARGET_VECTORIZE_INIT_COST ix86_init_cost
47082 #undef TARGET_VECTORIZE_ADD_STMT_COST
47083 #define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
47084 #undef TARGET_VECTORIZE_FINISH_COST
47085 #define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
47086 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
47087 #define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
47088
47089 #undef TARGET_SET_CURRENT_FUNCTION
47090 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
47091
47092 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
47093 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
47094
47095 #undef TARGET_OPTION_SAVE
47096 #define TARGET_OPTION_SAVE ix86_function_specific_save
47097
47098 #undef TARGET_OPTION_RESTORE
47099 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
47100
47101 #undef TARGET_OPTION_PRINT
47102 #define TARGET_OPTION_PRINT ix86_function_specific_print
47103
47104 #undef TARGET_OPTION_FUNCTION_VERSIONS
47105 #define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
47106
47107 #undef TARGET_CAN_INLINE_P
47108 #define TARGET_CAN_INLINE_P ix86_can_inline_p
47109
47110 #undef TARGET_EXPAND_TO_RTL_HOOK
47111 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
47112
47113 #undef TARGET_LEGITIMATE_ADDRESS_P
47114 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
47115
47116 #undef TARGET_LRA_P
47117 #define TARGET_LRA_P hook_bool_void_true
47118
47119 #undef TARGET_REGISTER_PRIORITY
47120 #define TARGET_REGISTER_PRIORITY ix86_register_priority
47121
47122 #undef TARGET_REGISTER_USAGE_LEVELING_P
47123 #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
47124
47125 #undef TARGET_LEGITIMATE_CONSTANT_P
47126 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
47127
47128 #undef TARGET_FRAME_POINTER_REQUIRED
47129 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
47130
47131 #undef TARGET_CAN_ELIMINATE
47132 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
47133
47134 #undef TARGET_EXTRA_LIVE_ON_ENTRY
47135 #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
47136
47137 #undef TARGET_ASM_CODE_END
47138 #define TARGET_ASM_CODE_END ix86_code_end
47139
47140 #undef TARGET_CONDITIONAL_REGISTER_USAGE
47141 #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
47142
47143 #if TARGET_MACHO
47144 #undef TARGET_INIT_LIBFUNCS
47145 #define TARGET_INIT_LIBFUNCS darwin_rename_builtins
47146 #endif
47147
47148 #undef TARGET_LOOP_UNROLL_ADJUST
47149 #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
47150
47151 #undef TARGET_SPILL_CLASS
47152 #define TARGET_SPILL_CLASS ix86_spill_class
47153
47154 #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
47155 #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
47156 ix86_simd_clone_compute_vecsize_and_simdlen
47157
47158 #undef TARGET_SIMD_CLONE_ADJUST
47159 #define TARGET_SIMD_CLONE_ADJUST \
47160 ix86_simd_clone_adjust
47161
47162 #undef TARGET_SIMD_CLONE_USABLE
47163 #define TARGET_SIMD_CLONE_USABLE \
47164 ix86_simd_clone_usable
47165
47166 #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
47167 #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
47168 ix86_float_exceptions_rounding_supported_p
47169
47170 #undef TARGET_MODE_EMIT
47171 #define TARGET_MODE_EMIT ix86_emit_mode_set
47172
47173 #undef TARGET_MODE_NEEDED
47174 #define TARGET_MODE_NEEDED ix86_mode_needed
47175
47176 #undef TARGET_MODE_AFTER
47177 #define TARGET_MODE_AFTER ix86_mode_after
47178
47179 #undef TARGET_MODE_ENTRY
47180 #define TARGET_MODE_ENTRY ix86_mode_entry
47181
47182 #undef TARGET_MODE_EXIT
47183 #define TARGET_MODE_EXIT ix86_mode_exit
47184
47185 #undef TARGET_MODE_PRIORITY
47186 #define TARGET_MODE_PRIORITY ix86_mode_priority
47187
47188 struct gcc_target targetm = TARGET_INITIALIZER;
47189 \f
47190 #include "gt-i386.h"